286 files changed, 8713 insertions, 1926 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 5613aa378a83..4341ccf5c0c4 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -630,12 +630,6 @@ static int __hw_perf_event_init(struct perf_event *event) return ev; } - /* The EV67 does not support mode exclusion */ - if (attr->exclude_kernel || attr->exclude_user - || attr->exclude_hv || attr->exclude_idle) { - return -EPERM; - } - /* * We place the event type in event_base here and leave calculation * of the codes to programme the PMU for alpha_pmu_enable() because @@ -771,6 +765,7 @@ static struct pmu pmu = { .start = alpha_pmu_start, .stop = alpha_pmu_stop, .read = alpha_pmu_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index e49e06834516..fce4b426c379 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -294,13 +294,7 @@ static int mmdc_pmu_event_init(struct perf_event *event) return -EOPNOTSUPP; } - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) + if (event->attr.sample_period) return -EINVAL; if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS) @@ -456,6 +450,7 @@ static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc, .start = mmdc_pmu_event_start, .stop = mmdc_pmu_event_stop, .read = mmdc_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }, .mmdc_base = mmdc_base, .dev = dev, diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c index afe5b4c7b164..99bcd074916a 100644 --- a/arch/arm/mm/cache-l2x0-pmu.c +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -314,14 +314,6 @@ static int l2x0_pmu_event_init(struct perf_event *event) event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - if (event->cpu < 0) return -EINVAL; @@ -544,6 +536,7 @@ static __init int l2x0_pmu_init(void) .del = l2x0_pmu_event_del, .event_init = l2x0_pmu_event_init, .attr_groups = l2x0_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; l2x0_pmu_reset(); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 72238eedc360..d2b8e6061933 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1306,15 +1306,6 @@ static int h_24x7_event_init(struct perf_event *event) return -EINVAL; } - /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - /* no branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -1577,6 +1568,7 @@ static struct pmu h_24x7_pmu = { .start_txn = h_24x7_event_start_txn, .commit_txn = h_24x7_event_commit_txn, .cancel_txn = h_24x7_event_cancel_txn, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static int hv_24x7_init(void) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 43fabb3cae0f..735e77b09cdb 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -232,15 +232,6 @@ static int h_gpci_event_init(struct perf_event *event) return -EINVAL; } - /* unsupported modes and filters */ - if (event->attr.exclude_user || - 
event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - /* no branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; @@ -285,6 +276,7 @@ static struct pmu h_gpci_pmu = { .start = h_gpci_event_start, .stop = h_gpci_event_stop, .read = h_gpci_event_update, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static int hv_gpci_init(void) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index f292a3f284f1..b1c37cc3fa98 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -473,15 +473,6 @@ static int nest_imc_event_init(struct perf_event *event) if (event->hw.sample_period) return -EINVAL; - /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - if (event->cpu < 0) return -EINVAL; @@ -748,15 +739,6 @@ static int core_imc_event_init(struct perf_event *event) if (event->hw.sample_period) return -EINVAL; - /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - if (event->cpu < 0) return -EINVAL; @@ -1069,6 +1051,7 @@ static int update_pmu_ops(struct imc_pmu *pmu) pmu->pmu.stop = imc_event_stop; pmu->pmu.read = imc_event_update; pmu->pmu.attr_groups = pmu->attr_groups; + pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group; switch (pmu->domain) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bfabeb1889cc..1266194afb02 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb) /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling - * @cpu: On which to allocate, -1 means current + * @event: Event the buffer is setup for, event->cpu == -1 means current * @pages: Array of pointers to buffer pages passed from perf core * @nr_pages: Total pages * @snapshot: Flag for snapshot mode @@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb) * * Return the private AUX buffer structure if success or NULL if fails. 
*/ -static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *aux_buffer_setup(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct sf_buffer *sfb; struct aux_buffer *aux; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index d50bb4dc0650..62f317c9113a 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -253,15 +253,6 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } -static const struct perf_event_attr ibs_notsupp = { - .exclude_user = 1, - .exclude_kernel = 1, - .exclude_hv = 1, - .exclude_idle = 1, - .exclude_host = 1, - .exclude_guest = 1, -}; - static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -282,9 +273,6 @@ static int perf_ibs_init(struct perf_event *event) if (event->pmu != &perf_ibs->pmu) return -ENOENT; - if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) - return -EINVAL; - if (config & ~perf_ibs->config_mask) return -EINVAL; @@ -537,6 +525,7 @@ static struct perf_ibs perf_ibs_fetch = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }, .msr = MSR_AMD64_IBSFETCHCTL, .config_mask = IBS_FETCH_CONFIG_MASK, diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 3210fee27e7f..7635c23f7d82 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -223,11 +223,6 @@ static int perf_iommu_event_init(struct perf_event *event) if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* IOMMU counters do not have usr/os/guest/host bits */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_host || event->attr.exclude_guest) - return -EINVAL; - if (event->cpu < 0) return -EINVAL; @@ -414,6 +409,7 @@ static const struct pmu iommu_pmu __initconst = { .read = perf_iommu_read, .task_ctx_nr = perf_invalid_context, .attr_groups = amd_iommu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static __init int init_one_iommu(unsigned int idx) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 2aefacf5c5b2..c5ff084551c6 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -136,14 +136,7 @@ static int pmu_event_init(struct perf_event *event) return -ENOENT; /* Unsupported modes and filters. 
*/ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - /* no sampling */ - event->attr.sample_period) + if (event->attr.sample_period) return -EINVAL; if (cfg != AMD_POWER_EVENTSEL_PKG) @@ -226,6 +219,7 @@ static struct pmu pmu_class = { .start = pmu_event_start, .stop = pmu_event_stop, .read = pmu_event_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static int power_cpu_exit(unsigned int cpu) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 398df6eaa109..79cfd3b30ceb 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -201,11 +201,6 @@ static int amd_uncore_event_init(struct perf_event *event) if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* NB and Last level cache counters do not have usr/os/guest/host bits */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_host || event->attr.exclude_guest) - return -EINVAL; - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -307,6 +302,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static struct pmu amd_llc_pmu = { @@ -317,6 +313,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index a01ef1b0f883..7cdd7b13bbda 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -77,10 +77,12 @@ static size_t buf_size(struct page *page) } static void * -bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { struct bts_buffer *buf; struct page *page; + int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 730978dff63f..17096d3cd616 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -18,6 +18,7 @@ #include <asm/hardirq.h> #include <asm/intel-family.h> #include <asm/apic.h> +#include <asm/cpu_device_id.h> #include "../perf_event.h" @@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; - /* - * If PMU counter has PEBS enabled it is not enough to disable counter - * on a guest entry since PEBS memory write can overshoot guest entry - * and corrupt guest memory. Disabling PEBS solves the problem. 
- */ - arr[1].msr = MSR_IA32_PEBS_ENABLE; - arr[1].host = cpuc->pebs_enabled; - arr[1].guest = 0; + if (x86_pmu.flags & PMU_FL_PEBS_ALL) + arr[0].guest &= ~cpuc->pebs_enabled; + else + arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); + *nr = 1; + + if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { + /* + * If PMU counter has PEBS enabled it is not enough to + * disable counter on a guest entry since PEBS memory + * write can overshoot guest entry and corrupt guest + * memory. Disabling PEBS solves the problem. + * + * Don't do this if the CPU already enforces it. + */ + arr[1].msr = MSR_IA32_PEBS_ENABLE; + arr[1].host = cpuc->pebs_enabled; + arr[1].guest = 0; + *nr = 2; + } - *nr = 2; return arr; } @@ -3748,36 +3760,62 @@ static __init void intel_clovertown_quirk(void) x86_pmu.pebs_constraints = NULL; } -static int intel_snb_pebs_broken(int cpu) +static const struct x86_cpu_desc isolation_ucodes[] = { + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), + INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e), + {} +}; + +static void intel_check_pebs_isolation(void) { - u32 rev = UINT_MAX; /* default to broken for unknown models */ + x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes); +} - switch (cpu_data(cpu).x86_model) { - case INTEL_FAM6_SANDYBRIDGE: - rev = 0x28; - break; +static __init void intel_pebs_isolation_quirk(void) +{ + WARN_ON_ONCE(x86_pmu.check_microcode); + x86_pmu.check_microcode = intel_check_pebs_isolation; + intel_check_pebs_isolation(); +} - case INTEL_FAM6_SANDYBRIDGE_X: - switch (cpu_data(cpu).x86_stepping) { - case 6: rev = 0x618; break; - case 7: rev = 0x70c; break; - } - } +static const struct x86_cpu_desc pebs_ucodes[] = { + INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028), + INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618), + INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c), + {} +}; - return (cpu_data(cpu).microcode < rev); +static bool intel_snb_pebs_broken(void) +{ + return !x86_cpu_has_min_microcode_rev(pebs_ucodes); } static void intel_snb_check_microcode(void) { - int pebs_broken = 0; - int cpu; - - for_each_online_cpu(cpu) { - if 
((pebs_broken = intel_snb_pebs_broken(cpu))) - break; - } - - if (pebs_broken == x86_pmu.pebs_broken) + if (intel_snb_pebs_broken() == x86_pmu.pebs_broken) return; /* @@ -3894,23 +3932,22 @@ static __init void intel_nehalem_quirk(void) } } -static bool intel_glp_counter_freezing_broken(int cpu) -{ - u32 rev = UINT_MAX; /* default to broken for unknown stepping */ - - switch (cpu_data(cpu).x86_stepping) { - case 1: - rev = 0x28; - break; - case 8: - rev = 0x6; - break; - } +static const struct x86_cpu_desc counter_freezing_ucodes[] = { + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e), + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e), + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008), + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_X, 1, 0x00000028), + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028), + INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006), + {} +}; - return (cpu_data(cpu).microcode < rev); +static bool intel_counter_freezing_broken(void) +{ + return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes); } -static __init void intel_glp_counter_freezing_quirk(void) +static __init void intel_counter_freezing_quirk(void) { /* Check if it's already disabled */ if (disable_counter_freezing) @@ -3920,7 +3957,7 @@ static __init void intel_glp_counter_freezing_quirk(void) * If the system starts with the wrong ucode, leave the * counter-freezing feature permanently disabled. */ - if (intel_glp_counter_freezing_broken(raw_smp_processor_id())) { + if (intel_counter_freezing_broken()) { pr_info("PMU counter freezing disabled due to CPU errata," "please upgrade microcode\n"); x86_pmu.counter_freezing = false; @@ -4271,6 +4308,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_ATOM_GOLDMONT_X: + x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -4297,7 +4335,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GOLDMONT_PLUS: - x86_add_quirk(intel_glp_counter_freezing_quirk); + x86_add_quirk(intel_counter_freezing_quirk); memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, @@ -4440,6 +4478,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_HASWELL_ULT: case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); + x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -4471,6 +4510,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_BROADWELL_XEON_D: case INTEL_FAM6_BROADWELL_GT3E: case INTEL_FAM6_BROADWELL_X: + x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -4533,6 +4573,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: + x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index d2e780705c5a..94a4b7fc75d0 
100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -280,13 +280,7 @@ static int cstate_pmu_event_init(struct perf_event *event) return -ENOENT; /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) /* no sampling */ + if (event->attr.sample_period) /* no sampling */ return -EINVAL; if (event->cpu < 0) @@ -437,7 +431,7 @@ static struct pmu cstate_core_pmu = { .start = cstate_pmu_event_start, .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, - .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, .module = THIS_MODULE, }; @@ -451,7 +445,7 @@ static struct pmu cstate_pkg_pmu = { .start = cstate_pmu_event_start, .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, - .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, .module = THIS_MODULE, }; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e9acf1d2e7b2..10c99ce1fead 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1628,6 +1628,8 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; + if (x86_pmu.version <= 4) + x86_pmu.pebs_no_isolation = 1; if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; int format = x86_pmu.intel_cap.pebs_format; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 9494ca68fd9d..c0e86ff21f81 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, * Return: Our private PT buffer structure. 
*/ static void * -pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct pt_buffer *buf; - int node, ret; + int node, ret, cpu = event->cpu; if (!nr_pages) return NULL; diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 91039ffed633..94dc564146ca 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -397,13 +397,7 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) /* no sampling */ + if (event->attr.sample_period) /* no sampling */ return -EINVAL; /* must be done before validate_group */ @@ -699,6 +693,7 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; rapl_pmus->pmu.module = THIS_MODULE; + rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; return 0; } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 27a461414b30..d516161c00c4 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -695,14 +695,6 @@ static int uncore_pmu_event_init(struct perf_event *event) if (pmu->func_id < 0) return -ENOENT; - /* - * Uncore PMU does measure at all privilege level all the time. - * So it doesn't make sense to specify any exclude bits. - */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_hv || event->attr.exclude_idle) - return -EINVAL; - /* Sampling not supported yet */ if (hwc->sample_period) return -EINVAL; @@ -800,6 +792,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) .stop = uncore_pmu_event_stop, .read = uncore_pmu_event_read, .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; } else { pmu->pmu = *pmu->type->pmu; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 2593b0d7aeee..b12517fae77a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -397,13 +397,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) return -EINVAL; /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) /* no sampling */ + if (event->attr.sample_period) /* no sampling */ return -EINVAL; /* @@ -497,6 +491,7 @@ static struct pmu snb_uncore_imc_pmu = { .start = uncore_pmu_event_start, .stop = uncore_pmu_event_stop, .read = uncore_pmu_event_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; static struct intel_uncore_ops snb_uncore_imc_ops = { diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 1b9f85abf9bc..a878e6286e4a 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -160,13 +160,7 @@ static int msr_event_init(struct perf_event *event) return -ENOENT; /* unsupported modes and filters */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.sample_period) /* no sampling */ + if (event->attr.sample_period) /* no sampling 
*/ return -EINVAL; if (cfg >= PERF_MSR_EVENT_MAX) @@ -256,7 +250,7 @@ static struct pmu pmu_msr = { .start = msr_event_start, .stop = msr_event_stop, .read = msr_event_update, - .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, }; static int __init msr_init(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index d46fd6754d92..7e75f474b076 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -601,13 +601,14 @@ struct x86_pmu { /* * Intel DebugStore bits */ - unsigned int bts :1, - bts_active :1, - pebs :1, - pebs_active :1, - pebs_broken :1, - pebs_prec_dist :1, - pebs_no_tlb :1; + unsigned int bts :1, + bts_active :1, + pebs :1, + pebs_active :1, + pebs_broken :1, + pebs_prec_dist :1, + pebs_no_tlb :1, + pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index baeba0567126..3417110574c1 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -11,4 +11,32 @@ extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); +/* + * Match specific microcode revisions. + * + * vendor/family/model/stepping must be all set. + * + * Only checks against the boot CPU. When mixed-stepping configs are + * valid for a CPU model, add a quirk for every valid stepping and + * do the fine-tuning in the quirk handler. + */ + +struct x86_cpu_desc { + __u8 x86_family; + __u8 x86_vendor; + __u8 x86_model; + __u8 x86_stepping; + __u32 x86_microcode_rev; +}; + +#define INTEL_CPU_DESC(mod, step, rev) { \ + .x86_family = 6, \ + .x86_vendor = X86_VENDOR_INTEL, \ + .x86_model = mod, \ + .x86_stepping = step, \ + .x86_microcode_rev = rev, \ +} + +extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table); + #endif diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ebeac487a20c..e8b628b1b279 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -11,6 +11,7 @@ #include <linux/stop_machine.h> #include <linux/slab.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -764,8 +765,8 @@ int poke_int3_handler(struct pt_regs *regs) regs->ip = (unsigned long) bp_int3_handler; return 1; - } +NOKPROBE_SYMBOL(poke_int3_handler); /** * text_poke_bp() -- update instructions on live kernel on SMP diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 69f6bbb41be0..01004bfb1a1b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -819,11 +819,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects - * all up to and including B1. - */ - if (c->x86_model <= 1 && c->x86_stepping <= 1) + + /* Fix erratum 1076: CPB feature bit not being set in CPUID. 
*/ + if (!cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 3fed38812eea..6dd78d8235e4 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -48,3 +48,34 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) return NULL; } EXPORT_SYMBOL(x86_match_cpu); + +static const struct x86_cpu_desc * +x86_match_cpu_with_stepping(const struct x86_cpu_desc *match) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + const struct x86_cpu_desc *m; + + for (m = match; m->x86_family | m->x86_model; m++) { + if (c->x86_vendor != m->x86_vendor) + continue; + if (c->x86 != m->x86_family) + continue; + if (c->x86_model != m->x86_model) + continue; + if (c->x86_stepping != m->x86_stepping) + continue; + return m; + } + return NULL; +} + +bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table) +{ + const struct x86_cpu_desc *res = x86_match_cpu_with_stepping(table); + + if (!res || res->x86_microcode_rev > boot_cpu_data.microcode) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8257a59704ae..3e3789c8f8e1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -269,7 +269,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -static int is_ftrace_caller(unsigned long ip) +static nokprobe_inline int is_ftrace_caller(unsigned long ip) { if (ip == ftrace_update_func) return 1; @@ -299,6 +299,7 @@ int ftrace_int3_handler(struct pt_regs *regs) return 1; } +NOKPROBE_SYMBOL(ftrace_int3_handler); static int ftrace_write(unsigned long ip, const char *val, int size) { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4ba75afba527..a034cb808e7e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1028,6 +1028,13 @@ NOKPROBE_SYMBOL(kprobe_fault_handler); int __init arch_populate_kprobe_blacklist(void) { + int ret; + + ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); + if (ret) + return ret; + return kprobe_add_area_blacklist((unsigned long)__entry_text_start, (unsigned long)__entry_text_end); } diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 6adf6e6c2933..f14262952015 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -97,6 +97,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) } asm ( + ".pushsection .rodata\n" "optprobe_template_func:\n" ".global optprobe_template_entry\n" "optprobe_template_entry:\n" @@ -136,8 +137,7 @@ asm ( #endif ".global optprobe_template_end\n" "optprobe_template_end:\n" - ".type optprobe_template_func, @function\n" - ".size optprobe_template_func, .-optprobe_template_func\n"); + ".popsection\n"); void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9b7c4ca8f0a7..e289ce1332ab 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -111,6 +111,7 @@ void ist_enter(struct pt_regs *regs) /* This code is a bit fragile. Test it. 
*/ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); } +NOKPROBE_SYMBOL(ist_enter); void ist_exit(struct pt_regs *regs) { diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index abe8249b893b..8c88bf0a1e5f 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -14,6 +14,7 @@ #include <linux/perf_event.h> #include <linux/percpu-defs.h> #include <linux/slab.h> +#include <linux/stringhash.h> #include <linux/types.h> #include <linux/workqueue.h> @@ -30,11 +31,14 @@ static DEFINE_PER_CPU(struct coresight_device *, csdev_src); PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); +/* Sink ID - same for all ETMs */ +PMU_FORMAT_ATTR(sinkid, "config2:0-31"); static struct attribute *etm_config_formats_attr[] = { &format_attr_cycacc.attr, &format_attr_timestamp.attr, &format_attr_retstack.attr, + &format_attr_sinkid.attr, NULL, }; @@ -43,8 +47,18 @@ static const struct attribute_group etm_pmu_format_group = { .attrs = etm_config_formats_attr, }; +static struct attribute *etm_config_sinks_attr[] = { + NULL, +}; + +static const struct attribute_group etm_pmu_sinks_group = { + .name = "sinks", + .attrs = etm_config_sinks_attr, +}; + static const struct attribute_group *etm_pmu_attr_groups[] = { &etm_pmu_format_group, + &etm_pmu_sinks_group, NULL, }; @@ -177,31 +191,28 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - int cpu; + u32 id; + int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); - /* - * In theory nothing prevent tracers in a trace session from being - * associated with different sinks, nor having a sink per tracer. But - * until we have HW with this kind of topology we need to assume tracers - * in a trace session are using the same sink. Therefore go through - * the coresight bus and pick the first enabled sink. - * - * When operated from sysFS users are responsible to enable the sink - * while from perf, the perf tools will do it based on the choice made - * on the cmd line. As such the "enable_sink" flag in sysFS is reset. - */ - sink = coresight_get_enabled_sink(true); + /* First get the selected sink from user space. 
*/ + if (event->attr.config2) { + id = (u32)event->attr.config2; + sink = coresight_get_sink_by_id(id); + } else { + sink = coresight_get_enabled_sink(true); + } + if (!sink || !sink_ops(sink)->alloc_buffer) goto err; @@ -479,6 +490,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link) return 0; } +static ssize_t etm_perf_sink_name_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct dev_ext_attribute *ea; + + ea = container_of(dattr, struct dev_ext_attribute, attr); + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); +} + +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ + int ret; + unsigned long hash; + const char *name; + struct device *pmu_dev = etm_pmu.dev; + struct device *pdev = csdev->dev.parent; + struct dev_ext_attribute *ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return -EINVAL; + + if (csdev->ea != NULL) + return -EINVAL; + + if (!etm_perf_up) + return -EPROBE_DEFER; + + ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); + if (!ea) + return -ENOMEM; + + name = dev_name(pdev); + /* See function coresight_get_sink_by_id() to know where this is used */ + hash = hashlen_hash(hashlen_string(NULL, name)); + + ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); + if (!ea->attr.attr.name) + return -ENOMEM; + + ea->attr.attr.mode = 0444; + ea->attr.show = etm_perf_sink_name_show; + ea->var = (unsigned long *)hash; + + ret = sysfs_add_file_to_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + + if (!ret) + csdev->ea = ea; + + return ret; +} + +void etm_perf_del_symlink_sink(struct coresight_device *csdev) +{ + struct device *pmu_dev = etm_pmu.dev; + struct dev_ext_attribute *ea = csdev->ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return; + + if (!ea) + return; + + sysfs_remove_file_from_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + csdev->ea = NULL; +} + static int __init etm_perf_init(void) { int ret; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index da7d9336a15c..015213abe00a 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -59,6 +59,8 @@ struct etm_event_data { #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +int etm_perf_add_symlink_sink(struct coresight_device *csdev); +void etm_perf_del_symlink_sink(struct coresight_device *csdev); static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { struct etm_event_data *data = perf_get_aux(handle); @@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle) #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } - +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ return -EINVAL; } +void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} static inline void *etm_perf_sink_config(struct perf_output_handle *handle) { return NULL; diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index 579f34943bf1..b936c6d7e13f 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -147,6 +147,7 @@ void coresight_disable_path(struct list_head *path); int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); struct 
coresight_device *coresight_get_sink(struct list_head *path); struct coresight_device *coresight_get_enabled_sink(bool reset); +struct coresight_device *coresight_get_sink_by_id(u32 id); struct list_head *coresight_build_path(struct coresight_device *csdev, struct coresight_device *sink); void coresight_release_path(struct list_head *path); diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 2b0df1a0a8df..29cef898afba 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -11,6 +11,7 @@ #include <linux/err.h> #include <linux/export.h> #include <linux/slab.h> +#include <linux/stringhash.h> #include <linux/mutex.h> #include <linux/clk.h> #include <linux/coresight.h> @@ -18,6 +19,7 @@ #include <linux/delay.h> #include <linux/pm_runtime.h> +#include "coresight-etm-perf.h" #include "coresight-priv.h" static DEFINE_MUTEX(coresight_mutex); @@ -540,6 +542,47 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate) return dev ? to_coresight_device(dev) : NULL; } +static int coresight_sink_by_id(struct device *dev, void *data) +{ + struct coresight_device *csdev = to_coresight_device(dev); + unsigned long hash; + + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + + if (!csdev->ea) + return 0; + /* + * See function etm_perf_add_symlink_sink() to know where + * this comes from. + */ + hash = (unsigned long)csdev->ea->var; + + if ((u32)hash == *(u32 *)data) + return 1; + } + + return 0; +} + +/** + * coresight_get_sink_by_id - returns the sink that matches the id + * @id: Id of the sink to match + * + * The name of a sink is unique, whether it is found on the AMBA bus or + * otherwise. As such the hash of that name can easily be used to identify + * a sink. + */ +struct coresight_device *coresight_get_sink_by_id(u32 id) +{ + struct device *dev = NULL; + + dev = bus_find_device(&coresight_bustype, NULL, &id, + coresight_sink_by_id); + + return dev ? to_coresight_device(dev) : NULL; +} + /* * coresight_grab_device - Power up this device and any of the helper * devices connected to it for trace operation. Since the helper devices @@ -1167,6 +1210,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) goto err_out; } + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + ret = etm_perf_add_symlink_sink(csdev); + + if (ret) { + device_unregister(&csdev->dev); + /* + * As with the above, all resources are free'd + * explicitly via coresight_device_release() triggered + * from put_device(), which is in turn called from + * function device_unregister(). 
+ */ + goto err_out; + } + } + mutex_lock(&coresight_mutex); coresight_fixup_device_conns(csdev); @@ -1185,6 +1244,7 @@ EXPORT_SYMBOL_GPL(coresight_register); void coresight_unregister(struct coresight_device *csdev) { + etm_perf_del_symlink_sink(csdev); /* Remove references of that device in the topology */ coresight_remove_conns(csdev); device_unregister(&csdev->dev); diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 1bfeb160c5b1..bfd03e023308 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1327,15 +1327,6 @@ static int cci_pmu_event_init(struct perf_event *event) if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; - /* We have no filtering of any kind */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - /* * Following the example set by other "uncore" PMUs, we accept any CPU * and rewrite its affinity dynamically rather than having perf core @@ -1433,6 +1424,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) .stop = cci_pmu_stop, .read = pmu_read, .attr_groups = pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; cci_pmu->plat_device = pdev; diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 7dd850e02f19..2ae76026e947 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -741,10 +741,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) return -EOPNOTSUPP; } - if (has_branch_stack(event) || event->attr.exclude_user || - event->attr.exclude_kernel || event->attr.exclude_hv || - event->attr.exclude_idle || event->attr.exclude_host || - event->attr.exclude_guest) { + if (has_branch_stack(event)) { dev_dbg(ccn->dev, "Can't exclude execution levels!\n"); return -EINVAL; } @@ -1290,6 +1287,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) .read = arm_ccn_pmu_event_read, .pmu_enable = arm_ccn_pmu_enable, .pmu_disable = arm_ccn_pmu_disable, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; /* No overflow interrupt? Have to use a timer instead. 
*/ diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 660cb8ac886a..5851de56bbd0 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -562,13 +562,7 @@ static int dsu_pmu_event_init(struct perf_event *event) return -EINVAL; } - if (has_branch_stack(event) || - event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) { + if (has_branch_stack(event)) { dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n"); return -EINVAL; } @@ -735,6 +729,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) .read = dsu_pmu_read, .attr_groups = dsu_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; rc = perf_pmu_register(&dsu_pmu->pmu, name, -1); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index d0b7dd8fb184..eec75b97e7ea 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -357,13 +357,6 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) } static int -event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; -} - -static int __hw_perf_event_init(struct perf_event *event) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); @@ -393,9 +386,8 @@ __hw_perf_event_init(struct perf_event *event) /* * Check whether we need to exclude the counter from certain modes. */ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { + if (armpmu->set_event_filter && + armpmu->set_event_filter(hwc, &event->attr)) { pr_debug("ARM performance counters do not support " "mode exclusion\n"); return -EOPNOTSUPP; @@ -867,6 +859,9 @@ int armpmu_register(struct arm_pmu *pmu) if (ret) return ret; + if (!pmu->set_event_filter) + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; + ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); if (ret) goto out_destroy; diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 8e46a9dad2fa..7cb766dafe85 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event) { } -static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages, - bool snapshot) +static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { - int i; + int i, cpu = event->cpu; struct page **pglist; struct arm_spe_pmu_buf *buf; diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 69372e2bc93c..0eba947c2ee9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -396,6 +396,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, .attr_groups = hisi_ddrc_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 443906e0aff3..2553a844ebf6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -407,6 +407,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, .attr_groups = hisi_hha_pmu_attr_groups, + 
.capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; ret = perf_pmu_register(&hha_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 0bde5d919b2e..cf1cc34f402a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -397,6 +397,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, .attr_groups = hisi_l3c_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 9efd2413240c..f028cbc3443c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -142,15 +142,6 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; - /* counters do not have these bits */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_host || - event->attr.exclude_guest || - event->attr.exclude_hv || - event->attr.exclude_idle) - return -EINVAL; - /* * The uncore counters not specific to any CPU, so cannot * support per-task diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 842135cf35a3..091b4d7d32c4 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -509,14 +509,6 @@ static int l2_cache_event_init(struct perf_event *event) return -EOPNOTSUPP; } - /* We cannot filter accurately so we just don't allow it. */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_hv || event->attr.exclude_idle) { - dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, - "Can't exclude execution levels\n"); - return -EOPNOTSUPP; - } - if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) || ((event->attr.config & ~L2_EVT_MASK) != 0)) && (event->attr.config != L2CYCLE_CTR_RAW_CODE)) { @@ -982,6 +974,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev) .stop = l2_cache_event_stop, .read = l2_cache_event_read, .attr_groups = l2_cache_pmu_attr_grps, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; l2cache_pmu->num_counters = get_num_counters(); diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 2dc63d61f2ea..5d70646da8c7 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -495,13 +495,6 @@ static int qcom_l3_cache__event_init(struct perf_event *event) return -ENOENT; /* - * There are no per-counter mode filters in the PMU. - */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_hv || event->attr.exclude_idle) - return -EINVAL; - - /* * Sampling not supported since these events are not core-attributable. 
*/ if (hwc->sample_period) @@ -777,6 +770,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev) .read = qcom_l3_cache__event_read, .attr_groups = qcom_l3_cache_pmu_attr_grps, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index c9a1701d3e54..43d76c85da56 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -424,15 +424,6 @@ static int tx2_uncore_event_init(struct perf_event *event) if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* We have no filtering of any kind */ - if (event->attr.exclude_user || - event->attr.exclude_kernel || - event->attr.exclude_hv || - event->attr.exclude_idle || - event->attr.exclude_host || - event->attr.exclude_guest) - return -EINVAL; - if (event->cpu < 0) return -EINVAL; @@ -572,6 +563,7 @@ static int tx2_uncore_pmu_register( .start = tx2_uncore_event_start, .stop = tx2_uncore_event_stop, .read = tx2_uncore_event_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; tx2_pmu->pmu.name = devm_kasprintf(dev, GFP_KERNEL, diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 0dc9ff0f8894..d4ec04868d59 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -917,11 +917,6 @@ static int xgene_perf_event_init(struct perf_event *event) if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* SOC counters do not have usr/os/guest/host bits */ - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_host || event->attr.exclude_guest) - return -EINVAL; - if (event->cpu < 0) return -EINVAL; /* @@ -1136,6 +1131,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name) .start = xgene_perf_start, .stop = xgene_perf_stop, .read = xgene_perf_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; /* Hardware counter init */ diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 46c67a764877..7b87965f7a65 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -154,8 +154,9 @@ struct coresight_connection { * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be - activated but not yet enabled. Enabling for a _sink_ - happens when a source has been selected for that it. + * activated but not yet enabled. Enabling for a _sink_ + * appens when a source has been selected for that it. + * @ea: Device attribute for sink representation under PMU directory. 
*/ struct coresight_device { struct coresight_connection *conns; @@ -168,7 +169,9 @@ struct coresight_device { atomic_t *refcnt; bool orphan; bool enable; /* true only if configured as part of a path */ + /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ + struct dev_ext_attribute *ea; }; #define to_coresight_device(d) container_of(d, struct coresight_device, dev) diff --git a/include/linux/filter.h b/include/linux/filter.h index e532fcc6e4b5..1eb1fa4acd0b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -966,6 +966,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size, void bpf_prog_kallsyms_add(struct bpf_prog *fp); void bpf_prog_kallsyms_del(struct bpf_prog *fp); +void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); #else /* CONFIG_BPF_JIT */ @@ -1021,6 +1022,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) { } + +static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +{ + sym[0] = '\0'; +} + #endif /* CONFIG_BPF_JIT */ void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e1a051724f7e..67e485bfad63 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,8 +53,8 @@ struct perf_guest_info_callbacks { #include <linux/atomic.h> #include <linux/sysfs.h> #include <linux/perf_regs.h> -#include <linux/workqueue.h> #include <linux/cgroup.h> +#include <linux/refcount.h> #include <asm/local.h> struct perf_callchain_entry { @@ -244,6 +244,7 @@ struct perf_event; #define PERF_PMU_CAP_EXCLUSIVE 0x10 #define PERF_PMU_CAP_ITRACE 0x20 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40 +#define PERF_PMU_CAP_NO_EXCLUDE 0x80 /** * struct pmu - generic performance monitoring unit @@ -409,7 +410,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ @@ -742,7 +743,7 @@ struct perf_event_context { int nr_stat; int nr_freq; int rotate_disable; - atomic_t refcount; + refcount_t refcount; struct task_struct *task; /* @@ -983,9 +984,9 @@ extern void perf_event_output_forward(struct perf_event *event, extern void perf_event_output_backward(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); -extern void perf_event_output(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs); +extern int perf_event_output(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs); static inline bool is_default_overflow_handler(struct perf_event *event) @@ -1009,6 +1010,15 @@ perf_event__output_id_sample(struct perf_event *event, extern void perf_log_lost_samples(struct perf_event *event, u64 lost); +static inline bool event_has_any_exclude_flag(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv || + attr->exclude_guest || attr->exclude_host; +} + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; @@ -1118,6 +1128,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, } extern void perf_event_mmap(struct vm_area_struct *vma); + +extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, 
const char *sym); +extern void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags); + extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); @@ -1338,6 +1355,13 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } + +typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); +static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, + bool unregister, const char *sym) { } +static inline void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_namespaces(struct task_struct *tsk) { } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9de8780ac8d9..7198ddd0c6b1 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -372,7 +372,9 @@ struct perf_event_attr { context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ - __reserved_1 : 35; + ksymbol : 1, /* include ksymbol events */ + bpf_event : 1, /* include bpf events */ + __reserved_1 : 33; union { __u32 wakeup_events; /* wakeup every n events */ @@ -445,8 +447,6 @@ struct perf_event_query_bpf { __u32 ids[0]; }; -#define perf_flags(attr) (*(&(attr)->read_format + 1)) - /* * Ioctls that can be done on a perf event fd: */ @@ -965,9 +965,58 @@ enum perf_event_type { */ PERF_RECORD_NAMESPACES = 16, + /* + * Record ksymbol register/unregister events: + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u32 len; + * u16 ksym_type; + * u16 flags; + * char name[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_KSYMBOL = 17, + + /* + * Record bpf events: + * enum perf_bpf_event_type { + * PERF_BPF_EVENT_UNKNOWN = 0, + * PERF_BPF_EVENT_PROG_LOAD = 1, + * PERF_BPF_EVENT_PROG_UNLOAD = 2, + * }; + * + * struct { + * struct perf_event_header header; + * u16 type; + * u16 flags; + * u32 id; + * u8 tag[BPF_TAG_SIZE]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_BPF_EVENT = 18, + PERF_RECORD_MAX, /* non-ABI */ }; +enum perf_record_ksymbol_type { + PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ +}; + +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) + +enum perf_bpf_event_type { + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ +}; + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f908b9356025..19c49313c709 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -495,7 +495,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, *symbol_end = addr + hdr->pages * PAGE_SIZE; } -static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { const char *end = sym + KSYM_NAME_LEN; const struct btf_type *type; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 
8577bb7f8be6..d7185c8f77bf 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1219,6 +1219,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); @@ -1562,6 +1563,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) } bpf_prog_kallsyms_add(prog); + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); return err; free_used_maps: diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 24a77c34e9ad..c2b41a263166 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance events callchain code, extracted from core.c: * @@ -5,8 +6,6 @@ * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * - * For licensing details see kernel-base/COPYING */ #include <linux/perf_event.h> diff --git a/kernel/events/core.c b/kernel/events/core.c index 26d6edab051a..932babd9e86c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance events core code: * @@ -5,8 +6,6 @@ * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> - * - * For licensing details see kernel-base/COPYING */ #include <linux/fs.h> @@ -385,6 +384,8 @@ static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; +static atomic_t nr_ksymbol_events __read_mostly; +static atomic_t nr_bpf_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -1171,7 +1172,7 @@ static void perf_event_ctx_deactivate(struct perf_event_context *ctx) static void get_ctx(struct perf_event_context *ctx) { - WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); + refcount_inc(&ctx->refcount); } static void free_ctx(struct rcu_head *head) @@ -1185,7 +1186,7 @@ static void free_ctx(struct rcu_head *head) static void put_ctx(struct perf_event_context *ctx) { - if (atomic_dec_and_test(&ctx->refcount)) { + if (refcount_dec_and_test(&ctx->refcount)) { if (ctx->parent_ctx) put_ctx(ctx->parent_ctx); if (ctx->task && ctx->task != TASK_TOMBSTONE) @@ -1267,7 +1268,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting) again: rcu_read_lock(); ctx = READ_ONCE(event->ctx); - if (!atomic_inc_not_zero(&ctx->refcount)) { + if (!refcount_inc_not_zero(&ctx->refcount)) { rcu_read_unlock(); goto again; } @@ -1400,7 +1401,7 @@ retry: } if (ctx->task == TASK_TOMBSTONE || - !atomic_inc_not_zero(&ctx->refcount)) { + !refcount_inc_not_zero(&ctx->refcount)) { raw_spin_unlock(&ctx->lock); ctx = NULL; } else { @@ -4056,7 +4057,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) INIT_LIST_HEAD(&ctx->event_list); INIT_LIST_HEAD(&ctx->pinned_active); INIT_LIST_HEAD(&ctx->flexible_active); - atomic_set(&ctx->refcount, 1); + refcount_set(&ctx->refcount, 1); } static struct perf_event_context * @@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event) if 
(attr->mmap || attr->mmap_data || attr->mmap2 || attr->comm || attr->comm_exec || - attr->task || + attr->task || attr->ksymbol || attr->context_switch) return true; return false; @@ -4305,6 +4306,10 @@ static void unaccount_event(struct perf_event *event) dec = true; if (has_branch_stack(event)) dec = true; + if (event->attr.ksymbol) + atomic_dec(&nr_ksymbol_events); + if (event->attr.bpf_event) + atomic_dec(&nr_bpf_events); if (dec) { if (!atomic_add_unless(&perf_sched_count, -1, 1)) @@ -5396,7 +5401,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event) rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { - if (!atomic_inc_not_zero(&rb->refcount)) + if (!refcount_inc_not_zero(&rb->refcount)) rb = NULL; } rcu_read_unlock(); @@ -5406,7 +5411,7 @@ struct ring_buffer *ring_buffer_get(struct perf_event *event) void ring_buffer_put(struct ring_buffer *rb) { - if (!atomic_dec_and_test(&rb->refcount)) + if (!refcount_dec_and_test(&rb->refcount)) return; WARN_ON_ONCE(!list_empty(&rb->event_list)); @@ -5471,7 +5476,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) /* this has to be the last one */ rb_free_aux(rb); - WARN_ON_ONCE(atomic_read(&rb->aux_refcount)); + WARN_ON_ONCE(refcount_read(&rb->aux_refcount)); mutex_unlock(&event->mmap_mutex); } @@ -6497,7 +6502,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static __always_inline void +static __always_inline int __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, @@ -6507,13 +6512,15 @@ __perf_event_output(struct perf_event *event, { struct perf_output_handle handle; struct perf_event_header header; + int err; /* protect the callchain buffers */ rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (output_begin(&handle, event, header.size)) + err = output_begin(&handle, event, header.size); + if (err) goto exit; perf_output_sample(&handle, &header, data, event); @@ -6522,6 +6529,7 @@ __perf_event_output(struct perf_event *event, exit: rcu_read_unlock(); + return err; } void @@ -6540,12 +6548,12 @@ perf_event_output_backward(struct perf_event *event, __perf_event_output(event, data, regs, perf_output_begin_backward); } -void +int perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - __perf_event_output(event, data, regs, perf_output_begin); + return __perf_event_output(event, data, regs, perf_output_begin); } /* @@ -7658,6 +7666,207 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +/* + * ksymbol register/unregister tracking + */ + +struct perf_ksymbol_event { + const char *name; + int name_len; + struct { + struct perf_event_header header; + u64 addr; + u32 len; + u16 ksym_type; + u16 flags; + } event_id; +}; + +static int perf_event_ksymbol_match(struct perf_event *event) +{ + return event->attr.ksymbol; +} + +static void perf_event_ksymbol_output(struct perf_event *event, void *data) +{ + struct perf_ksymbol_event *ksymbol_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_ksymbol_match(event)) + return; + + perf_event_header__init_id(&ksymbol_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + ksymbol_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, ksymbol_event->event_id); + __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len); + 
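For reference, a consumer only has to mirror the record layout documented in the uapi comment above (header, addr, len, ksym_type, flags, u64-padded name, sample_id). The sketch below is a minimal, hypothetical user-space view of such a record; the struct and handler names are illustrative and not part of this patch.

/*
 * Hypothetical user-space mirror of PERF_RECORD_KSYMBOL, following the
 * layout documented in the uapi hunk above.
 */
#include <stdint.h>
#include <stdio.h>

struct ksymbol_record {
        uint32_t hdr_type;              /* PERF_RECORD_KSYMBOL (17) */
        uint16_t hdr_misc;
        uint16_t hdr_size;
        uint64_t addr;
        uint32_t len;
        uint16_t ksym_type;             /* PERF_RECORD_KSYMBOL_TYPE_* */
        uint16_t flags;
        char     name[];                /* NUL terminated, padded to 8 bytes */
};

static void handle_ksymbol(const struct ksymbol_record *rec)
{
        int unreg = rec->flags & 0x1;   /* PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER */

        if (rec->ksym_type != 1)        /* PERF_RECORD_KSYMBOL_TYPE_BPF */
                return;

        printf("%s %s at 0x%llx, %u bytes\n",
               unreg ? "unregister" : "register", rec->name,
               (unsigned long long)rec->addr, rec->len);
}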
perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister, + const char *sym) +{ + struct perf_ksymbol_event ksymbol_event; + char name[KSYM_NAME_LEN]; + u16 flags = 0; + int name_len; + + if (!atomic_read(&nr_ksymbol_events)) + return; + + if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX || + ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN) + goto err; + + strlcpy(name, sym, KSYM_NAME_LEN); + name_len = strlen(name) + 1; + while (!IS_ALIGNED(name_len, sizeof(u64))) + name[name_len++] = '\0'; + BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64)); + + if (unregister) + flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER; + + ksymbol_event = (struct perf_ksymbol_event){ + .name = name, + .name_len = name_len, + .event_id = { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = sizeof(ksymbol_event.event_id) + + name_len, + }, + .addr = addr, + .len = len, + .ksym_type = ksym_type, + .flags = flags, + }, + }; + + perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL); + return; +err: + WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type); +} + +/* + * bpf program load/unload tracking + */ + +struct perf_bpf_event { + struct bpf_prog *prog; + struct { + struct perf_event_header header; + u16 type; + u16 flags; + u32 id; + u8 tag[BPF_TAG_SIZE]; + } event_id; +}; + +static int perf_event_bpf_match(struct perf_event *event) +{ + return event->attr.bpf_event; +} + +static void perf_event_bpf_output(struct perf_event *event, void *data) +{ + struct perf_bpf_event *bpf_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_bpf_match(event)) + return; + + perf_event_header__init_id(&bpf_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + bpf_event->event_id.header.size); + if (ret) + return; + + perf_output_put(&handle, bpf_event->event_id); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, + enum perf_bpf_event_type type) +{ + bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD; + char sym[KSYM_NAME_LEN]; + int i; + + if (prog->aux->func_cnt == 0) { + bpf_get_prog_name(prog, sym); + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)prog->bpf_func, + prog->jited_len, unregister, sym); + } else { + for (i = 0; i < prog->aux->func_cnt; i++) { + struct bpf_prog *subprog = prog->aux->func[i]; + + bpf_get_prog_name(subprog, sym); + perf_event_ksymbol( + PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)subprog->bpf_func, + subprog->jited_len, unregister, sym); + } + } +} + +void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags) +{ + struct perf_bpf_event bpf_event; + + if (type <= PERF_BPF_EVENT_UNKNOWN || + type >= PERF_BPF_EVENT_MAX) + return; + + switch (type) { + case PERF_BPF_EVENT_PROG_LOAD: + case PERF_BPF_EVENT_PROG_UNLOAD: + if (atomic_read(&nr_ksymbol_events)) + perf_event_bpf_emit_ksymbols(prog, type); + break; + default: + break; + } + + if (!atomic_read(&nr_bpf_events)) + return; + + bpf_event = (struct perf_bpf_event){ + .prog = prog, + .event_id = { + .header = { + .type = PERF_RECORD_BPF_EVENT, + .size = sizeof(bpf_event.event_id), + }, + .type = type, + .flags = flags, + .id = prog->aux->id, + }, + }; + + BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64)); + + memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE); + 
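Likewise, the fixed-size PERF_RECORD_BPF_EVENT body assembled here can be mirrored one-to-one in user space. The sketch below is hypothetical (names are illustrative) and assumes BPF_TAG_SIZE is 8 bytes, the same multiple-of-u64 property the BUILD_BUG_ON above depends on.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical user-space mirror of the PERF_RECORD_BPF_EVENT body. */
struct bpf_event_record {
        uint32_t hdr_type;              /* PERF_RECORD_BPF_EVENT (18) */
        uint16_t hdr_misc;
        uint16_t hdr_size;
        uint16_t type;                  /* enum perf_bpf_event_type */
        uint16_t flags;
        uint32_t id;                    /* bpf prog id */
        uint8_t  tag[8];                /* prog->tag, BPF_TAG_SIZE bytes */
};

static void handle_bpf_event(const struct bpf_event_record *rec)
{
        int i;

        printf("bpf prog id %u %s, tag ", rec->id,
               rec->type == 1 ? "loaded" : "unloaded");  /* 1 == PROG_LOAD */
        for (i = 0; i < 8; i++)
                printf("%02x", rec->tag[i]);
        printf("\n");
}

While the program is still loaded, the id carried in the record can be resolved to a full description through the existing BPF_PROG_GET_FD_BY_ID and BPF_OBJ_GET_INFO_BY_FD syscall commands.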
perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL); +} + void perf_event_itrace_started(struct perf_event *event) { event->attach_state |= PERF_ATTACH_ITRACE; @@ -9788,6 +9997,15 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) if (ctx) perf_event_ctx_unlock(event->group_leader, ctx); + if (!ret) { + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && + event_has_any_exclude_flag(event)) { + if (event->destroy) + event->destroy(event); + ret = -EINVAL; + } + } + if (ret) module_put(pmu->module); @@ -9916,6 +10134,10 @@ static void account_event(struct perf_event *event) inc = true; if (is_cgroup_event(event)) inc = true; + if (event->attr.ksymbol) + atomic_inc(&nr_ksymbol_events); + if (event->attr.bpf_event) + atomic_inc(&nr_bpf_events); if (inc) { /* @@ -10407,7 +10629,7 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader, again: rcu_read_lock(); gctx = READ_ONCE(group_leader->ctx); - if (!atomic_inc_not_zero(&gctx->refcount)) { + if (!refcount_inc_not_zero(&gctx->refcount)) { rcu_read_unlock(); goto again; } diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 5befb338a18d..c5cd852fe86b 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -1,18 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0+ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 6dc725a7e7bc..79c47076700a 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -4,13 +4,14 @@ #include <linux/hardirq.h> #include <linux/uaccess.h> +#include <linux/refcount.h> /* Buffer handling */ #define RING_BUFFER_WRITABLE 0x01 struct ring_buffer { - atomic_t refcount; + refcount_t refcount; struct rcu_head rcu_head; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; @@ -48,7 +49,7 @@ struct ring_buffer { atomic_t aux_mmap_count; unsigned long aux_mmap_locked; void (*free_aux)(void *); - atomic_t aux_refcount; + refcount_t aux_refcount; void **aux_pages; void *aux_priv; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 5ab4fe3b1dcc..678ccec60d8f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance events ring-buffer code: * @@ -5,8 +6,6 @@ * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> - * - * For licensing details see kernel-base/COPYING */ #include <linux/perf_event.h> @@ -285,7 +284,7 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) else rb->overwrite = 1; - atomic_set(&rb->refcount, 1); + refcount_set(&rb->refcount, 1); INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); @@ -358,7 +357,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (!atomic_read(&rb->aux_mmap_count)) goto err; - if (!atomic_inc_not_zero(&rb->aux_refcount)) + if (!refcount_inc_not_zero(&rb->aux_refcount)) goto err; /* @@ -658,7 +657,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; @@ -671,7 +670,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, * we keep a refcount here to make sure either of the two can * reference them safely. */ - atomic_set(&rb->aux_refcount, 1); + refcount_set(&rb->aux_refcount, 1); rb->aux_overwrite = overwrite; rb->aux_watermark = watermark; @@ -690,7 +689,7 @@ out: void rb_free_aux(struct ring_buffer *rb) { - if (atomic_dec_and_test(&rb->aux_refcount)) + if (refcount_dec_and_test(&rb->aux_refcount)) __rb_free_aux(rb); } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8aef47ee7bfa..affa830a198c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * User-space Probes (UProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
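As the rb_alloc_aux() hunk above shows, the AUX setup callback now receives the perf_event itself instead of a bare CPU number, so a driver can recover the CPU from event->cpu and also consult per-event attributes when setting up its buffers. A minimal, hypothetical driver-side callback under the new prototype might look like this (all example_* names are made up for illustration):

#include <linux/perf_event.h>
#include <linux/slab.h>

struct example_aux_buf {                /* hypothetical driver-private state */
        int     cpu;
        void    **pages;
        int     nr_pages;
        bool    snapshot;
};

static void *example_pmu_setup_aux(struct perf_event *event, void **pages,
                                   int nr_pages, bool overwrite)
{
        struct example_aux_buf *buf;

        buf = kzalloc(sizeof(*buf), GFP_KERNEL);
        if (!buf)
                return NULL;

        buf->cpu = event->cpu;          /* previously passed as the first argument */
        buf->pages = pages;
        buf->nr_pages = nr_pages;
        buf->snapshot = overwrite;      /* overwrite mode == snapshot-style AUX buffer */

        return buf;
}

The returned pointer is stored in rb->aux_priv and handed back to the driver on later calls, so whatever is allocated here must stay valid until free_aux() runs.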
- * * Copyright (C) IBM Corporation, 2008-2012 * Authors: * Srikar Dronamraju diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index f3a04994e063..14934afa9e68 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -494,7 +494,7 @@ static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter) static int get_ksymbol_bpf(struct kallsym_iter *iter) { - iter->module_name[0] = '\0'; + strlcpy(iter->module_name, "bpf", MODULE_NAME_LEN); iter->exported = 0; return bpf_get_kallsym(iter->pos - iter->pos_ftrace_mod_end, &iter->value, &iter->type, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f4ddfdd2d07e..c83e54727131 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1396,7 +1396,7 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__kprobes_text_end; } -bool within_kprobe_blacklist(unsigned long addr) +static bool __within_kprobe_blacklist(unsigned long addr) { struct kprobe_blacklist_entry *ent; @@ -1410,7 +1410,26 @@ bool within_kprobe_blacklist(unsigned long addr) if (addr >= ent->start_addr && addr < ent->end_addr) return true; } + return false; +} +bool within_kprobe_blacklist(unsigned long addr) +{ + char symname[KSYM_NAME_LEN], *p; + + if (__within_kprobe_blacklist(addr)) + return true; + + /* Check if the address is on a suffixed-symbol */ + if (!lookup_symbol_name(addr, symname)) { + p = strchr(symname, '.'); + if (!p) + return false; + *p = '\0'; + addr = (unsigned long)kprobe_lookup_name(symname, 0); + if (addr) + return __within_kprobe_blacklist(addr); + } return false; } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 95932333a48b..bc35a54ae3d4 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -50,6 +50,7 @@ #include <linux/random.h> #include <linux/jhash.h> #include <linux/nmi.h> +#include <linux/kprobes.h> #include <asm/sections.h> @@ -2814,6 +2815,7 @@ void lockdep_hardirqs_on(unsigned long ip) __trace_hardirqs_on_caller(ip); current->lockdep_recursion = 0; } +NOKPROBE_SYMBOL(lockdep_hardirqs_on); /* * Hardirqs were disabled: @@ -2843,6 +2845,7 @@ void lockdep_hardirqs_off(unsigned long ip) } else debug_atomic_inc(redundant_hardirqs_off); } +NOKPROBE_SYMBOL(lockdep_hardirqs_off); /* * Softirqs will be enabled: @@ -3650,7 +3653,8 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) return 0; } -static int __lock_is_held(const struct lockdep_map *lock, int read) +static nokprobe_inline +int __lock_is_held(const struct lockdep_map *lock, int read) { struct task_struct *curr = current; int i; @@ -3883,6 +3887,7 @@ int lock_is_held_type(const struct lockdep_map *lock, int read) return ret; } EXPORT_SYMBOL_GPL(lock_is_held_type); +NOKPROBE_SYMBOL(lock_is_held_type); struct pin_cookie lock_pin_lock(struct lockdep_map *lock) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9180158756d2..74db52a0a466 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -62,6 +62,7 @@ #include <linux/suspend.h> #include <linux/ftrace.h> #include <linux/tick.h> +#include <linux/kprobes.h> #include "tree.h" #include "rcu.h" @@ -872,6 +873,7 @@ void rcu_nmi_enter(void) { rcu_nmi_enter_common(false); } +NOKPROBE_SYMBOL(rcu_nmi_enter); /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 1971869c4072..f4ca36d92138 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -52,6 +52,7 @@ #include <linux/tick.h> #include <linux/rcupdate_wait.h> #include <linux/sched/isolation.h> +#include 
<linux/kprobes.h> #define CREATE_TRACE_POINTS @@ -249,6 +250,7 @@ int notrace debug_lockdep_rcu_enabled(void) current->lockdep_recursion == 0; } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); +NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled); /** * rcu_read_lock_held() - might we be in RCU read-side critical section? diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f1a86a0d881d..d64c00afceb5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_event_output(event, sd, regs); - return 0; + return perf_event_output(event, sd, regs); } BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d3294721f119..d42a473b8240 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -14,6 +14,7 @@ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include "trace.h" @@ -365,7 +366,7 @@ out: __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } -static inline void +static nokprobe_inline void start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) { int cpu; @@ -401,7 +402,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) atomic_dec(&data->disabled); } -static inline void +static nokprobe_inline void stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) { int cpu; @@ -443,6 +444,7 @@ void start_critical_timings(void) start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); } EXPORT_SYMBOL_GPL(start_critical_timings); +NOKPROBE_SYMBOL(start_critical_timings); void stop_critical_timings(void) { @@ -452,6 +454,7 @@ void stop_critical_timings(void) stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); } EXPORT_SYMBOL_GPL(stop_critical_timings); +NOKPROBE_SYMBOL(stop_critical_timings); #ifdef CONFIG_FUNCTION_TRACER static bool function_enabled; @@ -611,6 +614,7 @@ void tracer_hardirqs_on(unsigned long a0, unsigned long a1) if (!preempt_trace(pc) && irq_trace()) stop_critical_timing(a0, a1, pc); } +NOKPROBE_SYMBOL(tracer_hardirqs_on); void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { @@ -619,6 +623,7 @@ void tracer_hardirqs_off(unsigned long a0, unsigned long a1) if (!preempt_trace(pc) && irq_trace()) start_critical_timing(a0, a1, pc); } +NOKPROBE_SYMBOL(tracer_hardirqs_off); static int irqsoff_tracer_init(struct trace_array *tr) { diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index 71f553cceb3c..4d8e99fdbbbe 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -9,6 +9,7 @@ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include "trace.h" #define CREATE_TRACE_POINTS @@ -30,6 +31,7 @@ void trace_hardirqs_on(void) lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on); +NOKPROBE_SYMBOL(trace_hardirqs_on); void trace_hardirqs_off(void) { @@ -43,6 +45,7 @@ void trace_hardirqs_off(void) lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off); +NOKPROBE_SYMBOL(trace_hardirqs_off); __visible void trace_hardirqs_on_caller(unsigned long caller_addr) { @@ -56,6 +59,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr) lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); 
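The pattern repeated across the lockdep, RCU and tracing hunks here is the same: any helper that can run while a kprobe breakpoint is being handled must not itself be probed, so static helpers become nokprobe_inline and address-taken functions get a NOKPROBE_SYMBOL() annotation that lists them in the kprobe blacklist. A minimal, hypothetical sketch of the idiom (example_* names are illustrative):

#include <linux/kprobes.h>

static nokprobe_inline bool example_in_critical_path(unsigned long ip)
{
        /* folded into its (blacklisted) callers rather than being a probeable symbol */
        return ip != 0;
}

void example_trace_hook(unsigned long ip)
{
        if (example_in_critical_path(ip))
                return;
}
NOKPROBE_SYMBOL(example_trace_hook);    /* records the symbol in the kprobe blacklist */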
+NOKPROBE_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { @@ -69,6 +73,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off_caller); +NOKPROBE_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_TRACE_IRQFLAGS */ #ifdef CONFIG_TRACE_PREEMPT_TOGGLE diff --git a/lib/bsearch.c b/lib/bsearch.c index 18b445b010c3..82512fe7b33c 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/bsearch.h> +#include <linux/kprobes.h> /* * bsearch - binary search an array of elements @@ -53,3 +54,4 @@ void *bsearch(const void *key, const void *base, size_t num, size_t size, return NULL; } EXPORT_SYMBOL(bsearch); +NOKPROBE_SYMBOL(bsearch); diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 85925aaa4fff..157d9e31f6c2 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -5,10 +5,11 @@ * DEBUG_PREEMPT variant of smp_processor_id(). */ #include <linux/export.h> +#include <linux/kprobes.h> #include <linux/sched.h> -notrace static unsigned int check_preemption_disabled(const char *what1, - const char *what2) +notrace static nokprobe_inline +unsigned int check_preemption_disabled(const char *what1, const char *what2) { int this_cpu = raw_smp_processor_id(); @@ -56,9 +57,11 @@ notrace unsigned int debug_smp_processor_id(void) return check_preemption_disabled("smp_processor_id", ""); } EXPORT_SYMBOL(debug_smp_processor_id); +NOKPROBE_SYMBOL(debug_smp_processor_id); notrace void __this_cpu_preempt_check(const char *op) { check_preemption_disabled("__this_cpu_", op); } EXPORT_SYMBOL(__this_cpu_preempt_check); +NOKPROBE_SYMBOL(__this_cpu_preempt_check); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 5467c6bf9ceb..61e46d54a67c 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -53,10 +53,6 @@ FEATURE_TESTS_BASIC := \ libslang \ libcrypto \ libunwind \ - libunwind-x86 \ - libunwind-x86_64 \ - libunwind-arm \ - libunwind-aarch64 \ pthread-attr-setaffinity-np \ pthread-barrier \ reallocarray \ @@ -70,7 +66,6 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt \ setns \ - libopencsd \ libaio # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list @@ -84,6 +79,11 @@ FEATURE_TESTS_EXTRA := \ libbabeltrace \ libbfd-liberty \ libbfd-liberty-z \ + libopencsd \ + libunwind-x86 \ + libunwind-x86_64 \ + libunwind-arm \ + libunwind-aarch64 \ libunwind-debug-frame \ libunwind-debug-frame-arm \ libunwind-debug-frame-aarch64 \ diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 20cdaa4fc112..e903b86b742f 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -170,14 +170,14 @@ # include "test-setns.c" #undef main -#define main main_test_libopencsd -# include "test-libopencsd.c" -#undef main - #define main main_test_libaio # include "test-libaio.c" #undef main +#define main main_test_reallocarray +# include "test-reallocarray.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -217,8 +217,8 @@ int main(int argc, char *argv[]) main_test_sched_getcpu(); main_test_sdt(); main_test_setns(); - main_test_libopencsd(); main_test_libaio(); + main_test_reallocarray(); return 0; } diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c index 573000f93212..c3c201691b4f 100644 --- 
a/tools/build/feature/test-get_current_dir_name.c +++ b/tools/build/feature/test-get_current_dir_name.c @@ -8,3 +8,4 @@ int main(void) free(get_current_dir_name()); return 0; } +#undef _GNU_SOURCE diff --git a/tools/build/feature/test-libpython.c b/tools/build/feature/test-libpython.c index 0c1641b0d9a7..371c9113e49d 100644 --- a/tools/build/feature/test-libpython.c +++ b/tools/build/feature/test-libpython.c @@ -7,3 +7,4 @@ int main(void) return 0; } +#undef _GNU_SOURCE diff --git a/tools/build/feature/test-reallocarray.c b/tools/build/feature/test-reallocarray.c index 8170de35150d..8f6743e31da7 100644 --- a/tools/build/feature/test-reallocarray.c +++ b/tools/build/feature/test-reallocarray.c @@ -6,3 +6,5 @@ int main(void) { return !!reallocarray(NULL, 1, 1); } + +#undef _GNU_SOURCE diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c index e448deb4124c..48995ac7911e 100644 --- a/tools/build/feature/test-sched_getcpu.c +++ b/tools/build/feature/test-sched_getcpu.c @@ -8,3 +8,5 @@ int main(void) { return sched_getcpu(); } + +#undef _GNU_SOURCE diff --git a/tools/build/feature/test-setns.c b/tools/build/feature/test-setns.c index 1f714d2a658b..4a1581ae7a55 100644 --- a/tools/build/feature/test-setns.c +++ b/tools/build/feature/test-setns.c @@ -5,3 +5,4 @@ int main(void) { return setns(0, 0); } +#undef _GNU_SOURCE diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h index 112582253dd0..8e9ed4786269 100644 --- a/tools/include/linux/rbtree.h +++ b/tools/include/linux/rbtree.h @@ -43,13 +43,28 @@ struct rb_root { struct rb_node *rb_node; }; +/* + * Leftmost-cached rbtrees. + * + * We do not cache the rightmost node based on footprint + * size vs number of potential users that could benefit + * from O(1) rb_last(). Just not worth it, users that want + * this feature can always implement the logic explicitly. + * Furthermore, users that want to cache both pointers may + * find it a bit asymmetric, but that's ok. 
+ */ +struct rb_root_cached { + struct rb_root rb_root; + struct rb_node *rb_leftmost; +}; #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } #define rb_entry(ptr, type, member) container_of(ptr, type, member) -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) +#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ #define RB_EMPTY_NODE(node) \ @@ -68,6 +83,12 @@ extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); +extern void rb_insert_color_cached(struct rb_node *, + struct rb_root_cached *, bool); +extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); +/* Same as rb_first(), but O(1) */ +#define rb_first_cached(root) (root)->rb_leftmost + /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); @@ -75,6 +96,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); +extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) @@ -90,12 +113,29 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, ____ptr ? rb_entry(____ptr, type, member) : NULL; \ }) - -/* - * Handy for checking that we are not deleting an entry that is - * already in a list, found in block/{blk-throttle,cfq-iosched}.c, - * probably should be moved to lib/rbtree.c... +/** + * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of + * given type allowing the backing memory of @pos to be invalidated + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. + * + * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as + * list_for_each_entry_safe() and allows the iteration to continue independent + * of changes to @pos by the body of the loop. + * + * Note, however, that it cannot handle other modifications that re-order the + * rbtree it is iterating over. This includes calling rb_erase() on @pos, as + * rb_erase() may rebalance the tree, causing us to miss some nodes. 
*/ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ + pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ + typeof(*pos), field); 1; }); \ + pos = n) + static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) { rb_erase(n, root); diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 43be941db695..d008e1404580 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -44,7 +44,9 @@ struct rb_augment_callbacks { void (*rotate)(struct rb_node *old, struct rb_node *new); }; -extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, +extern void __rb_insert_augmented(struct rb_node *node, + struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. @@ -60,7 +62,16 @@ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - __rb_insert_augmented(node, root, augment->rotate); + __rb_insert_augmented(node, root, false, NULL, augment->rotate); +} + +static inline void +rb_insert_augmented_cached(struct rb_node *node, + struct rb_root_cached *root, bool newleft, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, &root->rb_root, + newleft, &root->rb_leftmost, augment->rotate); } #define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ @@ -93,7 +104,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ old->rbaugmented = rbcompute(old); \ } \ rbstatic const struct rb_augment_callbacks rbname = { \ - rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ + .propagate = rbname ## _propagate, \ + .copy = rbname ## _copy, \ + .rotate = rbname ## _rotate \ }; @@ -126,11 +139,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, { if (parent) { if (parent->rb_left == old) - parent->rb_left = new; + WRITE_ONCE(parent->rb_left, new); else - parent->rb_right = new; + WRITE_ONCE(parent->rb_right, new); } else - root->rb_node = new; + WRITE_ONCE(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, @@ -138,12 +151,17 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, + struct rb_node **leftmost, const struct rb_augment_callbacks *augment) { - struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *child = node->rb_right; + struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; + if (leftmost && node == *leftmost) + *leftmost = rb_next(node); + if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) 
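The leftmost-cached flavour being synced into tools/include works the same way as the kernel's: the caller tracks during the descent whether the new node lands in the leftmost slot and passes that to rb_insert_color_cached(); in return, rb_first_cached() is O(1). A small, hypothetical user of this tools copy (example_* names are made up):

#include <linux/rbtree.h>
#include <linux/types.h>

struct example_item {
        struct rb_node  node;
        u64             key;
};

static void example_insert(struct rb_root_cached *root, struct example_item *item)
{
        struct rb_node **link = &root->rb_root.rb_node, *parent = NULL;
        bool leftmost = true;

        while (*link) {
                struct example_item *cur = rb_entry(*link, struct example_item, node);

                parent = *link;
                if (item->key < cur->key) {
                        link = &(*link)->rb_left;
                } else {
                        link = &(*link)->rb_right;
                        leftmost = false;       /* went right at least once */
                }
        }

        rb_link_node(&item->node, parent, link);
        rb_insert_color_cached(&item->node, root, leftmost);
}

static struct example_item *example_first(struct rb_root_cached *root)
{
        struct rb_node *node = rb_first_cached(root);   /* O(1), no descent */

        return node ? rb_entry(node, struct example_item, node) : NULL;
}

The tree itself is initialised with RB_ROOT_CACHED, and removals must go through rb_erase_cached() so the cached leftmost pointer stays in sync.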
@@ -170,6 +188,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, tmp = parent; } else { struct rb_node *successor = child, *child2; + tmp = child->rb_left; if (!tmp) { /* @@ -183,6 +202,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, */ parent = successor; child2 = successor->rb_right; + augment->copy(node, successor); } else { /* @@ -204,19 +224,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, successor = tmp; tmp = tmp->rb_left; } while (tmp); - parent->rb_left = child2 = successor->rb_right; - successor->rb_right = child; + child2 = successor->rb_right; + WRITE_ONCE(parent->rb_left, child2); + WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); + augment->copy(node, successor); augment->propagate(parent, successor); } - successor->rb_left = tmp = node->rb_left; + tmp = node->rb_left; + WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); + if (child2) { successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); @@ -237,9 +261,21 @@ static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + struct rb_node *rebalance = __rb_erase_augmented(node, root, + NULL, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } -#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ +static __always_inline void +rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, + augment); + if (rebalance) + __rb_erase_color(rebalance, &root->rb_root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 9de8780ac8d9..7198ddd0c6b1 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -372,7 +372,9 @@ struct perf_event_attr { context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ - __reserved_1 : 35; + ksymbol : 1, /* include ksymbol events */ + bpf_event : 1, /* include bpf events */ + __reserved_1 : 33; union { __u32 wakeup_events; /* wakeup every n events */ @@ -445,8 +447,6 @@ struct perf_event_query_bpf { __u32 ids[0]; }; -#define perf_flags(attr) (*(&(attr)->read_format + 1)) - /* * Ioctls that can be done on a perf event fd: */ @@ -965,9 +965,58 @@ enum perf_event_type { */ PERF_RECORD_NAMESPACES = 16, + /* + * Record ksymbol register/unregister events: + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u32 len; + * u16 ksym_type; + * u16 flags; + * char name[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_KSYMBOL = 17, + + /* + * Record bpf events: + * enum perf_bpf_event_type { + * PERF_BPF_EVENT_UNKNOWN = 0, + * PERF_BPF_EVENT_PROG_LOAD = 1, + * PERF_BPF_EVENT_PROG_UNLOAD = 2, + * }; + * + * struct { + * struct perf_event_header header; + * u16 type; + * u16 flags; + * u32 id; + * u8 tag[BPF_TAG_SIZE]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_BPF_EVENT = 18, + PERF_RECORD_MAX, /* non-ABI */ }; +enum perf_record_ksymbol_type { + 
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, + PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ +}; + +#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) + +enum perf_bpf_event_type { + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ +}; + #define PERF_MAX_STACK_DEPTH 127 #define PERF_MAX_CONTEXTS_PER_STACK 8 diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 17c2b596f043..904adb70a4f0 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -22,6 +22,7 @@ */ #include <linux/rbtree_augmented.h> +#include <linux/export.h> /* * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree @@ -43,6 +44,30 @@ * parentheses and have some accompanying text comment. */ +/* + * Notes on lockless lookups: + * + * All stores to the tree structure (rb_left and rb_right) must be done using + * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the + * tree structure as seen in program order. + * + * These two requirements will allow lockless iteration of the tree -- not + * correct iteration mind you, tree rotations are not atomic so a lookup might + * miss entire subtrees. + * + * But they do guarantee that any such traversal will only see valid elements + * and that it will indeed complete -- does not get stuck in a loop. + * + * It also guarantees that if the lookup returns an element it is the 'correct' + * one. But not returning an element does _NOT_ mean it's not present. + * + * NOTE: + * + * Stores to __rb_parent_color are not important for simple lookups so those + * are left undone as of now. Nor did I check for loops involving parent + * pointers. + */ + static inline void rb_set_black(struct rb_node *rb) { rb->__rb_parent_color |= RB_BLACK; @@ -70,22 +95,35 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, static __always_inline void __rb_insert(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + if (newleft) + *leftmost = node; + while (true) { /* - * Loop invariant: node is red - * - * If there is a black parent, we are done. - * Otherwise, take some corrective action as we don't - * want a red root or two consecutive red nodes. + * Loop invariant: node is red. */ - if (!parent) { + if (unlikely(!parent)) { + /* + * The inserted node is root. Either this is the + * first node, or we recursed at Case 1 below and + * are no longer violating 4). + */ rb_set_parent_color(node, NULL, RB_BLACK); break; - } else if (rb_is_black(parent)) + } + + /* + * If there is a black parent, we are done. + * Otherwise, take some corrective action as, + * per 4), we don't want a red root or two + * consecutive red nodes. + */ + if(rb_is_black(parent)) break; gparent = rb_red_parent(parent); @@ -94,7 +132,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root, if (parent != tmp) { /* parent == gparent->rb_left */ if (tmp && rb_is_red(tmp)) { /* - * Case 1 - color flips + * Case 1 - node's uncle is red (color flips). * * G g * / \ / \ @@ -117,7 +155,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_right; if (node == tmp) { /* - * Case 2 - left rotate at parent + * Case 2 - node's uncle is black and node is + * the parent's right child (left rotate at parent). 
* * G G * / \ / \ @@ -128,8 +167,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * This still leaves us in violation of 4), the * continuation into Case 3 will fix that. */ - parent->rb_right = tmp = node->rb_left; - node->rb_left = parent; + tmp = node->rb_left; + WRITE_ONCE(parent->rb_right, tmp); + WRITE_ONCE(node->rb_left, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -140,7 +180,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* - * Case 3 - right rotate at gparent + * Case 3 - node's uncle is black and node is + * the parent's left child (right rotate at gparent). * * G P * / \ / \ @@ -148,8 +189,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * / \ * n U */ - gparent->rb_left = tmp; /* == parent->rb_right */ - parent->rb_right = gparent; + WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */ + WRITE_ONCE(parent->rb_right, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -170,8 +211,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_left; if (node == tmp) { /* Case 2 - right rotate at parent */ - parent->rb_left = tmp = node->rb_right; - node->rb_right = parent; + tmp = node->rb_right; + WRITE_ONCE(parent->rb_left, tmp); + WRITE_ONCE(node->rb_right, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -182,8 +224,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* Case 3 - left rotate at gparent */ - gparent->rb_right = tmp; /* == parent->rb_left */ - parent->rb_left = gparent; + WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */ + WRITE_ONCE(parent->rb_left, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -223,8 +265,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * Sl Sr N Sl */ - parent->rb_right = tmp1 = sibling->rb_left; - sibling->rb_left = parent; + tmp1 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp1); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -268,15 +311,31 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * * (p) (p) * / \ / \ - * N S --> N Sl + * N S --> N sl * / \ \ - * sl Sr s + * sl Sr S * \ * Sr + * + * Note: p might be red, and then both + * p and sl are red after rotation(which + * breaks property 4). 
This is fixed in + * Case 4 (in __rb_rotate_set_parents() + * which set sl the color of p + * and set p RB_BLACK) + * + * (p) (sl) + * / \ / \ + * N sl --> P S + * \ / \ + * S N Sr + * \ + * Sr */ - sibling->rb_left = tmp1 = tmp2->rb_right; - tmp2->rb_right = sibling; - parent->rb_right = tmp2; + tmp1 = tmp2->rb_right; + WRITE_ONCE(sibling->rb_left, tmp1); + WRITE_ONCE(tmp2->rb_right, sibling); + WRITE_ONCE(parent->rb_right, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -296,8 +355,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * (sl) sr N (sl) */ - parent->rb_right = tmp2 = sibling->rb_left; - sibling->rb_left = parent; + tmp2 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp2); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -309,8 +369,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, sibling = parent->rb_left; if (rb_is_red(sibling)) { /* Case 1 - right rotate at parent */ - parent->rb_left = tmp1 = sibling->rb_right; - sibling->rb_right = parent; + tmp1 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp1); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -334,10 +395,11 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, } break; } - /* Case 3 - right rotate at sibling */ - sibling->rb_right = tmp1 = tmp2->rb_left; - tmp2->rb_left = sibling; - parent->rb_left = tmp2; + /* Case 3 - left rotate at sibling */ + tmp1 = tmp2->rb_left; + WRITE_ONCE(sibling->rb_right, tmp1); + WRITE_ONCE(tmp2->rb_left, sibling); + WRITE_ONCE(parent->rb_left, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -345,9 +407,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, tmp1 = sibling; sibling = tmp2; } - /* Case 4 - left rotate at parent + color flips */ - parent->rb_left = tmp2 = sibling->rb_right; - sibling->rb_right = parent; + /* Case 4 - right rotate at parent + color flips */ + tmp2 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp2); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -378,22 +441,41 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} static const struct rb_augment_callbacks dummy_callbacks = { - dummy_propagate, dummy_copy, dummy_rotate + .propagate = dummy_propagate, + .copy = dummy_copy, + .rotate = dummy_rotate }; void rb_insert_color(struct rb_node *node, struct rb_root *root) { - __rb_insert(node, root, dummy_rotate); + __rb_insert(node, root, false, NULL, dummy_rotate); } void rb_erase(struct rb_node *node, struct rb_root *root) { struct rb_node *rebalance; - rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + rebalance = __rb_erase_augmented(node, root, + NULL, &dummy_callbacks); if (rebalance) ____rb_erase_color(rebalance, root, dummy_rotate); } +void rb_insert_color_cached(struct rb_node *node, + struct rb_root_cached *root, bool leftmost) +{ + __rb_insert(node, &root->rb_root, leftmost, + &root->rb_leftmost, dummy_rotate); +} + +void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, 
&root->rb_root, dummy_rotate); +} + /* * Augmented rbtree manipulation functions. * @@ -402,9 +484,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) */ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { - __rb_insert(node, root, augment_rotate); + __rb_insert(node, root, newleft, leftmost, augment_rotate); } /* @@ -498,15 +581,24 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, { struct rb_node *parent = rb_parent(victim); + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; + /* Set the surrounding nodes to point to the replacement */ - __rb_change_child(victim, new, parent, root); if (victim->rb_left) rb_set_parent(victim->rb_left, new); if (victim->rb_right) rb_set_parent(victim->rb_right, new); + __rb_change_child(victim, new, parent, root); +} - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; +void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root) +{ + rb_replace_node(victim, new, &root->rb_root); + + if (root->rb_leftmost == victim) + root->rb_leftmost = new; } static struct rb_node *rb_left_deepest_node(const struct rb_node *node) diff --git a/tools/perf/Build b/tools/perf/Build index e5232d567611..5f392dbb88fc 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_ CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" -libperf-y += util/ -libperf-y += arch/ -libperf-y += ui/ -libperf-y += scripts/ -libperf-$(CONFIG_TRACE) += trace/beauty/ +perf-y += util/ +perf-y += arch/ +perf-y += ui/ +perf-y += scripts/ +perf-$(CONFIG_TRACE) += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 4ac7775fbc11..86f3dcc15f83 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -120,6 +120,10 @@ Given a $HOME/.perfconfig like this: children = true group = true + [llvm] + dump-obj = true + clang-opt = -g + You can hide source code of annotate feature setting the config to false with % perf config annotate.hide_src_code=true @@ -553,6 +557,33 @@ trace.*:: trace.show_zeros:: Do not suppress syscall arguments that are equal to zero. +llvm.*:: + llvm.clang-path:: + Path to clang. If omit, search it from $PATH. + + llvm.clang-bpf-cmd-template:: + Cmdline template. Below lines show its default value. Environment + variable is used to pass options. + "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ + -Wno-unused-value -Wno-pointer-sign -working-directory \ + $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" + + llvm.clang-opt:: + Options passed to clang. + + llvm.kbuild-dir:: + kbuild directory. If not set, use /lib/modules/`uname -r`/build. + If set to "" deliberately, skip kernel header auto-detector. + + llvm.kbuild-opts:: + Options passed to 'make' when detecting kernel header options. + + llvm.dump-obj:: + Enable perf dump BPF object files compiled by LLVM. + + llvm.opts:: + Options passed to llc. 
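Tying the new llvm.* keys together, a ~/.perfconfig fragment that keeps the compiled BPF objects around and adds debug info when clang is invoked could look like the following; the values shown are purely illustrative:

[llvm]
        dump-obj = true
        clang-opt = -g
        kbuild-dir = /lib/modules/4.20.0/build

The same settings can also be written with 'perf config llvm.dump-obj=true' and friends instead of editing the file directly.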
+ SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index d232b13ea713..8f0c2be34848 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -88,6 +88,20 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. + - a BPF source file (ending in .c) or a precompiled object file (ending + in .o) selects one or more BPF events. + The BPF program can attach to various perf events based on the ELF section + names. + + When processing a '.c' file, perf searches an installed LLVM to compile it + into an object file first. Optional clang options can be passed via the + '--clang-opt' command line option, e.g.: + + perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ + -e tests/bpf-script-example.c + + Note: '--clang-opt' must be placed before '--event/-e'. + - a group of events surrounded by a pair of brace ("{event1,event2,...}"). Each event is separated by commas and the group should be quoted to prevent the shell interpretation. You also need to use --group on @@ -440,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: Asynchronous mode is supported only when linking Perf tool with libc library providing implementation for Posix AIO API. +--affinity=mode:: +Set affinity mask of trace reading thread according to the policy defined by 'mode' value: + node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer + cpu - thread affinity mask is set to cpu of the processed mmap buffer + --all-kernel:: Configure all used events to run in kernel space. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b441c88cafa1..0f11d5891301 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) +FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm +FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64 +FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86 +FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64 + +FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto + ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif @@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) +FEATURE_CHECK_LDFLAGS-libaio = -lrt + CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 CFLAGS += -funwind-tables @@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1) $(call detected,CONFIG_SETNS) endif -ifndef NO_CORESIGHT +ifdef CORESIGHT + $(call feature_check,libopencsd) ifeq ($(feature-libopencsd), 1) CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) LDFLAGS += $(LIBOPENCSD_LDFLAGS) @@ -482,6 +492,7 @@ endif ifndef NO_LIBUNWIND have_libunwind := + $(call feature_check,libunwind-x86) ifeq ($(feature-libunwind-x86), 1) $(call detected,CONFIG_LIBUNWIND_X86) CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT @@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND have_libunwind = 1 endif + $(call feature_check,libunwind-aarch64) ifeq ($(feature-libunwind-aarch64), 1) $(call detected,CONFIG_LIBUNWIND_AARCH64) CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT diff --git 
a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0ee6795d82cc..01f7555fd933 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -102,7 +102,7 @@ include ../scripts/utilities.mak # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if # llvm-config is not in $PATH. # -# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. +# Define CORESIGHT if you DO WANT support for CoreSight trace decoding. # # Define NO_AIO if you do not want support of Posix AIO based trace # streaming for record mode. Currently Posix AIO trace streaming is @@ -344,9 +344,9 @@ endif export PERL_PATH -LIB_FILE=$(OUTPUT)libperf.a +LIBPERF_A=$(OUTPUT)libperf.a -PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) +PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) ifndef NO_LIBBPF PERFLIBS += $(LIBBPF) endif @@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o +LIBPERF_IN := $(OUTPUT)libperf-in.o + export JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj @@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN) $(PMU_EVENTS_IN): $(JEVENTS) FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) +$(LIBPERF_IN): prepare FORCE + $(Q)$(MAKE) $(build)=libperf + +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ - $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ + $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@ $(GTK_IN): FORCE $(Q)$(MAKE) $(build)=gtk @@ -683,12 +688,7 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -LIBPERF_IN := $(OUTPUT)libperf-in.o - -$(LIBPERF_IN): prepare FORCE - $(Q)$(MAKE) $(build)=libperf - -$(LIB_FILE): $(LIBPERF_IN) +$(LIBPERF_A): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' @@ -863,8 +863,8 @@ ifndef NO_LIBPYTHON $(call QUIET_INSTALL, python-scripts) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \ - $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ - $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ + $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ + $(INSTALL) scripts/python/*.py -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' endif $(call QUIET_INSTALL, perf_completion-script) \ @@ -910,7 +910,7 @@ python-clean: $(python-clean) clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index d9b6af837c7d..688818844c11 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ -libperf-y += common.o -libperf-y += $(SRCARCH)/ +perf-y += common.o +perf-y += $(SRCARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm/Build +++ b/tools/perf/arch/arm/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += util/ +perf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index d9ae2733f9cc..bc8e97380c82 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,5 +1,5 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o -libperf-y += vectors-page.o +perf-y += regs_load.o +perf-y += dwarf-unwind.o +perf-y += vectors-page.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c index 9a0242e74cfc..2c35e532bc9a 100644 --- a/tools/perf/arch/arm/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e64c5f216448..296f0eac5e18 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,6 +1,6 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o +perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2f595cd73da6..911426721170 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -5,6 +5,7 @@ */ #include <api/fs/fs.h> +#include <linux/bits.h> #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/coresight-pmu.h> @@ -22,12 +23,10 @@ #include "../../util/thread_map.h" #include "../../util/cs-etm.h" +#include <errno.h> #include <stdlib.h> #include <sys/stat.h> -#define ENABLE_SINK_MAX 128 -#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/" - struct cs_etm_recording { struct auxtrace_record itr; struct perf_pmu *cs_etm_pmu; @@ -60,10 +59,48 @@ static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, return 0; } +static int cs_etm_set_sink_attr(struct perf_pmu *pmu, + struct perf_evsel *evsel) +{ + char msg[BUFSIZ], path[PATH_MAX], *sink; + struct perf_evsel_config_term *term; + int ret = -EINVAL; + u32 hash; + + if (evsel->attr.config2 & GENMASK(31, 0)) + return 0; + + list_for_each_entry(term, &evsel->config_terms, list) { + if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) + continue; + + sink = term->val.drv_cfg; + snprintf(path, PATH_MAX, "sinks/%s", sink); + + ret = perf_pmu__scan_file(pmu, path, "%x", &hash); + if (ret 
!= 1) { + pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n", + sink, perf_evsel__name(evsel), errno, + str_error_r(errno, msg, sizeof(msg))); + return ret; + } + + evsel->attr.config2 |= hash; + return 0; + } + + /* + * No sink was provided on the command line - for _now_ treat + * this as an error. + */ + return ret; +} + static int cs_etm_recording_options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts) { + int ret; struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; @@ -92,6 +129,10 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, if (!cs_etm_evsel) return 0; + ret = cs_etm_set_sink_attr(cs_etm_pmu, cs_etm_evsel); + if (ret) + return ret; + if (opts->use_clockid) { pr_err("Cannot use clockid (-k option) with %s\n", CORESIGHT_ETM_PMU_NAME); @@ -598,54 +639,3 @@ struct auxtrace_record *cs_etm_record_init(int *err) out: return NULL; } - -static FILE *cs_device__open_file(const char *name) -{ - struct stat st; - char path[PATH_MAX]; - const char *sysfs; - - sysfs = sysfs__mountpoint(); - if (!sysfs) - return NULL; - - snprintf(path, PATH_MAX, - "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name); - - if (stat(path, &st) < 0) - return NULL; - - return fopen(path, "w"); - -} - -static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...) -{ - va_list args; - FILE *file; - int ret = -EINVAL; - - va_start(args, fmt); - file = cs_device__open_file(name); - if (file) { - ret = vfprintf(file, fmt, args); - fclose(file); - } - va_end(args); - return ret; -} - -int cs_etm_set_drv_config(struct perf_evsel_config_term *term) -{ - int ret; - char enable_sink[ENABLE_SINK_MAX]; - - snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s", - term->val.drv_cfg, "enable_sink"); - - ret = cs_device__print_file(enable_sink, "%d", 1); - if (ret < 0) - return ret; - - return 0; -} diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h index 1a12e64f5127..a3354bda4fe8 100644 --- a/tools/perf/arch/arm/util/cs-etm.h +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -7,9 +7,6 @@ #ifndef INCLUDE__PERF_CS_ETM_H__ #define INCLUDE__PERF_CS_ETM_H__ -#include "../../util/evsel.h" - struct auxtrace_record *cs_etm_record_init(int *err); -int cs_etm_set_drv_config(struct perf_evsel_config_term *term); #endif diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index e047571e6080..bbc297a7e2e3 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -7,8 +7,8 @@ #include <string.h> #include <linux/coresight-pmu.h> #include <linux/perf_event.h> +#include <linux/string.h> -#include "cs-etm.h" #include "arm-spe.h" #include "../../util/pmu.h" @@ -19,7 +19,6 @@ struct perf_event_attr if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { /* add ETM default config here */ pmu->selectable = true; - pmu->set_drv_config = cs_etm_set_drv_config; #if defined(__aarch64__) } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { return arm_spe_pmu_default_config(pmu); diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm64/Build +++ b/tools/perf/arch/arm64/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += util/ +perf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index 883c57ff0c08..41707fea74b3 100644 --- 
a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build @@ -1,4 +1,4 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o +perf-y += regs_load.o +perf-y += dwarf-unwind.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c index 5522ce384723..a6a407fa1b8b 100644 --- a/tools/perf/arch/arm64/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 68f8a8eb3ad0..3cde540d2fcf 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,10 +1,10 @@ -libperf-y += header.o -libperf-y += sym-handling.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-y += header.o +perf-y += sym-handling.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ +perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ arm-spe.o diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/nds32/Build +++ b/tools/perf/arch/nds32/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build index ca623bbf993c..d0bc205fe49a 100644 --- a/tools/perf/arch/nds32/util/Build +++ b/tools/perf/arch/nds32/util/Build @@ -1 +1 @@ -libperf-y += header.o +perf-y += header.o diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-y += tests/ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build index d827ef384b33..3526ab0af9f9 100644 --- a/tools/perf/arch/powerpc/tests/Build +++ b/tools/perf/arch/powerpc/tests/Build @@ -1,4 +1,4 @@ -libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o -libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 5f39efef0856..5c178e4a1995 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index ba98bd006488..7cf0b8803097 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,11 +1,11 @@ -libperf-y += header.o -libperf-y += sym-handling.o -libperf-y += kvm-stat.o -libperf-y += perf_regs.o -libperf-y += mem-events.o +perf-y += header.o +perf-y += sym-handling.o +perf-y += kvm-stat.o +perf-y += perf_regs.o +perf-y += mem-events.o 
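For reference, the libperf-y -> perf-y renames in these Build fragments are mechanical; a representative arch Build fragment after the change (lines taken from the hunks above) reads:

    perf-y += header.o
    perf-$(CONFIG_DWARF) += dwarf-regs.o
    perf-$(CONFIG_AUXTRACE) += auxtrace.o
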
-libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_DWARF) += skip-callchain-idx.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += skip-callchain-idx.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 596ad6aedaac..f9db341c47b6 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -3,6 +3,8 @@ #include "util/kvm-stat.h" #include "util/parse-events.h" #include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" #include "book3s_hv_exits.h" #include "book3s_hcalls.h" diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 7c6eeb4633fe..2918bb16c892 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -16,6 +16,9 @@ #include "util/thread.h" #include "util/callchain.h" #include "util/debug.h" +#include "util/dso.h" +#include "util/map.h" +#include "util/symbol.h" /* * When saving the callchain on Power, the kernel conservatively saves diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/s390/Build +++ b/tools/perf/arch/s390/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 4a233683c684..22797f043b84 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,9 +1,9 @@ -libperf-y += header.o -libperf-y += kvm-stat.o +perf-y += header.o +perf-y += kvm-stat.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-y += machine.o +perf-y += machine.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index aaabab5e2830..7e3961a4b292 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -11,6 +11,7 @@ #include <errno.h> #include "../../util/kvm-stat.h" +#include "../../util/evsel.h" #include <asm/sie.h> define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sh/Build +++ b/tools/perf/arch/sh/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sh/util/Build +++ b/tools/perf/arch/sh/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sparc/Build +++ b/tools/perf/arch/sparc/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sparc/util/Build +++ b/tools/perf/arch/sparc/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o 
+perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-y += tests/ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 586849ff83a0..3d83d0c6982d 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -1,8 +1,8 @@ -libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o -libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o -libperf-y += arch-tests.o -libperf-y += rdpmc.o -libperf-y += perf-time-to-tsc.o -libperf-$(CONFIG_AUXTRACE) += insn-x86.o -libperf-$(CONFIG_X86_64) += bp-modify.o +perf-y += arch-tests.o +perf-y += rdpmc.o +perf-y += perf-time-to-tsc.o +perf-$(CONFIG_AUXTRACE) += insn-x86.o +perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 7879df34569a..6ad0a1cedb13 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -3,6 +3,7 @@ #include "perf_regs.h" #include "thread.h" #include "map.h" +#include "map_groups.h" #include "event.h" #include "debug.h" #include "tests/tests.h" diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 844b8f335532..7aab0be5fc5f 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,18 +1,18 @@ -libperf-y += header.o -libperf-y += tsc.o -libperf-y += pmu.o -libperf-y += kvm-stat.o -libperf-y += perf_regs.o -libperf-y += group.o -libperf-y += machine.o -libperf-y += event.o +perf-y += header.o +perf-y += tsc.o +perf-y += pmu.o +perf-y += kvm-stat.o +perf-y += perf_regs.o +perf-y += group.o +perf-y += machine.o +perf-y += event.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o -libperf-$(CONFIG_AUXTRACE) += intel-pt.o -libperf-$(CONFIG_AUXTRACE) += intel-bts.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += intel-pt.o +perf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 081353d7b095..865a9762f22e 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> #include "../../util/kvm-stat.h" +#include "../../util/evsel.h" #include <asm/svm.h> #include <asm/vmx.h> #include <asm/kvm.h> diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/xtensa/Build +++ b/tools/perf/arch/xtensa/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/xtensa/util/Build +++ b/tools/perf/arch/xtensa/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git 
a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 93d679eaf1f4..7f3c3fea67b4 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -27,6 +27,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/map.h" #include "util/session.h" #include "util/tool.h" #include "util/data.h" @@ -227,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, * the DSO? */ if (al->sym != NULL) { - rb_erase(&al->sym->rb_node, + rb_erase_cached(&al->sym->rb_node, &al->map->dso->symbols); symbol__delete(al->sym); dso__reset_find_symbol_cache(al->map->dso); @@ -305,7 +306,7 @@ static void hists__find_annotations(struct hists *hists, struct perf_evsel *evsel, struct perf_annotate *ann) { - struct rb_node *nd = rb_first(&hists->entries), *next; + struct rb_node *nd = rb_first_cached(&hists->entries), *next; int key = K_RIGHT; while (nd) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d340d2e42776..efaaab23c6fd 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -33,6 +33,7 @@ #include "ui/browsers/hists.h" #include "thread.h" #include "mem2node.h" +#include "symbol.h" struct c2c_hists { struct hists hists; @@ -1969,7 +1970,7 @@ static void calc_width(struct c2c_hist_entry *c2c_he) set_nodestr(c2c_he); } -static int filter_cb(struct hist_entry *he) +static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; @@ -1986,7 +1987,7 @@ static int filter_cb(struct hist_entry *he) return 0; } -static int resort_cl_cb(struct hist_entry *he) +static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; @@ -2073,7 +2074,7 @@ static int setup_nodes(struct perf_session *session) #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) -static int resort_hitm_cb(struct hist_entry *he) +static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; c2c_he = container_of(he, struct c2c_hist_entry, he); @@ -2088,14 +2089,14 @@ static int resort_hitm_cb(struct hist_entry *he) static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); int ret = 0; while (next) { struct hist_entry *he; he = rb_entry(next, struct hist_entry, rb_node); - ret = cb(he); + ret = cb(he, NULL); if (ret) break; next = rb_next(&he->rb_node); @@ -2215,7 +2216,7 @@ static void print_pareto(FILE *out) if (WARN_ONCE(ret, "failed to setup sort entries\n")) return; - nd = rb_first(&c2c.hists.hists.entries); + nd = rb_first_cached(&c2c.hists.hists.entries); for (; nd; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); @@ -2283,7 +2284,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) static void c2c_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; - struct rb_node *nd = rb_first(&hb->hists->entries); + struct rb_node *nd = rb_first_cached(&hb->hists->entries); while (nd) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); @@ -2343,7 +2344,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct c2c_cacheline_browser *cl_browser; struct hist_browser *browser; int key = -1; - const char help[] = + static const char help[] = " ENTER Toggle callchains (if present) \n" " n Toggle Node details info \n" " s Toggle 
full length of symbol and source line columns \n" @@ -2424,7 +2425,7 @@ static int perf_c2c__hists_browse(struct hists *hists) { struct hist_browser *browser; int key = -1; - const char help[] = + static const char help[] = " d Display cacheline details \n" " ENTER Toggle callchains (if present) \n" " q Quit \n"; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 39db2ee32d48..751e1971456b 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -429,7 +429,7 @@ get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) static void hists__baseline_only(struct hists *hists) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; if (hists__has(hists, need_collapse)) @@ -437,13 +437,13 @@ static void hists__baseline_only(struct hists *hists) else root = hists->entries_in; - next = rb_first(root); + next = rb_first_cached(root); while (next != NULL) { struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { - rb_erase(&he->rb_node_in, root); + rb_erase_cached(&he->rb_node_in, root); hist_entry__delete(he); } } @@ -451,7 +451,7 @@ static void hists__baseline_only(struct hists *hists) static void hists__precompute(struct hists *hists) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; if (hists__has(hists, need_collapse)) @@ -459,7 +459,7 @@ static void hists__precompute(struct hists *hists) else root = hists->entries_in; - next = rb_first(root); + next = rb_first_cached(root); while (next != NULL) { struct hist_entry *he, *pair; struct data__file *d; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index eda41673c4f3..9bb1f35d5cb7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -12,6 +12,7 @@ #include "util/color.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/map.h" #include "util/session.h" #include "util/tool.h" #include "util/debug.h" @@ -19,6 +20,7 @@ #include "util/data.h" #include "util/auxtrace.h" #include "util/jit.h" +#include "util/symbol.h" #include "util/thread.h" #include <subcmd/parse-options.h> diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 90d1a2305b72..bc7a2bc7aed7 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -13,6 +13,7 @@ #include <subcmd/parse-options.h> #include "debug.h" #include "machine.h" +#include "map.h" #include "symbol.h" static int __cmd_kallsyms(int argc, const char **argv) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index b63bca4b0c2a..b80ec0883537 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -6,6 +6,7 @@ #include "util/evsel.h" #include "util/util.h" #include "util/config.h" +#include "util/map.h" #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" @@ -334,7 +335,7 @@ static int build_alloc_func_list(void) struct alloc_func *func; struct machine *machine = &kmem_session->machines.host; regex_t alloc_func_regex; - const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; + static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); if (ret) { @@ -1924,7 +1925,7 @@ int cmd_kmem(int argc, const char **argv) NULL }; struct perf_session *session; - const char errmsg[] = "No %s allocation events found. 
Have you run 'perf kmem record --%s'?\n"; + static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n"; int ret = perf_config(kmem_config, NULL); if (ret) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index ead221e49f00..c9f98d00c0e9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0) - metricgroup__print(true, false, NULL, raw_dump); + metricgroup__print(true, false, NULL, raw_dump, details_flag); else if (strcmp(argv[i], "metricgroup") == 0) - metricgroup__print(false, true, NULL, raw_dump); + metricgroup__print(false, true, NULL, raw_dump, details_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; @@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, NULL, raw_dump); + metricgroup__print(true, true, NULL, raw_dump, details_flag); free(s); } } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 57393e94d156..ba7e8d87dec3 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -13,6 +13,7 @@ #include "util/data.h" #include "util/mem-events.h" #include "util/debug.h" +#include "util/map.h" #include "util/symbol.h" #define MEM_OPERATION_LOAD 0x1 diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 99de91698de1..46d3c2deeb40 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -32,6 +32,7 @@ #include "perf.h" #include "builtin.h" +#include "namespaces.h" #include "util/util.h" #include "util/strlist.h" #include "util/strfilter.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 882285fb9f64..6c3719ac901d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -23,7 +23,6 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" -#include "util/drv_configs.h" #include "util/session.h" #include "util/tool.h" #include "util/symbol.h" @@ -39,8 +38,10 @@ #include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" +#include "util/cpu-set-sched.h" #include "util/time-utils.h" #include "util/units.h" +#include "util/bpf-event.h" #include "asm/bug.h" #include <errno.h> @@ -81,12 +82,17 @@ struct record { bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; + cpu_set_t affinity_mask; }; static volatile int auxtrace_record__snapshot_started; static DEFINE_TRIGGER(auxtrace_snapshot_trigger); static DEFINE_TRIGGER(switch_output_trigger); +static const char *affinity_tags[PERF_AFFINITY_MAX] = { + "SYS", "NODE", "CPU" +}; + static bool switch_output_signal(struct record *rec) { return rec->switch_output.signal && @@ -531,9 +537,13 @@ static int record__mmap_evlist(struct record *rec, struct record_opts *opts = &rec->opts; char msg[512]; + if (opts->affinity != PERF_AFFINITY_SYS) + cpu__setup_cpunode_map(); + if (perf_evlist__mmap_ex(evlist, 
opts->mmap_pages, opts->auxtrace_mmap_pages, - opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) { + opts->auxtrace_snapshot_mode, + opts->nr_cblocks, opts->affinity) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " @@ -566,7 +576,6 @@ static int record__open(struct record *rec) struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct record_opts *opts = &rec->opts; - struct perf_evsel_config_term *err_term; int rc = 0; /* @@ -619,14 +628,6 @@ try_again: goto out; } - if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(pos), errno, - str_error_r(errno, msg, sizeof(msg))); - rc = -1; - goto out; - } - rc = record__mmap(rec); if (rc) goto out; @@ -722,6 +723,16 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; +static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) +{ + if (rec->opts.affinity != PERF_AFFINITY_SYS && + !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { + CPU_ZERO(&rec->affinity_mask); + CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); + sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); + } +} + static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, bool overwrite) { @@ -749,6 +760,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct perf_mmap *map = &maps[i]; if (map->base) { + record__adjust_affinity(rec, map); if (!record__aio_enabled(rec)) { if (perf_mmap__push(map, rec, record__pushfn) != 0) { rc = -1; @@ -1082,6 +1094,11 @@ static int record__synthesize(struct record *rec, bool tail) return err; } + err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, + machine, opts); + if (err < 0) + pr_warning("Couldn't synthesize bpf events.\n"); + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, 1); @@ -1639,6 +1656,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) return -1; } +static int record__parse_affinity(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + + if (unset || !str) + return 0; + + if (!strcasecmp(str, "node")) + opts->affinity = PERF_AFFINITY_NODE; + else if (!strcasecmp(str, "cpu")) + opts->affinity = PERF_AFFINITY_CPU; + + return 0; +} + static int record__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused) @@ -1839,6 +1871,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), + OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", @@ -1946,6 +1979,9 @@ static struct option __record_options[] = { &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", record__aio_parse), #endif + OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", + "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of 
processed mmap buffer", + record__parse_affinity), OPT_END() }; @@ -1980,6 +2016,9 @@ int cmd_record(int argc, const char **argv) # undef REASON #endif + CPU_ZERO(&rec->affinity_mask); + rec->opts.affinity = PERF_AFFINITY_SYS; + rec->evlist = perf_evlist__new(); if (rec->evlist == NULL) return -ENOMEM; @@ -2143,6 +2182,8 @@ int cmd_record(int argc, const char **argv) if (verbose > 0) pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); + pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); + err = __cmd_record(&record, argc, argv); out: perf_evlist__delete(rec->evlist); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4958095be4fc..2e8c74d6430c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,6 +16,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <linux/err.h> +#include "util/map.h" #include "util/symbol.h" #include "util/callchain.h" #include "util/values.h" @@ -615,6 +616,21 @@ static int report__collapse_hists(struct report *rep) return ret; } +static int hists__resort_cb(struct hist_entry *he, void *arg) +{ + struct report *rep = arg; + struct symbol *sym = he->ms.sym; + + if (rep->symbol_ipc && sym && !sym->annotate2) { + struct perf_evsel *evsel = hists_to_evsel(he->hists); + + symbol__annotate2(sym, he->ms.map, evsel, + &annotation__default_options, NULL); + } + + return 0; +} + static void report__output_resort(struct report *rep) { struct ui_progress prog; @@ -622,8 +638,10 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); - evlist__for_each_entry(rep->session->evlist, pos) - perf_evsel__output_resort(pos, &prog); + evlist__for_each_entry(rep->session->evlist, pos) { + perf_evsel__output_resort_cb(pos, &prog, + hists__resort_cb, rep); + } ui_progress__finish(); } @@ -753,7 +771,8 @@ static int tasks_print(struct report *rep, FILE *fp) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&threads->entries); nd; + nd = rb_next(nd)) { task = tasks + itask++; task->thread = rb_entry(nd, struct thread, rb_node); @@ -956,9 +975,9 @@ int cmd_report(int argc, const char **argv) int branch_mode = -1; bool branch_call_mode = false; #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" - const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" - CALLCHAIN_REPORT_HELP - "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; + static const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { "perf report [<options>]", diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index cbf39dab19c1..640558e9352e 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -213,7 +213,7 @@ struct perf_sched { u64 all_runtime; u64 all_count; u64 cpu_last_switched[MAX_CPUS]; - struct rb_root atom_root, sorted_atom_root, merged_atom_root; + struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root; struct list_head sort_list, cmp_pid; bool force; bool skip_merge; @@ -271,7 +271,7 @@ struct evsel_runtime { struct idle_thread_runtime { struct thread_runtime tr; struct thread *last_thread; - struct rb_root sorted_root; + struct rb_root_cached sorted_root; 
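The rb_root_cached conversions in the hunks above and below all follow the same pattern: the insertion walk keeps a leftmost flag so the cached leftmost pointer stays valid and rb_first_cached() remains O(1). A minimal sketch of that pattern, where struct item and its key field are illustrative stand-ins rather than code from the patch:

    #include <linux/rbtree.h>
    #include <linux/types.h>

    struct item {
            u64 key;
            struct rb_node node;
    };

    static void item_insert(struct rb_root_cached *root, struct item *data)
    {
            struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
            bool leftmost = true;

            while (*new) {
                    struct item *this = rb_entry(*new, struct item, node);

                    parent = *new;
                    if (data->key < this->key) {
                            new = &((*new)->rb_left);
                    } else {
                            new = &((*new)->rb_right);
                            leftmost = false;       /* new node is not the minimum */
                    }
            }

            rb_link_node(&data->node, parent, new);
            /* updates root->rb_leftmost when 'leftmost' is true */
            rb_insert_color_cached(&data->node, root, leftmost);
    }

Lookups then use rb_first_cached(root) instead of rb_first(&root->rb_root), and removals go through rb_erase_cached(), as the builtin-sched.c and builtin-diff.c hunks do.
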
struct callchain_root callchain; struct callchain_cursor cursor; }; @@ -950,10 +950,10 @@ thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms * } static struct work_atoms * -thread_atoms_search(struct rb_root *root, struct thread *thread, +thread_atoms_search(struct rb_root_cached *root, struct thread *thread, struct list_head *sort_list) { - struct rb_node *node = root->rb_node; + struct rb_node *node = root->rb_root.rb_node; struct work_atoms key = { .thread = thread }; while (node) { @@ -976,10 +976,11 @@ thread_atoms_search(struct rb_root *root, struct thread *thread, } static void -__thread_latency_insert(struct rb_root *root, struct work_atoms *data, +__thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data, struct list_head *sort_list) { - struct rb_node **new = &(root->rb_node), *parent = NULL; + struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; + bool leftmost = true; while (*new) { struct work_atoms *this; @@ -992,12 +993,14 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, if (cmp > 0) new = &((*new)->rb_left); - else + else { new = &((*new)->rb_right); + leftmost = false; + } } rb_link_node(&data->node, parent, new); - rb_insert_color(&data->node, root); + rb_insert_color_cached(&data->node, root, leftmost); } static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) @@ -1447,15 +1450,15 @@ static int sort_dimension__add(const char *tok, struct list_head *list) static void perf_sched__sort_lat(struct perf_sched *sched) { struct rb_node *node; - struct rb_root *root = &sched->atom_root; + struct rb_root_cached *root = &sched->atom_root; again: for (;;) { struct work_atoms *data; - node = rb_first(root); + node = rb_first_cached(root); if (!node) break; - rb_erase(node, root); + rb_erase_cached(node, root); data = rb_entry(node, struct work_atoms, node); __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); } @@ -2762,12 +2765,12 @@ static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) return ret; } -static size_t timehist_print_idlehist_callchain(struct rb_root *root) +static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root) { size_t ret = 0; FILE *fp = stdout; struct callchain_node *chain; - struct rb_node *rb_node = rb_first(root); + struct rb_node *rb_node = rb_first_cached(root); printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, @@ -2868,7 +2871,7 @@ static void timehist_print_summary(struct perf_sched *sched, if (itr == NULL) continue; - callchain_param.sort(&itr->sorted_root, &itr->callchain, + callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain, 0, &callchain_param); printf(" CPU %2d:", i); @@ -3074,11 +3077,12 @@ static void print_bad_events(struct perf_sched *sched) } } -static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) +static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data) { - struct rb_node **new = &(root->rb_node), *parent = NULL; + struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; struct work_atoms *this; const char *comm = thread__comm_str(data->thread), *this_comm; + bool leftmost = true; while (*new) { int cmp; @@ -3092,6 +3096,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) new = &((*new)->rb_left); } else if (cmp < 0) { new = &((*new)->rb_right); + leftmost = false; } else { 
this->num_merged++; this->total_runtime += data->total_runtime; @@ -3109,7 +3114,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) data->num_merged++; rb_link_node(&data->node, parent, new); - rb_insert_color(&data->node, root); + rb_insert_color_cached(&data->node, root, leftmost); } static void perf_sched__merge_lat(struct perf_sched *sched) @@ -3120,8 +3125,8 @@ static void perf_sched__merge_lat(struct perf_sched *sched) if (sched->skip_merge) return; - while ((node = rb_first(&sched->atom_root))) { - rb_erase(node, &sched->atom_root); + while ((node = rb_first_cached(&sched->atom_root))) { + rb_erase_cached(node, &sched->atom_root); data = rb_entry(node, struct work_atoms, node); __merge_work_atoms(&sched->merged_atom_root, data); } @@ -3143,7 +3148,7 @@ static int perf_sched__lat(struct perf_sched *sched) printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); printf(" -----------------------------------------------------------------------------------------------------------------\n"); - next = rb_first(&sched->sorted_atom_root); + next = rb_first_cached(&sched->sorted_atom_root); while (next) { struct work_atoms *work_list; @@ -3336,7 +3341,7 @@ static int __cmd_record(int argc, const char **argv) int cmd_sched(int argc, const char **argv) { - const char default_sort_order[] = "avg, max, switch, runtime"; + static const char default_sort_order[] = "avg, max, switch, runtime"; struct perf_sched sched = { .tool = { .sample = perf_sched__process_tracepoint_sample, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ac221f137ed2..8d5fe092525c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -10,6 +10,7 @@ #include "util/perf_regs.h" #include "util/session.h" #include "util/tool.h" +#include "util/map.h" #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 63a3afc7f32b..bb24f9c17f9a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -52,7 +52,6 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" -#include "util/drv_configs.h" #include "util/color.h" #include "util/stat.h" #include "util/header.h" @@ -83,7 +82,6 @@ #include <unistd.h> #include <sys/time.h> #include <sys/resource.h> -#include <sys/wait.h> #include "sane_ctype.h" @@ -418,7 +416,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? 
perf_stat.data.is_pipe : false; - struct perf_evsel_config_term *err_term; if (interval) { ts.tv_sec = interval / USEC_PER_MSEC; @@ -515,13 +512,6 @@ try_again: return -1; } - if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(counter), errno, - str_error_r(errno, msg, sizeof(msg))); - return -1; - } - if (STAT_RECORD) { int err, fd = perf_data__fd(&perf_stat.data); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f64e312db787..231a90daa958 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -22,13 +22,14 @@ #include "perf.h" #include "util/annotate.h" +#include "util/bpf-event.h" #include "util/config.h" #include "util/color.h" -#include "util/drv_configs.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/event.h" #include "util/machine.h" +#include "util/map.h" #include "util/session.h" #include "util/symbol.h" #include "util/thread.h" @@ -366,7 +367,7 @@ static void perf_top__prompt_symbol(struct perf_top *top, const char *msg) if (p) *p = 0; - next = rb_first(&hists->entries); + next = rb_first_cached(&hists->entries); while (next) { n = rb_entry(next, struct hist_entry, rb_node); if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { @@ -1184,10 +1185,6 @@ static void init_process_thread(struct perf_top *top) static int __cmd_top(struct perf_top *top) { - char msg[512]; - struct perf_evsel *pos; - struct perf_evsel_config_term *err_term; - struct perf_evlist *evlist = top->evlist; struct record_opts *opts = &top->record_opts; pthread_t thread, thread_process; int ret; @@ -1215,6 +1212,12 @@ static int __cmd_top(struct perf_top *top) init_process_thread(top); + ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process, + &top->session->machines.host, + &top->record_opts); + if (ret < 0) + pr_warning("Couldn't synthesize bpf events.\n"); + machine__synthesize_threads(&top->session->machines.host, &opts->target, top->evlist->threads, false, top->nr_threads_synthesize); @@ -1232,14 +1235,6 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term); - if (ret) { - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", - err_term->val.drv_cfg, perf_evsel__name(pos), errno, - str_error_r(errno, msg, sizeof(msg))); - goto out_delete; - } - top->session->evlist = top->evlist; perf_session__set_id_hdr_size(top->session); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b36061cd1ab8..68a01e624ad3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -29,6 +29,8 @@ #include "util/evlist.h" #include <subcmd/exec-cmd.h> #include "util/machine.h" +#include "util/map.h" +#include "util/symbol.h" #include "util/path.h" #include "util/session.h" #include "util/thread.h" @@ -1039,6 +1041,9 @@ static const size_t trace__entry_str_size = 2048; static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) { + if (fd < 0) + return NULL; + if (fd > ttrace->files.max) { struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); @@ -2766,7 +2771,8 @@ static int trace__set_filter_loop_pids(struct trace *trace) if (parent == NULL) break; - if (!strcmp(thread__comm_str(parent), "sshd")) { + if (!strcmp(thread__comm_str(parent), "sshd") || + strstarts(thread__comm_str(parent), "gnome-terminal")) { pids[nr++] = parent->tid; break; } @@ -3865,7 
+3871,8 @@ int cmd_trace(int argc, const char **argv) goto init_augmented_syscall_tp; } - if (strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_enter") == 0) { + if (trace.syscalls.events.augmented->priv == NULL && + strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) { struct perf_evsel *augmented = trace.syscalls.events.augmented; if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || perf_evsel__init_augmented_syscall_tp_args(augmented)) diff --git a/tools/perf/design.txt b/tools/perf/design.txt index a28dca2582aa..0453ba26cdbd 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -222,6 +222,10 @@ The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a way to request that counting of events be restricted to times when the CPU is in user, kernel and/or hypervisor mode. +Furthermore the 'exclude_host' and 'exclude_guest' bits provide a way +to request counting of events restricted to guest and host contexts when +using Linux as the hypervisor. + The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap operations, these can be used to relate userspace IP addresses to actual code, even after the mapping (or even the whole process) is gone, diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..f9b2161e1ca4 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -18,23 +18,13 @@ #include <pid_filter.h> /* bpf-output associated map */ -struct bpf_map SEC("maps") __augmented_syscalls__ = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = __NR_CPUS__, -}; +bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); struct syscall { bool enabled; }; -struct bpf_map SEC("maps") syscalls = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct syscall), - .max_entries = 512, -}; +bpf_map(syscalls, ARRAY, int, struct syscall, 512); struct syscall_enter_args { unsigned long long common_tp_fields; @@ -141,8 +131,8 @@ int sys_enter(struct syscall_enter_args *args) len = sizeof(augmented_args.args); } - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); - return 0; + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); } SEC("raw_syscalls:sys_exit") diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..524fdb8534b3 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -19,12 +19,8 @@ #include <stdio.h> #include <linux/socket.h> -struct bpf_map SEC("maps") __augmented_syscalls__ = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = __NR_CPUS__, -}; +/* bpf-output associated map */ +bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); struct syscall_exit_args { unsigned long long common_tp_fields; @@ -55,9 +51,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ len &= sizeof(augmented_args.filename.value) - 1; \ } \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ - 
return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ @@ -125,10 +121,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ /* addrlen = augmented_args.args.addrlen; */ \ /* */ \ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ } \ int syscall_exit(syscall)(struct syscall_exit_args *args) \ { \ diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..e81b535346c0 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -21,12 +21,8 @@ #include <stdio.h> -struct bpf_map SEC("maps") __augmented_syscalls__ = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = __NR_CPUS__, -}; +/* bpf-output associated map */ +bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); struct augmented_filename { int size; @@ -49,11 +45,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ args->filename_ptr); \ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ return 0; \ - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ - return 0; \ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ } struct syscall_enter_openat_args { diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h index e667577207dc..5df7ed9d9020 100644 --- a/tools/perf/include/bpf/bpf.h +++ b/tools/perf/include/bpf/bpf.h @@ -18,6 +18,14 @@ struct bpf_map { unsigned int numa_node; }; +#define bpf_map(name, _type, type_key, type_val, _max_entries) \ +struct bpf_map SEC("maps") name = { \ + .type = BPF_MAP_TYPE_##_type, \ + .key_size = sizeof(type_key), \ + .value_size = sizeof(type_val), \ + .max_entries = _max_entries, \ +} + /* * FIXME: this should receive .max_entries as a parameter, as careful * tuning of these limits is needed to avoid hitting limits that @@ -26,13 +34,7 @@ struct bpf_map { * For the current need, 'perf trace --filter-pids', 64 should * be good enough, but this surely needs to be revisited. 
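To make the new helper concrete: a declaration such as bpf_map(syscalls, ARRAY, int, struct syscall, 512), as used in augmented_raw_syscalls.c above, expands to the same open-coded initializer the examples used to carry:

    struct bpf_map SEC("maps") syscalls = {
            .type        = BPF_MAP_TYPE_ARRAY,
            .key_size    = sizeof(int),
            .value_size  = sizeof(struct syscall),
            .max_entries = 512,
    };

and pid_map(name, value_type) below is shorthand for bpf_map(name, HASH, pid_t, value_type, 64).
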
*/ -#define pid_map(name, value_type) \ -struct bpf_map SEC("maps") name = { \ - .type = BPF_MAP_TYPE_HASH, \ - .key_size = sizeof(pid_t), \ - .value_size = sizeof(value_type), \ - .max_entries = 64, \ -} +#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64) static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 388c6dd128b8..b120e547ddc7 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,6 +66,7 @@ struct record_opts { bool ignore_missing_thread; bool strict_freq; bool sample_id; + bool bpf_event; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; @@ -83,6 +84,14 @@ struct record_opts { clockid_t clockid; u64 clockid_res_ns; int nr_cblocks; + int affinity; +}; + +enum perf_affinity { + PERF_AFFINITY_SYS = 0, + PERF_AFFINITY_NODE, + PERF_AFFINITY_CPU, + PERF_AFFINITY_MAX }; struct option; diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json new file mode 100644 index 000000000000..bffb2d4a6420 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -0,0 +1,2245 @@ +[ + { + "BriefDescription": "% of finished branches that were treated as BC+8", + "MetricExpr": "PM_BR_BC_8_CONV / PM_BRU_FIN * 100", + "MetricGroup": "branch_prediction", + "MetricName": "bc_8_branch_ratio_percent" + }, + { + "BriefDescription": "% of finished branches that were pairable but not treated as BC+8", + "MetricExpr": "PM_BR_BC_8 / PM_BRU_FIN * 100", + "MetricGroup": "branch_prediction", + "MetricName": "bc_8_not_converted_branch_ratio_percent" + }, + { + "BriefDescription": "Percent of mispredicted branches out of all predicted (correctly and incorrectly) branches that completed", + "MetricExpr": "PM_BR_MPRED_CMPL / (PM_BR_PRED_BR0 + PM_BR_PRED_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "br_misprediction_percent" + }, + { + "BriefDescription": "% of Branch miss predictions per instruction", + "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "branch_mispredict_rate_percent" + }, + { + "BriefDescription": "Count cache branch misprediction per instruction", + "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_mispredict_rate_percent" + }, + { + "BriefDescription": "Percent of count catch mispredictions out of all completed branches that required count cache predictionn", + "MetricExpr": "PM_BR_MPRED_CCACHE / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_misprediction_percent" + }, + { + "BriefDescription": "CR MisPredictions per Instruction", + "MetricExpr": "PM_BR_MPRED_CR / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "cr_mispredict_rate_percent" + }, + { + "BriefDescription": "Link stack branch misprediction", + "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_mispredict_rate_percent" + }, + { + "BriefDescription": "Percent of link stack mispredictions out of all completed branches that required link stack prediction", + "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / 
(PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_misprediction_percent" + }, + { + "BriefDescription": "TA MisPredictions per Instruction", + "MetricExpr": "PM_BR_MPRED_TA / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ta_mispredict_rate_percent" + }, + { + "BriefDescription": "Percent of target address mispredictions out of all completed branches that required address prediction", + "MetricExpr": "PM_BR_MPRED_TA / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1 + PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ta_misprediction_percent" + }, + { + "BriefDescription": "Percent of branches completed that were taken", + "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BR_CMPL", + "MetricGroup": "branch_prediction", + "MetricName": "taken_branches_percent" + }, + { + "BriefDescription": "Percent of chip+group+sys pumps that were incorrectly predicted", + "MetricExpr": "PM_PUMP_MPRED * 100 / (PM_PUMP_CPRED + PM_PUMP_MPRED)", + "MetricGroup": "bus_stats", + "MetricName": "any_pump_mpred_percent" + }, + { + "BriefDescription": "Percent of chip pumps that were correctly predicted as chip pumps the first time", + "MetricExpr": "PM_CHIP_PUMP_CPRED * 100 / PM_L2_CHIP_PUMP", + "MetricGroup": "bus_stats", + "MetricName": "chip_pump_cpred_percent" + }, + { + "BriefDescription": "Percent of group pumps that were correctly predicted as group pumps the first time", + "MetricExpr": "PM_GRP_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP", + "MetricGroup": "bus_stats", + "MetricName": "group_pump_cpred_percent" + }, + { + "BriefDescription": "Percent of system pumps that were correctly predicted as group pumps the first time", + "MetricExpr": "PM_SYS_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP", + "MetricGroup": "bus_stats", + "MetricName": "sys_pump_cpred_percent" + }, + { + "BriefDescription": "Cycles stalled due to CRU or BRU operations", + "MetricExpr": "PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "bru_cru_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled due to ISU Branch Operations", + "MetricExpr": "PM_CMPLU_STALL_BRU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "bru_stall_cpi" + }, + { + "BriefDescription": "Cycles in which a Group Completed", + "MetricExpr": "PM_GRP_CMPL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "completion_cpi" + }, + { + "BriefDescription": "Cycles stalled by CO queue full", + "MetricExpr": "PM_CMPLU_STALL_COQ_FULL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "coq_full_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled due to CRU Operations", + "MetricExpr": "(PM_CMPLU_STALL_BRU_CRU - PM_CMPLU_STALL_BRU) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "cru_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by flushes", + "MetricExpr": "PM_CMPLU_STALL_FLUSH / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "flush_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by FXU Multi-Cycle Instructions", + "MetricExpr": "PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_multi_cyc_cpi" + }, + { + "BriefDescription": "Cycles stalled by FXU", + "MetricExpr": "PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_stall_cpi" + }, + { + "BriefDescription": "Other cycles 
stalled by FXU", + "MetricExpr": "(PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_stall_other_cpi" + }, + { + "BriefDescription": "Cycles GCT empty due to Branch Mispredicts", + "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_br_mpred_cpi" + }, + { + "BriefDescription": "Cycles GCT empty due to Branch Mispredicts and Icache Misses", + "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_br_mpred_ic_miss_cpi" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held", + "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to issue queue", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_issq_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to maps", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_map_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to syncs and other effects", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_other_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to SRQ", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_srq_cpi" + }, + { + "BriefDescription": "Cycles stalled by GCT empty due to Icache misses", + "MetricExpr": "PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_ic_miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve in the local L2 or L3", + "MetricExpr": "(PM_GCT_NOSLOT_IC_MISS - PM_GCT_NOSLOT_IC_L3MISS) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_ic_miss_l2l3_cpi" + }, + { + "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve off-chip", + "MetricExpr": "PM_GCT_NOSLOT_IC_L3MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_ic_miss_l3miss_cpi" + }, + { + "BriefDescription": "Other GCT empty cycles", + "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL) - ((PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL))", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by heavyweight syncs", + "MetricExpr": "PM_CMPLU_STALL_HWSYNC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown",
+ "MetricName": "hwsync_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU", + "MetricExpr": "PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses", + "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in distant interventions and memory", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_REMOTE) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_distant_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote or distant caches", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l21l31_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was a conflict", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l2l3_conflict_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l2l3_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was no conflict", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l2l3_noconflict_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in other core's caches or memory", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l3miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in local memory or local L4", + "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_lmem_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote interventions and memory", + "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_remote_cpi" + }, + { + "BriefDescription": "Cycles stalled by ERAT Translation rejects", + "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_erat_miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU load finishes", + "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_ld_fin_cpi" + }, + { + "BriefDescription": "Cycles stalled by LHS rejects", + "MetricExpr": "PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_lhs_cpi" + }, + { + "BriefDescription": "Cycles stalled by LMQ Full rejects", + "MetricExpr": "PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_lmq_full_cpi" + }, + { + 
"BriefDescription": "Cycles stalled by Other LSU Operations", + "MetricExpr": "(PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU Rejects", + "MetricExpr": "PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_reject_cpi" + }, + { + "BriefDescription": "Cycles stalled by Other LSU Rejects", + "MetricExpr": "(PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_reject_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU store forwarding", + "MetricExpr": "PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_st_fwd_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU Stores", + "MetricExpr": "PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_store_cpi" + }, + { + "BriefDescription": "Cycles stalled by lightweight syncs", + "MetricExpr": "PM_CMPLU_STALL_LWSYNC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lwsync_stall_cpi" + }, + { + "MetricExpr": "PM_CMPLU_STALL_MEM_ECC_DELAY / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "mem_ecc_delay_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by nops (nothing next to finish)", + "MetricExpr": "PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "no_ntf_stall_cpi" + }, + { + "MetricExpr": "PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntcg_all_fin_cpi" + }, + { + "MetricExpr": "PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntcg_flush_cpi" + }, + { + "BriefDescription": "Other thread block stall cycles", + "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_LWSYNC - PM_CMPLU_STALL_HWSYNC - PM_CMPLU_STALL_MEM_ECC_DELAY - PM_CMPLU_STALL_FLUSH - PM_CMPLU_STALL_COQ_FULL) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_block_stall_cpi" + }, + { + "BriefDescription": "Cycles unaccounted for", + "MetricExpr": "(PM_RUN_CYC / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL) - (PM_GRP_CMPL / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_cpi" + }, + { + "BriefDescription": "Stall cycles unaccounted for", + "MetricExpr": "(PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_stall_cpi" + }, + { + "BriefDescription": "Run cycles per run instruction", + "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "run_cpi" + }, + { + 
"BriefDescription": "Completion Stall Cycles", + "MetricExpr": "PM_CMPLU_STALL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "stall_cpi" + }, + { + "BriefDescription": "Cycles a thread was blocked", + "MetricExpr": "PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "thread_block_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU", + "MetricExpr": "PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by other VSU Operations", + "MetricExpr": "(PM_CMPLU_STALL_VSU - PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_SCALAR) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Scalar Operations", + "MetricExpr": "PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_scalar_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Scalar Long Operations", + "MetricExpr": "PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_scalar_long_cpi" + }, + { + "BriefDescription": "Cycles stalled by Other VSU Scalar Operations", + "MetricExpr": "(PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_scalar_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Vector Operations", + "MetricExpr": "PM_CMPLU_STALL_VECTOR / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_vector_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Vector Long Operations", + "MetricExpr": "PM_CMPLU_STALL_VECTOR_LONG / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_vector_long_cpi" + }, + { + "BriefDescription": "Cycles stalled by other VSU Vector Operations", + "MetricExpr": "(PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_VECTOR_LONG) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_vector_other_cpi" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": 
"PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced a Load-Hit-Store conflict", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_lhs_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 load hits per instruction where the L2 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_no_conflict_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced some conflict other than Load-Hit-Store", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_other_conflict_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 S tate, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "Percentage of L3 load hits per instruction where the load collided with a pending prefetch", + "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_conflict_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L3 load hits per instruction where the L3 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_no_conflict_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local L4 per Inst", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local Memory per Inst", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + 
"MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rmem_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 demand load misses per run instruction", + "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "l1_ld_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 misses that result in a cache reload", + "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_miss_reloads_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L4", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl4_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dmem_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_shr_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L2 with a Load-Hit-Store conflict", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_lhs_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L2 with no conflicts", + "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": 
"dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_no_conflict_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L2 with some conflict other than Load-Hit-Store", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_other_conflict_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_shr_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L3 where the load collided with a pending prefetch", + "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_conflict_percent" + }, + { + "BriefDescription": "Percentage of L3 load hits per instruction where the line was brought into the L3 by a prefetch operation", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_mepf_rate_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L3 without conflicts", + "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_no_conflict_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local L4", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_ll4_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local Memory", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_lmem_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L4", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl4_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory", + "MetricExpr": 
"PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rmem_percent" + }, + { + "BriefDescription": "dL1 miss portion of CPI", + "MetricExpr": "( (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)/ (PM_RUN_CYC / PM_RUN_INST_CMPL)) * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dcache_miss_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl4_cpi_percent" + }, + { + "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L21_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L21_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L2 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) ) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l2_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L31_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency 
as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L31_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L3 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l3_cpi_percent" + }, + { + "BriefDescription": "estimate of Local L4 miss rates with measured LL4 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_LL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "ll4_cpi_percent" + }, + { + "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_LMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "lmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl4_cpi_percent" + }, + { + "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rmem_cpi_percent" + }, + { + "BriefDescription": "Branch Mispredict flushes per instruction", + "MetricExpr": "PM_FLUSH_BR_MPRED / PM_RUN_INST_CMPL * 100", + "MetricGroup": "general", + "MetricName": "br_mpred_flush_rate_percent" + }, + { + "BriefDescription": "Cycles per instruction", + "MetricExpr": "PM_CYC / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "cpi" + }, + { + "BriefDescription": "Percentage Cycles a group completed", + "MetricExpr": "PM_GRP_CMPL / PM_CYC * 100", + "MetricGroup": "general", + "MetricName": "cyc_grp_completed_percent" + }, + { + "BriefDescription": "Percentage Cycles a group dispatched", + 
"MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", + "MetricGroup": "general", + "MetricName": "cyc_grp_dispatched_percent" + }, + { + "BriefDescription": "Cycles per group", + "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", + "MetricGroup": "general", + "MetricName": "cyc_per_group" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100", + "MetricGroup": "general", + "MetricName": "disp_flush_rate_percent" + }, + { + "BriefDescription": "% DTLB miss rate per inst", + "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "dtlb_miss_rate_percent" + }, + { + "BriefDescription": "Flush rate (%)", + "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "flush_rate_percent" + }, + { + "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_11to14_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_15to17_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_18plus_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_1to2_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_3to6_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_7to10_slots_percent" + }, + { + "BriefDescription": "Avg. 
group size", + "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", + "MetricGroup": "general", + "MetricName": "group_size" + }, + { + "BriefDescription": "Instructions per group", + "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", + "MetricGroup": "general", + "MetricName": "inst_per_group" + }, + { + "BriefDescription": "Instructions per cycles", + "MetricExpr": "PM_INST_CMPL / PM_CYC", + "MetricGroup": "general", + "MetricName": "ipc" + }, + { + "BriefDescription": "% ITLB miss rate per inst", + "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "itlb_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 load misses per L1 load ref", + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", + "MetricGroup": "general", + "MetricName": "l1_ld_miss_ratio_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per run instruction", + "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l1_st_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per L1 store ref", + "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100", + "MetricGroup": "general", + "MetricName": "l1_st_miss_ratio_percent" + }, + { + "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_inst_miss_rate_percent" + }, + { + "BriefDescription": "L2 dmand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_ld_miss_rate_percent" + }, + { + "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_pteg_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 store misses per run instruction", + "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_st_miss_rate_percent" + }, + { + "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_inst_miss_rate_percent" + }, + { + "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_ld_miss_rate_percent" + }, + { + "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_pteg_miss_rate_percent" + }, + { + "BriefDescription": "Run cycles per cycle", + "MetricExpr": "PM_RUN_CYC / PM_CYC*100", + "MetricGroup": "general", + "MetricName": "run_cycles_percent" + }, + { + "BriefDescription": "Percentage of cycles spent in SMT2 Mode", + "MetricExpr": "(PM_RUN_CYC_SMT2_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "smt2_cycles_percent" + }, + { + "BriefDescription": "Percentage of cycles spent in SMT4 Mode", + "MetricExpr": "(PM_RUN_CYC_SMT4_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "smt4_cycles_percent" + }, + { + "BriefDescription": "Percentage of cycles spent in SMT8 Mode", + "MetricExpr": "(PM_RUN_CYC_SMT8_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "smt8_cycles_percent" + }, + { + "BriefDescription": "IPC of all 
instructions completed by the core while this thread was stalled", + "MetricExpr": "PM_CMPLU_STALL_OTHER_CMPL/PM_RUN_CYC", + "MetricGroup": "general", + "MetricName": "smt_benefit" + }, + { + "BriefDescription": "Instruction dispatch-to-completion ratio", + "MetricExpr": "PM_INST_DISP / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "speculation" + }, + { + "BriefDescription": "Percentage of cycles spent in Single Thread Mode", + "MetricExpr": "(PM_RUN_CYC_ST_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "st_cycles_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4 per Inst", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory per Inst", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2 per Inst", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l2_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3 other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3 per Inst", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l3_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4 per Inst", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory per Inst", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": 
"inst_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L4 per Inst", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory per Inst", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rmem_rate_percent" + }, + { + "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)", + "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "l1_inst_miss_rate_percent" + }, + { + "BriefDescription": "% Branches per instruction", + "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "branches_per_inst" + }, + { + "BriefDescription": "Total Fixed point operations", + "MetricExpr": "(PM_FXU0_FIN + PM_FXU1_FIN)/PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "fixed_per_inst" + }, + { + "BriefDescription": "FXU0 balance", + "MetricExpr": "PM_FXU0_FIN / (PM_FXU0_FIN + PM_FXU1_FIN)", + "MetricGroup": "instruction_mix", + "MetricName": "fxu0_balance" + }, + { + "BriefDescription": "Fraction of cycles that FXU0 is in use", + "MetricExpr": "PM_FXU0_FIN / PM_RUN_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu0_fin" + }, + { + "BriefDescription": "FXU0 only Busy", + "MetricExpr": "PM_FXU0_BUSY_FXU1_IDLE / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu0_only_busy" + }, + { + "BriefDescription": "Fraction of cycles that FXU1 is in use", + "MetricExpr": "PM_FXU1_FIN / PM_RUN_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu1_fin" + }, + { + "BriefDescription": "FXU1 only Busy", + "MetricExpr": "PM_FXU1_BUSY_FXU0_IDLE / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu1_only_busy" + }, + { + "BriefDescription": "Both FXU Busy", + "MetricExpr": "PM_FXU_BUSY / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu_both_busy" + }, + { + "BriefDescription": "Both FXU Idle", + "MetricExpr": "PM_FXU_IDLE / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu_both_idle" + }, + { + "BriefDescription": "PCT instruction loads", + "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "loads_per_inst" + }, + { + "BriefDescription": "PCT instruction stores", + "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "stores_per_inst" + }, + { + "BriefDescription": "Icache Fetchs per Icache Miss", + "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_miss_reload" + }, + { + "BriefDescription": "% of ICache reloads due to prefetch", + "MetricExpr": "PM_IC_PREF_WRITE * 
100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_pref_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l2_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l3_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_ll4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_lmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L4", + "MetricExpr": 
"PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rmem_percent" + }, + { + "BriefDescription": "Average number of stores that gather in the store buffer before being sent to an L2 RC machine", + "MetricExpr": "PM_ST_CMPL / (PM_L2_ST / 2)", + "MetricGroup": "l2_stats", + "MetricName": "avg_stores_gathered" + }, + { + "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", + "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_miss_ratio_percent" + }, + { + "BriefDescription": "Percentage of L2 store misses per drained store. A drained store may contain multiple individual stores if they target the same line", + "MetricExpr": "PM_L2_ST_MISS / (PM_L2_ST / 2)", + "MetricGroup": "l2_stats", + "MetricName": "l2_store_miss_ratio_percent" + }, + { + "BriefDescription": "average L1 miss latency using marked events", + "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1", + "MetricGroup": "latency", + "MetricName": "average_dl1miss_latency" + }, + { + "BriefDescription": "Average icache miss latency", + "MetricExpr": "(PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ)", + "MetricGroup": "latency", + "MetricName": "average_il1_miss_latency" + }, + { + "BriefDescription": "average service time for SYNC", + "MetricExpr": "PM_LSU_SRQ_SYNC_CYC / PM_LSU_SRQ_SYNC", + "MetricGroup": "latency", + "MetricName": "average_sync_cyc" + }, + { + "BriefDescription": "Cycles LMQ slot0 was active on an average", + "MetricExpr": "PM_LSU_LMQ_S0_VALID / PM_LSU_LMQ_S0_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_lmq_life_time" + }, + { + "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS", + "MetricExpr": "PM_LSU_LRQ_S0_VALID / PM_LSU_LRQ_S0_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_lrq_life_time_even" + }, + { + "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 43 is valid ONLY FOR ODD THREADS", + "MetricExpr": "PM_LSU_LRQ_S43_VALID / PM_LSU_LRQ_S43_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_lrq_life_time_odd" + }, + { + "BriefDescription": "Average number of cycles SRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS", + "MetricExpr": "PM_LSU_SRQ_S0_VALID / PM_LSU_SRQ_S0_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_srq_life_time_even" + }, + { + "BriefDescription": "Average number of cycles SRQ stays active for one load. 
Slot 39 is valid ONLY FOR ODD THREADS", + "MetricExpr": "PM_LSU_SRQ_S39_VALID / PM_LSU_SRQ_S39_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_srq_life_time_odd" + }, + { + "BriefDescription": "Marked background kill latency, measured in L2", + "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL", + "MetricGroup": "latency", + "MetricName": "bkill_latency" + }, + { + "BriefDescription": "Marked dclaim latency, measured in L2", + "MetricExpr": "PM_MRK_FAB_RSP_DCLAIM_CYC / PM_MRK_FAB_RSP_DCLAIM", + "MetricGroup": "latency", + "MetricName": "dclaim_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "dl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 distant Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "dl2l3_shr_latency" + }, + { + "BriefDescription": "Distant L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4", + "MetricGroup": "latency", + "MetricName": "dl4_latency" + }, + { + "BriefDescription": "Marked Dmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM", + "MetricGroup": "latency", + "MetricName": "dmem_latency" + }, + { + "BriefDescription": "estimated exposed miss latency for dL1 misses, ie load miss when we were NTC", + "MetricExpr": "PM_MRK_LD_MISS_EXPOSED_CYC / PM_MRK_LD_MISS_EXPOSED", + "MetricGroup": "latency", + "MetricName": "exposed_dl1miss_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the M state", + "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD", + "MetricGroup": "latency", + "MetricName": "l21_mod_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the S state", + "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR", + "MetricGroup": "latency", + "MetricName": "l21_shr_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time due to load-hit-store", + "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", + "MetricGroup": "latency", + "MetricName": "l2_disp_conflict_ldhitst_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time NOT due load-hit-store", + "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", + "MetricGroup": "latency", + "MetricName": "l2_disp_conflict_other_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2", + "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that were satisfied by lines prefetched into the L3. 
This information is forwarded from the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L2_MEPF_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_mepf_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered no conflicts", + "MetricExpr": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_no_conflict_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and beyond", + "MetricExpr": "PM_MRK_DATA_FROM_L2MISS_CYC/ PM_MRK_DATA_FROM_L2MISS", + "MetricGroup": "latency", + "MetricName": "l2miss_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD", + "MetricGroup": "latency", + "MetricName": "l31_mod_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR", + "MetricGroup": "latency", + "MetricName": "l31_shr_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3", + "MetricGroup": "latency", + "MetricName": "l3_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and suffered no conflicts", + "MetricExpr": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l3_no_conflict_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that come from beyond the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L3MISS_CYC/ PM_MRK_DATA_FROM_L3MISS", + "MetricGroup": "latency", + "MetricName": "l3miss_latency" + }, + { + "BriefDescription": "Average latency for marked reloads that hit in the L3 on the MEPF state. i.e. 
lines that were prefetched into the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L3_MEPF_CYC/ PM_MRK_DATA_FROM_L3_MEPF", + "MetricGroup": "latency", + "MetricName": "l3pref_latency" + }, + { + "BriefDescription": "Local L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4", + "MetricGroup": "latency", + "MetricName": "ll4_latency" + }, + { + "BriefDescription": "Marked Lmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM", + "MetricGroup": "latency", + "MetricName": "lmem_latency" + }, + { + "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on a different chip", + "MetricExpr": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_OFF_CHIP_CACHE", + "MetricGroup": "latency", + "MetricName": "off_chip_cache_latency" + }, + { + "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on the same chip", + "MetricExpr": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_ON_CHIP_CACHE", + "MetricGroup": "latency", + "MetricName": "on_chip_cache_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "rl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "rl2l3_shr_latency" + }, + { + "BriefDescription": "Remote L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4", + "MetricGroup": "latency", + "MetricName": "rl4_latency" + }, + { + "BriefDescription": "Marked Rmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM", + "MetricGroup": "latency", + "MetricName": "rmem_latency" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "erat_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / (PM_LSU_FIN - PM_LSU_FX_FIN)", + "MetricGroup": "lsu_rejects", + "MetricName": "erat_reject_ratio_percent" + }, + { + "BriefDescription": "LHS reject ratio", + "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lhs_reject_rate_percent" + }, + { + "BriefDescription": "LHS reject ratio", + "MetricExpr": "PM_LSU_REJECT_LHS *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)", + "MetricGroup": "lsu_rejects", + "MetricName": "lhs_reject_ratio_percent" + }, + { + "BriefDescription": "LMQ full reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_ratio_percent" + }, + { + "BriefDescription": "LSU reject ratio", + "MetricExpr": "PM_LSU_REJECT *100/ PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lsu_reject_rate_percent" + }, + { + "BriefDescription": "LSU reject ratio", + "MetricExpr": "PM_LSU_REJECT *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)", + "MetricGroup": "lsu_rejects", + "MetricName": "lsu_reject_ratio_percent" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to distant L4", + 
"MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_dmem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_mem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote L4", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_rl4" + }, + { + "BriefDescription": "Number of loads from local memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_dmem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote and distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_mem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_rmem" + }, + { + "BriefDescription": "Number of loads from remote memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_rmem_per_ld_dmem" + }, + { + "BriefDescription": "Memory locality", + "MetricExpr": "(PM_DATA_FROM_LL4 + PM_DATA_FROM_LMEM) * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4 + PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4 + PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4)", + "MetricGroup": "memory", + "MetricName": "mem_locality_percent" + }, + { + "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "derat_miss_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2 per 
inst", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3 per inst", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT misses that result in an ERAT reload", + "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "derat_miss_reload_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dmem_percent" + }, + { + 
"BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l2_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l3_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_ll4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_lmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rmem_percent" + }, + { + "BriefDescription": "% DERAT miss ratio for 16G page per inst", + "MetricExpr": "100 * PM_DERAT_MISS_16G / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_16g_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 16G page", + "MetricExpr": "PM_DERAT_MISS_16G / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_16g_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss rate for 16M page per inst", + "MetricExpr": "PM_DERAT_MISS_16M * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_16m_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 16M page", + "MetricExpr": "PM_DERAT_MISS_16M / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_16m_miss_ratio" + }, + { + "BriefDescription": "% DERAT 
miss rate for 4K page per inst", + "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 4K page", + "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss ratio for 64K page per inst", + "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 64K page", + "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_ratio" + }, + { + "BriefDescription": "% DSLB_Miss_Rate per inst", + "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "dslb_miss_rate_percent" + }, + { + "BriefDescription": "% ISLB miss rate per inst", + "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "islb_miss_rate_percent" + }, + { + "BriefDescription": "Fraction of hits on any Centaur (local, remote, or distant) on either L4 or DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_MEMORY / PM_LD_REF_L1", + "MetricName": "any_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "Base Completion Cycles", + "MetricExpr": "PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL", + "MetricName": "base_completion_cpi" + }, + { + "BriefDescription": "Marked background kill latency, measured in L2", + "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL", + "MetricName": "bkill_ratio_percent" + }, + { + "BriefDescription": "cycles", + "MetricExpr": "PM_RUN_CYC", + "MetricName": "custom_secs" + }, + { + "BriefDescription": "Fraction of hits on a distant chip's Centaur (L4 or DRAM) per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4) / PM_LD_REF_L1", + "MetricName": "distant_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "% of DL1 reloads that came from the L3 and beyond", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_l2_miss_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_l31_rate_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L3 where the lines were brought into the L3 by a prefetch operation", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_l3_mepf_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from beyond the local L3", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_l3_miss_percent" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a distant chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD / PM_LD_REF_L1", + "MetricName": "dl2l3_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a distant chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR / PM_LD_REF_L1", + "MetricName": "dl2l3_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a distant Centaur's cache per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DL4 / 
PM_LD_REF_L1", + "MetricName": "dl4_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a distant Centaur's DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DMEM / PM_LD_REF_L1", + "MetricName": "dmem_ld_hit_ratio" + }, + { + "BriefDescription": "Rate of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Overhead of expansion cycles", + "MetricExpr": "(PM_GRP_CMPL / PM_RUN_INST_CMPL) - (PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL)", + "MetricName": "expansion_overhead_cpi" + }, + { + "BriefDescription": "Total Fixed point operations executded in the Load/Store Unit following a load/store operation", + "MetricExpr": "PM_LSU_FX_FIN/PM_RUN_INST_CMPL", + "MetricName": "fixed_in_lsu_per_inst" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_CYC) * 100", + "MetricName": "gct_empty_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from L2", + "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from L3", + "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "Fraction of L1 hits per load ref", + "MetricExpr": "(PM_LD_REF_L1 - PM_LD_MISS_L1) / PM_LD_REF_L1", + "MetricName": "l1_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L1 load misses per L1 load ref", + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1", + "MetricName": "l1_ld_miss_ratio" + }, + { + "BriefDescription": "Fraction of hits on another core's L2 on the same chip per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_L21_MOD + PM_DATA_FROM_L21_SHR) / PM_LD_REF_L1", + "MetricName": "l2_1_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L2 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L21_MOD / PM_LD_REF_L1", + "MetricName": "l2_1_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on another core's L2 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L21_SHR / PM_LD_REF_L1", + "MetricName": "l2_1_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Average number of Castout machines used. 
1 of 16 CO machines is sampled every L2 cycle", + "MetricExpr": "(PM_CO_USAGE / PM_RUN_CYC) * 16", + "MetricName": "l2_co_usage" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L2 / PM_LD_REF_L1", + "MetricName": "l2_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L2 load misses per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_REF_L1", + "MetricName": "l2_ld_miss_ratio" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced a Load-Hit-Store conflict", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST / PM_LD_REF_L1", + "MetricName": "l2_lhs_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT / PM_LD_REF_L1", + "MetricName": "l2_no_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced some conflict other than Load-Hit-Store", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER / PM_LD_REF_L1", + "MetricName": "l2_other_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle", + "MetricExpr": "(PM_RC_USAGE / PM_RUN_CYC) * 16", + "MetricName": "l2_rc_usage" + }, + { + "BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle", + "MetricExpr": "(PM_SN_USAGE / PM_RUN_CYC) * 8", + "MetricName": "l2_sn_usage" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)", + "MetricName": "l31_latency" + }, + { + "BriefDescription": "Fraction of hits on another core's L3 on the same chip per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) / PM_LD_REF_L1", + "MetricName": "l3_1_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L3 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L31_MOD / PM_LD_REF_L1", + "MetricName": "l3_1_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on another core's L3 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L31_SHR / PM_LD_REF_L1", + "MetricName": "l3_1_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 load hits per load ref where the demand load collided with a pending prefetch", + "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT / PM_LD_REF_L1", + "MetricName": "l3_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 load hits per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L3 / PM_LD_REF_L1", + "MetricName": "l3_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 load misses per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_REF_L1", + "MetricName": "l3_ld_miss_ratio" + }, + { + "BriefDescription": "Fraction of L3 load hits per load ref where the L3 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT / PM_LD_REF_L1", + "MetricName": "l3_no_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 hits on lines that were not in the MEPF state per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_L3 - PM_DATA_FROM_L3_MEPF) / PM_LD_REF_L1", + "MetricName": "l3other_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 hits on lines 
that were recently prefetched into the L3 (MEPF state) per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L3_MEPF / PM_LD_REF_L1", + "MetricName": "l3pref_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a local Centaur's cache per L1 load ref", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_LD_REF_L1", + "MetricName": "ll4_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a local Centaur's DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_LD_REF_L1", + "MetricName": "lmem_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a local Centaur (L4 or DRAM) per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4) / PM_LD_REF_L1", + "MetricName": "local_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "Cycles stalled by Other LSU Operations", + "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_REJECT - PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_STORE) / (PM_LD_REF_L1 - PM_LD_MISS_L1)", + "MetricName": "lsu_stall_avg_cyc_per_l1hit_stfw" + }, + { + "BriefDescription": "Fraction of hits on another core's L2 or L3 on a different chip (remote or distant) per L1 load ref", + "MetricExpr": "PM_DATA_FROM_OFF_CHIP_CACHE / PM_LD_REF_L1", + "MetricName": "off_chip_cache_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on another core's L2 or L3 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_ON_CHIP_CACHE / PM_LD_REF_L1", + "MetricName": "on_chip_cache_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a remote chip's Centaur (L4 or DRAM) per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4) / PM_LD_REF_L1", + "MetricName": "remote_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable resources or facilities", + "MetricExpr": "PM_ISU_REJECT_RES_NA *100/ PM_RUN_INST_CMPL", + "MetricName": "resource_na_reject_rate_percent" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a remote chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD / PM_LD_REF_L1", + "MetricName": "rl2l3_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a remote chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR / PM_LD_REF_L1", + "MetricName": "rl2l3_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a remote Centaur's cache per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RL4 / PM_LD_REF_L1", + "MetricName": "rl4_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a remote Centaur's DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RMEM / PM_LD_REF_L1", + "MetricName": "rmem_ld_hit_ratio" + }, + { + "BriefDescription": "Percent of all FXU/VSU instructions that got rejected due to SAR Bypass", + "MetricExpr": "PM_ISU_REJECT_SAR_BYPASS *100/ PM_RUN_INST_CMPL", + "MetricName": "sar_bypass_reject_rate_percent" + }, + { + "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable sources", + "MetricExpr": "PM_ISU_REJECT_SRC_NA *100/ PM_RUN_INST_CMPL", + "MetricName": "source_na_reject_rate_percent" + }, + { + "BriefDescription": "Store forward rate", + "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / PM_RUN_INST_CMPL", + "MetricName": "store_forward_rate_percent" + }, + { + "BriefDescription": "Store forward rate", + "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / 
(PM_LD_REF_L1 - PM_LD_MISS_L1)", + "MetricName": "store_forward_ratio_percent" + }, + { + "BriefDescription": "Marked store latency, from core completion to L2 RC machine completion", + "MetricExpr": "(PM_MRK_ST_L2DISP_TO_CMPL_CYC + PM_MRK_ST_DRAIN_TO_L2DISP_CYC) / PM_MRK_ST_NEST", + "MetricName": "store_latency" + }, + { + "BriefDescription": "Cycles stalled by any sync", + "MetricExpr": "(PM_CMPLU_STALL_LWSYNC + PM_CMPLU_STALL_HWSYNC) / PM_RUN_INST_CMPL", + "MetricName": "sync_stall_cpi" + }, + { + "BriefDescription": "Percentage of lines that were prefetched into the L3 and evicted before they were consumed", + "MetricExpr": "(PM_L3_CO_MEPF / 2) / PM_L3_PREF_ALL * 100", + "MetricName": "wasted_l3_prefetch_percent" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json new file mode 100644 index 000000000000..811c2a8c1c9e --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -0,0 +1,1982 @@ +[ + { + "MetricExpr": "PM_BR_MPRED_CMPL / PM_BR_PRED * 100", + "MetricGroup": "branch_prediction", + "MetricName": "br_misprediction_percent" + }, + { + "BriefDescription": "Count cache branch misprediction per instruction", + "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_mispredict_rate_percent" + }, + { + "BriefDescription": "Count cache branch misprediction", + "MetricExpr": "PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_misprediction_percent" + }, + { + "BriefDescription": "Link stack branch misprediction", + "MetricExpr": "PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_mispredict_rate_percent" + }, + { + "BriefDescription": "Link stack branch misprediction", + "MetricExpr": "PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_misprediction_percent" + }, + { + "BriefDescription": "% Branches Taken", + "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN", + "MetricGroup": "branch_prediction", + "MetricName": "taken_branches_percent" + }, + { + "BriefDescription": "Completion stall due to a Branch Unit", + "MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "bru_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", + "MetricExpr": "PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "crypto_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", + "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dcache_miss_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dflong_stall_cpi" + }, + { + "BriefDescription": "Stalls due to short latency decimal floating ops.", + "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dfu_other_stall_cpi" + }, + { + "BriefDescription": "Finish 
stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dfu_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_distant_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l21_l31_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l2l3_conflict_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l2l3_noconflict_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l2l3_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss resolving missed the L3", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l3miss_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss that resolves in local memory", + "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_lmem_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_non_local_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", + "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_remote_stall_cpi" + }, + { + "BriefDescription": "Stalls due to short latency double precision ops.", + "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dp_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", + "MetricExpr": "PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dp_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. 
Includes binary floating point instructions in 32 and 64 bit binary floating point format.", + "MetricExpr": "PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dplong_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "eieio_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", + "MetricExpr": "PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "emq_full_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "emq_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss", + "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "erat_miss_stall_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", + "MetricExpr": "PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "exception_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to execution units for other reasons.", + "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "exec_unit_other_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)", + "MetricExpr": "PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "exec_unit_stall_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", + "MetricExpr": "PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "flush_any_thread_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)", + "MetricExpr": "PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxlong_stall_cpi" + }, + { + "BriefDescription": "Stalls due to short latency integer ops", + "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. 
These instructions get routed to the ALU, ALU2, and DIV pipes", + "MetricExpr": "PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_stall_cpi" + }, + { + "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "issue_hold_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied", + "MetricExpr": "PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "larx_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", + "MetricExpr": "PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lhs_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", + "MetricExpr": "PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lmq_full_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", + "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "load_finish_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full", + "MetricExpr": "PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lrq_full_stall_cpi" + }, + { + "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", + "MetricExpr": "PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lrq_other_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lrq_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", + "MetricExpr": "PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsaq_arb_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsaq_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", + "MetricExpr": "PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_fin_stall_cpi" + }, + { + "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. 
It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete", + "MetricExpr": "PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_flush_next_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", + "MetricExpr": "PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_mfspr_stall_cpi" + }, + { + "BriefDescription": "Completion LSU stall for other reasons", + "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_other_stall_cpi" + }, + { + "BriefDescription": "Completion stall by LSU instruction", + "MetricExpr": "PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_cpi" + }, + { + "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", + "MetricExpr": "PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "mtfpscr_stall_cpi" + }, + { + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT", + "MetricExpr": "PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "nested_tbegin_stall_cpi" + }, + { + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay", + "MetricExpr": "PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "nested_tend_stall_cpi" + }, + { + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", + "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "nothing_dispatched_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.", + "MetricExpr": "PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_disp_fin_stall_cpi" + }, + { + "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. 
This event is used to account for cycles in which work is being completed in the CPI stack", + "MetricExpr": "PM_NTC_FIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_fin_cpi" + }, + { + "BriefDescription": "Completion stall due to ntc flush", + "MetricExpr": "PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_flush_stall_cpi" + }, + { + "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", + "MetricExpr": "PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_issue_held_arb_cpi" + }, + { + "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", + "MetricExpr": "PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_issue_held_darq_full_cpi" + }, + { + "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", + "MetricExpr": "PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_issue_held_other_cpi" + }, + { + "BriefDescription": "Cycles unaccounted for.", + "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_cpi" + }, + { + "BriefDescription": "Completion stall for other reasons", + "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "paste_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "pm_stall_cpi" + }, + { + "BriefDescription": "Run cycles per run instruction", + "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "run_cpi" + }, + { + "BriefDescription": "Run_cycles", + "MetricExpr": "PM_RUN_CYC/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "run_cyc_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "scalar_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB", + "MetricExpr": "PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "slb_stall_cpi" + }, + { + "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", + "MetricExpr": "PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "spec_finish_stall_cpi" + }, + { + 
"BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", + "MetricExpr": "PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "srq_full_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "srq_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to store forward", + "MetricExpr": "PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "st_fwd_stall_cpi" + }, + { + "BriefDescription": "Nothing completed and ICT not empty", + "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "stcx_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data", + "MetricExpr": "PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_data_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe", + "MetricExpr": "PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_fin_arb_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", + "MetricExpr": "PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_finish_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. 
This means the instruction is ready to relaunch and tried once but lost arbitration", + "MetricExpr": "PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_pipe_arb_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", + "MetricExpr": "PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "tend_stall_cpi" + }, + { + "BriefDescription": "Completion Stalled because the thread was blocked", + "MetricExpr": "PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "thread_block_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "tlbie_stall_cpi" + }, + { + "BriefDescription": "Vector stalls due to small latency double precision ops", + "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vdp_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vdp_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", + "MetricExpr": "PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vdplong_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vector_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)", + "MetricExpr": "PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vfxlong_stall_cpi" + }, + { + "BriefDescription": "Vector stalls due to small latency integer ops", + "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vfxu_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. 
These instructions get routed to the ALU, ALU2, and DIV pipes", + "MetricExpr": "PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vfxu_stall_cpi" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from sources beyond the local L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 S state, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_mepf_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from sources beyond the local L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local Memory per Inst", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other 
core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rmem_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 demand load misses per run instruction", + "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "l1_ld_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 misses that result in a cache reload", + "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_miss_reloads_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dmem_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_shr_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from sources beyond the local L2", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_miss_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_shr_percent" + }, + { + "BriefDescription": "% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch", + "MetricExpr": 
"PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_mepf_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from sources beyond the local L3", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_miss_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local Memory", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_lmem_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rmem_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * PM_MRK_DATA_FROM_DL2L3_SHR_CYC / PM_MRK_DATA_FROM_DL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DL4 * PM_MRK_DATA_FROM_DL4_CYC / PM_MRK_DATA_FROM_DL4 / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl4_cpi_percent" + }, + { + "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DMEM * PM_MRK_DATA_FROM_DMEM_CYC / PM_MRK_DATA_FROM_DMEM / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L21_MOD * PM_MRK_DATA_FROM_L21_MOD_CYC / PM_MRK_DATA_FROM_L21_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L21_SHR * PM_MRK_DATA_FROM_L21_SHR_CYC / PM_MRK_DATA_FROM_L21_SHR / 
PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L2 * PM_MRK_DATA_FROM_L2_CYC / PM_MRK_DATA_FROM_L2 / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l2_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L31_MOD * PM_MRK_DATA_FROM_L31_MOD_CYC / PM_MRK_DATA_FROM_L31_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L31_SHR * PM_MRK_DATA_FROM_L31_SHR_CYC / PM_MRK_DATA_FROM_L31_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L3 * PM_MRK_DATA_FROM_L3_CYC / PM_MRK_DATA_FROM_L3 / PM_CMPLU_STALL_DCACHE_MISS * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l3_cpi_percent" + }, + { + "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_LMEM * PM_MRK_DATA_FROM_LMEM_CYC / PM_MRK_DATA_FROM_LMEM / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "lmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * PM_MRK_DATA_FROM_RL2L3_MOD_CYC / PM_MRK_DATA_FROM_RL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * PM_MRK_DATA_FROM_RL2L3_SHR_CYC / PM_MRK_DATA_FROM_RL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RL4 * PM_MRK_DATA_FROM_RL4_CYC / PM_MRK_DATA_FROM_RL4 / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl4_cpi_percent" + }, + { + "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rmem_cpi_percent" + }, + { + "BriefDescription": "Branch Mispredict flushes per instruction", + "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100", + "MetricGroup": "general", + "MetricName": "br_mpred_flush_rate_percent" + }, + { + "BriefDescription": "Cycles per instruction", + "MetricExpr": "PM_CYC / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "cpi" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100", 
+ "MetricGroup": "general", + "MetricName": "disp_flush_rate_percent" + }, + { + "BriefDescription": "% DTLB miss rate per inst", + "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "dtlb_miss_rate_percent" + }, + { + "BriefDescription": "Flush rate (%)", + "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "flush_rate_percent" + }, + { + "BriefDescription": "Instructions per cycles", + "MetricExpr": "PM_INST_CMPL / PM_CYC", + "MetricGroup": "general", + "MetricName": "ipc" + }, + { + "BriefDescription": "% ITLB miss rate per inst", + "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "itlb_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 load misses per L1 load ref", + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", + "MetricGroup": "general", + "MetricName": "l1_ld_miss_ratio_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per run instruction", + "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l1_st_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per L1 store ref", + "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100", + "MetricGroup": "general", + "MetricName": "l1_st_miss_ratio_percent" + }, + { + "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_inst_miss_rate_percent" + }, + { + "BriefDescription": "L2 dmand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_ld_miss_rate_percent" + }, + { + "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_pteg_miss_rate_percent" + }, + { + "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_inst_miss_rate_percent" + }, + { + "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_ld_miss_rate_percent" + }, + { + "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_pteg_miss_rate_percent" + }, + { + "BriefDescription": "Run cycles per cycle", + "MetricExpr": "PM_RUN_CYC / PM_CYC*100", + "MetricGroup": "general", + "MetricName": "run_cycles_percent" + }, + { + "BriefDescription": "Instruction dispatch-to-completion ratio", + "MetricExpr": "PM_INST_DISP / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "speculation" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_shr_rate_percent" + 
}, + { + "BriefDescription": "% of ICache reloads from Distant L4 per Inst", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory per Inst", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2 per Inst", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l2_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3 other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3 per Inst", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l3_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4 per Inst", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory per Inst", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L4 per Inst", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory per Inst", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rmem_rate_percent" + }, + { + "BriefDescription": "Instruction Cache Miss 
Rate (Per run Instruction)(%)", + "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "l1_inst_miss_rate_percent" + }, + { + "BriefDescription": "Icache Fetchs per Icache Miss", + "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_miss_reload" + }, + { + "BriefDescription": "% of ICache reloads due to prefetch", + "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_pref_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l2_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l3_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_ll4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_lmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 
or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L4", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rmem_percent" + }, + { + "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", + "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_co_m_rd_util" + }, + { + "BriefDescription": "L2 dcache invalidates per run inst (per core)", + "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_dc_inv_rate_percent" + }, + { + "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", + "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_dem_ld_disp_percent" + }, + { + "BriefDescription": "L2 Icache invalidates per run inst (per core)", + "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ic_inv_rate_percent" + }, + { + "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", + "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_inst_miss_ratio_percent" + }, + { + "BriefDescription": "Average number of cycles between L2 Load hits", + "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_hit_frequency" + }, + { + "BriefDescription": "Average number of cycles between L2 Load misses", + "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_miss_frequency" + }, + { + "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", + "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_miss_ratio_percent" + }, + { + "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", + "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_rd_util" + }, + { + "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", + "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ldmiss_wr_util" + }, + { + "BriefDescription": "L2 local pump prediction success", + "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_local_pred_correct_percent" + }, + { + "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", + "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", + "MetricGroup": 
"l2_stats", + "MetricName": "l2_mod_co_percent" + }, + { + "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", + "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_ld_disp_addr_fail_percent" + }, + { + "BriefDescription": "% of L2 Load RC dispatch attempts that failed", + "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_ld_disp_fail_percent" + }, + { + "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", + "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_st_disp_addr_fail_percent" + }, + { + "BriefDescription": "% of L2 Store RC dispatch attempts that failed", + "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_st_disp_fail_percent" + }, + { + "BriefDescription": "L2 Cache Read Utilization (per core)", + "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", + "MetricGroup": "l2_stats", + "MetricName": "l2_rd_util_percent" + }, + { + "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", + "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_shr_co_percent" + }, + { + "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", + "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_miss_ratio_percent" + }, + { + "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", + "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_rd_util" + }, + { + "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", + "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_wr_util" + }, + { + "BriefDescription": "L2 Cache Write Utilization (per core)", + "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", + "MetricGroup": "l2_stats", + "MetricName": "l2_wr_util_percent" + }, + { + "BriefDescription": "Average number of cycles between L3 Load hits", + "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", + "MetricGroup": "l3_stats", + "MetricName": "l3_ld_hit_frequency" + }, + { + "BriefDescription": "Average number of cycles between L3 Load misses", + "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", + "MetricGroup": "l3_stats", + "MetricName": "l3_ld_miss_frequency" + }, + { + "BriefDescription": "Average number of Write-in machines used. 
1 of 8 WI machines is sampled every L3 cycle", + "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", + "MetricGroup": "l3_stats", + "MetricName": "l3_wi_usage" + }, + { + "BriefDescription": "Average icache miss latency", + "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ", + "MetricGroup": "latency", + "MetricName": "average_il1_miss_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "dl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 distant Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "dl2l3_shr_latency" + }, + { + "BriefDescription": "Distant L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4", + "MetricGroup": "latency", + "MetricName": "dl4_latency" + }, + { + "BriefDescription": "Marked Dmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM", + "MetricGroup": "latency", + "MetricName": "dmem_latency" + }, + { + "BriefDescription": "average L1 miss latency using marked events", + "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1", + "MetricGroup": "latency", + "MetricName": "estimated_dl1miss_latency" + }, + { + "BriefDescription": "Marked L21 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD", + "MetricGroup": "latency", + "MetricName": "l21_mod_latency" + }, + { + "BriefDescription": "Marked L21 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR", + "MetricGroup": "latency", + "MetricName": "l21_shr_latency" + }, + { + "BriefDescription": "Marked L2 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD", + "MetricGroup": "latency", + "MetricName": "l31_mod_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR", + "MetricGroup": "latency", + "MetricName": "l31_shr_latency" + }, + { + "BriefDescription": "Marked L3 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3", + "MetricGroup": "latency", + "MetricName": "l3_latency" + }, + { + "BriefDescription": "Local L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4", + "MetricGroup": "latency", + "MetricName": "ll4_latency" + }, + { + "BriefDescription": "Marked Lmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM", + "MetricGroup": "latency", + "MetricName": "lmem_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "rl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "rl2l3_shr_latency" + }, + { + "BriefDescription": "Remote L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4", + "MetricGroup": "latency", + "MetricName": "rl4_latency" + }, + { + "BriefDescription": "Marked Rmem Load latency", + "MetricExpr": 
"PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM", + "MetricGroup": "latency", + "MetricName": "rmem_latency" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "erat_reject_rate_percent" + }, + { + "BriefDescription": "LHS reject ratio", + "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lhs_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_ratio_percent" + }, + { + "BriefDescription": "L4 locality(%)", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4)", + "MetricGroup": "memory", + "MetricName": "l4_locality" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_dmem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_mem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote L4", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_rl4" + }, + { + "BriefDescription": "Number of loads from local memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_dmem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote and distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_mem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_rmem" + }, + { + "BriefDescription": "Number of loads from remote memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_rmem_per_ld_dmem" + }, + { + "BriefDescription": "Memory locality", + "MetricExpr": "PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)", + "MetricGroup": "memory", + "MetricName": "mem_locality_percent" + }, + { + "BriefDescription": "L1 Prefetches issued by the prefetch machine per instruction (per thread)", + "MetricExpr": "PM_L1_PREF / PM_RUN_INST_CMPL * 100", + "MetricGroup": "prefetch", + "MetricName": "l1_prefetch_rate_percent" + }, + { + "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "derat_miss_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": 
"pteg_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2 per inst", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3 per inst", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory per inst", 
+ "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT misses that result in an ERAT reload", + "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "derat_miss_reload_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l2_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l3_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_ll4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_lmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + 
"MetricName": "pteg_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rmem_percent" + }, + { + "BriefDescription": "% DERAT miss rate for 4K page per inst", + "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 4K page", + "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss ratio for 64K page per inst", + "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 64K page", + "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_ratio" + }, + { + "BriefDescription": "DERAT miss ratio", + "MetricExpr": "PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_miss_ratio" + }, + { + "BriefDescription": "% DSLB_Miss_Rate per inst", + "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "dslb_miss_rate_percent" + }, + { + "BriefDescription": "% ISLB miss rate per inst", + "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "islb_miss_rate_percent" + }, + { + "BriefDescription": "ANY_SYNC_STALL_CPI", + "MetricExpr": "PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL", + "MetricName": "any_sync_stall_cpi" + }, + { + "BriefDescription": "Avg. 
number of instructions completed per cycle in which at least one instruction completed", + "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", + "MetricName": "average_completed_instruction_set_size" + }, + { + "BriefDescription": "Branches per instruction", + "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL", + "MetricName": "branches_per_inst" + }, + { + "BriefDescription": "Cycles in which at least one instruction completes in this thread", + "MetricExpr": "PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL", + "MetricName": "completion_cpi" + }, + { + "BriefDescription": "cycles", + "MetricExpr": "PM_RUN_CYC", + "MetricName": "custom_secs" + }, + { + "BriefDescription": "Percentage of cycles in which at least one instruction was dispatched", + "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", + "MetricName": "cycles_atleast_one_inst_dispatched_percent" + }, + { + "BriefDescription": "Cycles per instruction group", + "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", + "MetricName": "cycles_per_completed_instructions_set" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L4", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_dl4_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_l31_rate_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local L4", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_ll4_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local L4 per Inst", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L4", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_rl4_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote L4 per Inst", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_rl4_rate_percent" + }, + { + "BriefDescription": "Rate of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Cycles in which the oldest instruction is finished and ready to complete but is waiting to get through the completion pipe", + "MetricExpr": "PM_NTC_ALL_FIN / PM_RUN_INST_CMPL", + "MetricName": "finish_to_cmpl_cpi" + }, + { + "BriefDescription": "Total Fixed point operations", + "MetricExpr": "PM_FXU_FIN/PM_RUN_INST_CMPL", + "MetricName": "fixed_per_inst" + }, + { + "BriefDescription": "All FXU Busy", + "MetricExpr": "PM_FXU_BUSY / PM_CYC", + "MetricName": "fxu_all_busy" + }, + { + "BriefDescription": "All FXU Idle", + "MetricExpr": "PM_FXU_IDLE / PM_CYC", + "MetricName": "fxu_all_idle" + }, + { + "BriefDescription": "Ict empty for this thread due to branch mispred", + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_br_mpred_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", + 
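Note: cycles_per_completed_instructions_set and average_completed_instruction_set_size are both normalized by PM_1PLUS_PPC_CMPL, so their quotient reproduces the plain cpi metric defined earlier as PM_CYC / PM_INST_CMPL. A quick sanity check with invented counts:

# Invented counts, for illustration only.
PM_CYC = 1_000_000
PM_INST_CMPL = 800_000
PM_1PLUS_PPC_CMPL = 400_000  # cycles with at least one completed instruction

cycles_per_completed_instructions_set = PM_CYC / PM_1PLUS_PPC_CMPL         # 2.5
average_completed_instruction_set_size = PM_INST_CMPL / PM_1PLUS_PPC_CMPL  # 2.0
cpi = PM_CYC / PM_INST_CMPL                                                # 1.25
assert abs(cpi - cycles_per_completed_instructions_set / average_completed_instruction_set_size) < 1e-9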
"MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_br_mpred_icmiss_cpi" + }, + { + "BriefDescription": "ICT other stalls", + "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_cyc_other_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_hb_full_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_issq_cpi" + }, + { + "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", + "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_other_cpi" + }, + { + "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_sync_cpi" + }, + { + "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_tbegin_cpi" + }, + { + "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", + "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_l2_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", + "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_l3_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. 
The source could be local/remote/distant memory or another core's cache", + "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_l3miss_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to Icache Miss", + "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_miss_cpi" + }, + { + "BriefDescription": "Rate of IERAT reloads from L2", + "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from L3", + "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle", + "MetricExpr": "PM_CO_USAGE / PM_RUN_CYC * 16", + "MetricName": "l2_co_usage" + }, + { + "BriefDescription": "Percent of instruction reads out of all L2 commands", + "MetricExpr": "PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", + "MetricName": "l2_instr_commands_percent" + }, + { + "BriefDescription": "Percent of loads out of all L2 commands", + "MetricExpr": "PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", + "MetricName": "l2_ld_commands_percent" + }, + { + "BriefDescription": "Rate of L2 store dispatches that failed per core", + "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL", + "MetricName": "l2_rc_st_disp_fail_rate_percent" + }, + { + "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle", + "MetricExpr": "PM_RC_USAGE / PM_RUN_CYC * 16", + "MetricName": "l2_rc_usage" + }, + { + "BriefDescription": "Average number of Snoop machines used. 
1 of 8 SN machines is sampled every L2 cycle", + "MetricExpr": "PM_SN_USAGE / PM_RUN_CYC * 8", + "MetricName": "l2_sn_usage" + }, + { + "BriefDescription": "Percent of stores out of all L2 commands", + "MetricExpr": "PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", + "MetricName": "l2_st_commands_percent" + }, + { + "BriefDescription": "Rate of L2 store dispatches that failed per core", + "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL", + "MetricName": "l2_st_disp_fail_rate_percent" + }, + { + "BriefDescription": "Rate of L2 dispatches per core", + "MetricExpr": "100 * PM_L2_RCST_DISP/2 / PM_RUN_INST_CMPL", + "MetricName": "l2_st_disp_rate_percent" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)", + "MetricName": "l31_latency" + }, + { + "BriefDescription": "PCT instruction loads", + "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", + "MetricName": "loads_per_inst" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses", + "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL", + "MetricName": "lsu_stall_dcache_miss_cpi" + }, + { + "BriefDescription": "Completion stall because a different thread was using the completion pipe", + "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL", + "MetricName": "other_thread_cmpl_stall" + }, + { + "BriefDescription": "PCT instruction stores", + "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", + "MetricName": "stores_per_inst" + }, + { + "BriefDescription": "ANY_SYNC_STALL_CPI", + "MetricExpr": "PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL", + "MetricName": "sync_pmu_int_stall_cpi" + } +] diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 36c903faed0b..71e9737f4614 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -73,7 +73,7 @@ }, { "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", - "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", "MetricGroup": "Memory_Bound;Memory_Lat", "MetricName": "Load_Miss_Real_Latency" }, diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build index 41efd7e368b3..68d4b54574ad 100644 --- a/tools/perf/scripts/Build +++ b/tools/perf/scripts/Build @@ -1,2 +1,2 @@ -libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ -libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ +perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build index 34faecf774ae..db0036129307 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Build +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -1,4 +1,4 @@ -libperf-y += Context.o +perf-y += Context.o CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes CFLAGS_Context.o += 
-Wno-unused-parameter -Wno-nested-externs -Wno-undef diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index aefc15c9444a..7d0e33ce6aba 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -1,3 +1,3 @@ -libperf-y += Context.o +perf-y += Context.o CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 0564dd7377f2..30130213da7e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -478,7 +478,7 @@ if perf_db_export_calls: 'branch_count,' 'call_id,' 'return_id,' - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 245caf2643ed..ed237f2ed03f 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -320,7 +320,7 @@ if perf_db_export_calls: 'branch_count,' 'call_id,' 'return_id,' - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index f278ce5ebab7..c3091401df91 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -1,4 +1,3 @@ -#!/usr/bin/python2 # SPDX-License-Identifier: GPL-2.0 # exported-sql-viewer.py: view data from sql database # Copyright (c) 2014-2018, Intel Corporation. diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py index 3473e7f66081..3984bf51f3c5 100644 --- a/tools/perf/scripts/python/sched-migration.py +++ b/tools/perf/scripts/python/sched-migration.py @@ -1,5 +1,3 @@ -#!/usr/bin/python -# # Cpu task migration overview toy # # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py index 8410672efb8b..a81ad8835a74 100644 --- a/tools/perf/scripts/python/stat-cpi.py +++ b/tools/perf/scripts/python/stat-cpi.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 data = {} diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index e952127e4fb0..cb39ac46bc73 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -1,4 +1,3 @@ -#! 
/usr/bin/python # SPDX-License-Identifier: GPL-2.0 from __future__ import print_function diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index a20cbc445426..57fc544aedb0 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -15,7 +15,6 @@ #include <sys/mman.h> #include <linux/compiler.h> #include <linux/hw_breakpoint.h> -#include <sys/ioctl.h> #include "tests.h" #include "debug.h" diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index dbf2c69944d2..4ebd2681e760 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -15,6 +15,8 @@ #include "thread_map.h" #include "cpumap.h" #include "machine.h" +#include "map.h" +#include "symbol.h" #include "event.h" #include "thread.h" diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 7c8d2e422401..077c306c1cae 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -10,6 +10,7 @@ #include "../util/unwind.h" #include "perf_regs.h" #include "map.h" +#include "symbol.h" #include "thread.h" #include "callchain.h" diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index b889a28fd80b..469958cd7fe0 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -2,6 +2,7 @@ #include <inttypes.h> #include "perf.h" #include "util/debug.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" @@ -161,7 +162,7 @@ out: void print_hists_in(struct hists *hists) { int i = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; if (hists__has(hists, need_collapse)) @@ -170,7 +171,7 @@ void print_hists_in(struct hists *hists) root = hists->entries_in; pr_info("----- %s --------\n", __func__); - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; @@ -191,13 +192,13 @@ void print_hists_in(struct hists *hists) void print_hists_out(struct hists *hists) { int i = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; root = &hists->entries; pr_info("----- %s --------\n", __func__); - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 65fe02bebbee..7a2eed6c783e 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -2,6 +2,7 @@ #include "perf.h" #include "util/debug.h" #include "util/event.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" @@ -125,8 +126,8 @@ out: static void del_hist_entries(struct hists *hists) { struct hist_entry *he; - struct rb_root *root_in; - struct rb_root *root_out; + struct rb_root_cached *root_in; + struct rb_root_cached *root_out; struct rb_node *node; if (hists__has(hists, need_collapse)) @@ -136,12 +137,12 @@ static void del_hist_entries(struct hists *hists) root_out = &hists->entries; - while (!RB_EMPTY_ROOT(root_out)) { - node = rb_first(root_out); + while (!RB_EMPTY_ROOT(&root_out->rb_root)) { + node = rb_first_cached(root_out); he = rb_entry(node, struct hist_entry, rb_node); - rb_erase(node, root_out); - rb_erase(&he->rb_node_in, root_in); + rb_erase_cached(node, root_out); + rb_erase_cached(&he->rb_node_in, root_in); hist_entry__delete(he); } } @@ -198,7 +199,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec print_hists_out(hists); } - root 
= &hists->entries; + root = &hists->entries.rb_root; for (node = rb_first(root), i = 0; node && (he = rb_entry(node, struct hist_entry, rb_node)); node = rb_next(node), i++) { diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 1c5bedab3c2c..975844807fe2 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "perf.h" #include "util/debug.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 9a9d06cb0222..af633db63f4d 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -142,7 +142,7 @@ static int find_sample(struct sample *samples, size_t nr_samples, static int __validate_match(struct hists *hists) { size_t count = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; /* @@ -153,7 +153,7 @@ static int __validate_match(struct hists *hists) else root = hists->entries_in; - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; @@ -192,7 +192,7 @@ static int __validate_link(struct hists *hists, int idx) size_t count = 0; size_t count_pair = 0; size_t count_dummy = 0; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; /* @@ -205,7 +205,7 @@ static int __validate_link(struct hists *hists, int idx) else root = hists->entries_in; - node = rb_first(root); + node = rb_first_cached(root); while (node) { struct hist_entry *he; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index faacb4f41460..0a510c524a5d 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -2,6 +2,7 @@ #include "perf.h" #include "util/debug.h" #include "util/event.h" +#include "util/map.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" @@ -91,8 +92,8 @@ out: static void del_hist_entries(struct hists *hists) { struct hist_entry *he; - struct rb_root *root_in; - struct rb_root *root_out; + struct rb_root_cached *root_in; + struct rb_root_cached *root_out; struct rb_node *node; if (hists__has(hists, need_collapse)) @@ -102,12 +103,12 @@ static void del_hist_entries(struct hists *hists) root_out = &hists->entries; - while (!RB_EMPTY_ROOT(root_out)) { - node = rb_first(root_out); + while (!RB_EMPTY_ROOT(&root_out->rb_root)) { + node = rb_first_cached(root_out); he = rb_entry(node, struct hist_entry, rb_node); - rb_erase(node, root_out); - rb_erase(&he->rb_node_in, root_in); + rb_erase_cached(node, root_out); + rb_erase_cached(&he->rb_node_in, root_in); hist_entry__delete(he); } } @@ -126,7 +127,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = NULL; @@ -162,7 +163,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") && @@ -228,7 +229,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order 
= "overhead,cpu"; @@ -262,7 +263,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300); @@ -284,7 +285,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "comm,overhead,dso"; @@ -316,7 +317,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") && @@ -358,7 +359,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "dso,sym,comm,overhead,dso"; @@ -394,7 +395,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") && @@ -460,7 +461,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) int err; struct hists *hists = evsel__hists(evsel); struct hist_entry *he; - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *node; field_order = "cpu,pid,comm,dso,sym"; @@ -497,7 +498,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) } root = &hists->entries; - node = rb_first(root); + node = rb_first_cached(root); he = rb_entry(node, struct hist_entry, rb_node); TEST_ASSERT_VAL("Invalid hist entry", diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 5ede9b561d32..ba87e6e8d18c 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -11,6 +11,7 @@ #include "tests.h" #include "machine.h" #include "thread_map.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include "util.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3b97ac018d5a..4a69c07f4101 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist) return 0; } +static int test__sym_event_slash(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + return 0; +} + +static int test__sym_event_dc(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1670,6 +1690,16 
@@ static struct evlist_test test__events[] = { .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", .check = test__checkevent_complex_name, .id = 53 + }, + { + .name = "cycles//u", + .check = test__sym_event_slash, + .id = 54, + }, + { + .name = "cycles:k", + .check = test__sym_event_dc, + .id = 55, } }; diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 7bedf8608fdd..14a78898d79e 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -4,7 +4,9 @@ #include "util.h" #include "tests.h" #include <errno.h> +#include <stdio.h> #include <linux/kernel.h> +#include <linux/limits.h> /* Simulated format definitions. */ static struct test_format { diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 0e2d00d69e6e..236ce0d6c826 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -1,9 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdbool.h> #include <inttypes.h> +#include <linux/bitops.h> #include <linux/kernel.h> #include <linux/types.h> +#include "branch.h" #include "util.h" #include "event.h" #include "evsel.h" diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 5059452d27dd..8bfaa630389c 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <sys/epoll.h> #include <util/evlist.h> +#include <util/symbol.h> #include <linux/filter.h> #include "tests.h" #include "debug.h" diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 637365099b7d..85f328ddf897 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,15 +1,15 @@ -libperf-y += clone.o -libperf-y += fcntl.o -libperf-y += flock.o +perf-y += clone.o +perf-y += fcntl.o +perf-y += flock.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) -libperf-y += ioctl.o +perf-y += ioctl.o endif -libperf-y += kcmp.o -libperf-y += mount_flags.o -libperf-y += pkey_alloc.o -libperf-y += arch_prctl.o -libperf-y += prctl.o -libperf-y += renameat.o -libperf-y += sockaddr.o -libperf-y += socket.o -libperf-y += statx.o +perf-y += kcmp.o +perf-y += mount_flags.o +perf-y += pkey_alloc.o +perf-y += arch_prctl.o +perf-y += prctl.o +perf-y += renameat.o +perf-y += sockaddr.o +perf-y += socket.o +perf-y += statx.o diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 620350d41209..52242fa4072b 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) { unsigned long cmd = arg->val; - unsigned int fd = syscall_arg__val(arg, 0); + int fd = syscall_arg__val(arg, 0); struct file *file = thread__files_entry(arg->thread, fd); if (file != NULL) { diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c index 6897fab40dcc..d4d10b33ba0e 100644 --- a/tools/perf/trace/beauty/waitid_options.c +++ b/tools/perf/trace/beauty/waitid_options.c @@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size, #define P_OPTION(n) \ if (options & W##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : #n); \ + printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? 
prefix : "", #n); \ options &= ~W##n; \ } diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build index 0a73538c0441..3aff83c3275f 100644 --- a/tools/perf/ui/Build +++ b/tools/perf/ui/Build @@ -1,14 +1,14 @@ -libperf-y += setup.o -libperf-y += helpline.o -libperf-y += progress.o -libperf-y += util.o -libperf-y += hist.o -libperf-y += stdio/hist.o +perf-y += setup.o +perf-y += helpline.o +perf-y += progress.o +perf-y += util.o +perf-y += hist.o +perf-y += stdio/hist.o CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" -libperf-$(CONFIG_SLANG) += browser.o -libperf-$(CONFIG_SLANG) += browsers/ -libperf-$(CONFIG_SLANG) += tui/ +perf-$(CONFIG_SLANG) += browser.o +perf-$(CONFIG_SLANG) += browsers/ +perf-$(CONFIG_SLANG) += tui/ CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build index de223f5bed58..8fee56b46502 100644 --- a/tools/perf/ui/browsers/Build +++ b/tools/perf/ui/browsers/Build @@ -1,8 +1,8 @@ -libperf-y += annotate.o -libperf-y += hists.o -libperf-y += map.o -libperf-y += scripts.o -libperf-y += header.o +perf-y += annotate.o +perf-y += hists.o +perf-y += map.o +perf-y += scripts.o +perf-y += header.o CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 82e16bf84466..35bdfd8b1e71 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -7,6 +7,7 @@ #include "../../util/annotate.h" #include "../../util/hist.h" #include "../../util/sort.h" +#include "../../util/map.h" #include "../../util/symbol.h" #include "../../util/evsel.h" #include "../../util/evlist.h" diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index d75492189acb..5aeb663dd184 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -35,7 +35,7 @@ static int list_menu__run(struct ui_browser *menu) { int key; unsigned long offset; - const char help[] = + static const char help[] = "h/?/F1 Show this window\n" "UP/DOWN/PGUP\n" "PGDN/SPACE\n" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ffac1d54a3d4..aef800d97ea1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -8,9 +8,12 @@ #include <linux/rbtree.h> #include <sys/ttydefaults.h> +#include "../../util/callchain.h" #include "../../util/evsel.h" #include "../../util/evlist.h" #include "../../util/hist.h" +#include "../../util/map.h" +#include "../../util/symbol.h" #include "../../util/pstack.h" #include "../../util/sort.h" #include "../../util/util.h" @@ -49,7 +52,7 @@ static int hist_browser__get_folding(struct hist_browser *browser) struct hists *hists = browser->hists; int unfolded_rows = 0; - for (nd = rb_first(&hists->entries); + for (nd = rb_first_cached(&hists->entries); (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; nd = rb_hierarchy_next(nd)) { struct hist_entry *he = @@ -267,7 +270,7 @@ static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, if (he->has_no_entry) return 1; - node = rb_first(&he->hroot_out); + node = rb_first_cached(&he->hroot_out); while (node) { float percent; @@ -372,7 +375,7 @@ static void hist_entry__init_have_children(struct hist_entry *he) he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); callchain__init_have_children(&he->sorted_chain); } else { - he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); + he->has_children = !RB_EMPTY_ROOT(&he->hroot_out.rb_root); } 
he->init_have_children = true; @@ -508,7 +511,7 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, struct hist_entry *child; int n = 0; - for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&he->hroot_out); nd; nd = rb_next(nd)) { child = rb_entry(nd, struct hist_entry, rb_node); percent = hist_entry__get_percent_limit(child); if (!child->filtered && percent >= hb->min_pcnt) @@ -566,7 +569,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) struct rb_node *nd; struct hist_entry *he; - nd = rb_first(&browser->hists->entries); + nd = rb_first_cached(&browser->hists->entries); while (nd) { he = rb_entry(nd, struct hist_entry, rb_node); @@ -1738,7 +1741,7 @@ static void ui_browser__hists_init_top(struct ui_browser *browser) struct hist_browser *hb; hb = container_of(browser, struct hist_browser, b); - browser->top = rb_first(&hb->hists->entries); + browser->top = rb_first_cached(&hb->hists->entries); } } @@ -2649,7 +2652,7 @@ add_socket_opt(struct hist_browser *browser, struct popup_action *act, static void hist_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; - struct rb_node *nd = rb_first(&hb->hists->entries); + struct rb_node *nd = rb_first_cached(&hb->hists->entries); if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; @@ -2669,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, double percent) { struct hist_entry *he; - struct rb_node *nd = rb_first(&hb->hists->entries); + struct rb_node *nd = rb_first_cached(&hb->hists->entries); u64 total = hists__total_period(hb->hists); u64 min_callchain_hits = total * (percent / 100); @@ -2748,7 +2751,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "S Zoom into current Processor Socket\n" \ /* help messages are sorted by lexical order of the hotkey */ - const char report_help[] = HIST_BROWSER_HELP_COMMON + static const char report_help[] = HIST_BROWSER_HELP_COMMON "i Show header information\n" "P Print histograms to perf.hist.N\n" "r Run available scripts\n" @@ -2756,7 +2759,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "t Zoom into current Thread\n" "V Verbose (DSO names in callchains, etc)\n" "/ Filter symbol by name"; - const char top_help[] = HIST_BROWSER_HELP_COMMON + static const char top_help[] = HIST_BROWSER_HELP_COMMON "P Print histograms to perf.hist.N\n" "t Zoom into current Thread\n" "V Verbose (DSO names in callchains, etc)\n" diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 5b8b8c637686..c70d9337405b 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -6,6 +6,7 @@ #include <linux/bitops.h> #include "../../util/util.h" #include "../../util/debug.h" +#include "../../util/map.h" #include "../../util/symbol.h" #include "../browser.h" #include "../helpline.h" diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 48428c9acd89..df49c9ba1785 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include "gtk.h" +#include "util/sort.h" #include "util/debug.h" #include "util/annotate.h" #include "util/evsel.h" +#include "util/map.h" +#include "util/symbol.h" #include "ui/helpline.h" #include <inttypes.h> #include <signal.h> diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 
4ab663ec3e5e..0c08890f006a 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "../evlist.h" #include "../cache.h" +#include "../callchain.h" #include "../evsel.h" #include "../sort.h" #include "../hist.h" @@ -353,7 +354,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, g_object_unref(GTK_TREE_MODEL(store)); - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; u64 total = hists__total_period(h->hists); @@ -401,7 +402,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, } static void perf_gtk__add_hierarchy_entries(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, GtkTreeStore *store, GtkTreeIter *parent, struct perf_hpp *hpp, @@ -415,7 +416,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, u64 total = hists__total_period(hists); int size; - for (node = rb_first(root); node; node = rb_next(node)) { + for (node = rb_first_cached(root); node; node = rb_next(node)) { GtkTreeIter iter; float percent; char *bf; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index fe3dfaa64a91..412d6f1626e3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -3,6 +3,7 @@ #include <math.h> #include <linux/compiler.h> +#include "../util/callchain.h" #include "../util/hist.h" #include "../util/util.h" #include "../util/sort.h" diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 74c4ae1f0a05..a60f2993d390 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -2,8 +2,12 @@ #include <stdio.h> #include <linux/string.h> +#include "../../util/callchain.h" #include "../../util/util.h" #include "../../util/hist.h" +#include "../../util/map.h" +#include "../../util/map_groups.h" +#include "../../util/symbol.h" #include "../../util/sort.h" #include "../../util/evsel.h" #include "../../util/srcline.h" @@ -788,7 +792,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, indent = hists__overhead_width(hists) + 4; - for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { + for (nd = rb_first_cached(&hists->entries); nd; + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build index 9e4c6ca41a9f..f916df33a1a7 100644 --- a/tools/perf/ui/tui/Build +++ b/tools/perf/ui/tui/Build @@ -1,4 +1,4 @@ -libperf-y += setup.o -libperf-y += util.o -libperf-y += helpline.o -libperf-y += progress.o +perf-y += setup.o +perf-y += util.o +perf-y += helpline.o +perf-y += progress.o diff --git a/tools/perf/util/Build b/tools/perf/util/Build index af72be7f5b3b..ca0741c91903 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,158 +1,162 @@ -libperf-y += annotate.o -libperf-y += block-range.o -libperf-y += build-id.o -libperf-y += config.o -libperf-y += ctype.o -libperf-y += db-export.o -libperf-y += env.o -libperf-y += event.o -libperf-y += evlist.o -libperf-y += evsel.o -libperf-y += evsel_fprintf.o -libperf-y += find_bit.o -libperf-y += get_current_dir_name.o -libperf-y += kallsyms.o -libperf-y += levenshtein.o -libperf-y += llvm-utils.o -libperf-y += mmap.o -libperf-y += memswap.o -libperf-y += parse-events.o -libperf-y += perf_regs.o 
-libperf-y += path.o -libperf-y += print_binary.o -libperf-y += rbtree.o -libperf-y += libstring.o -libperf-y += bitmap.o -libperf-y += hweight.o -libperf-y += smt.o -libperf-y += strbuf.o -libperf-y += string.o -libperf-y += strlist.o -libperf-y += strfilter.o -libperf-y += top.o -libperf-y += usage.o -libperf-y += dso.o -libperf-y += symbol.o -libperf-y += symbol_fprintf.o -libperf-y += color.o -libperf-y += metricgroup.o -libperf-y += header.o -libperf-y += callchain.o -libperf-y += values.o -libperf-y += debug.o -libperf-y += machine.o -libperf-y += map.o -libperf-y += pstack.o -libperf-y += session.o -libperf-$(CONFIG_TRACE) += syscalltbl.o -libperf-y += ordered-events.o -libperf-y += namespaces.o -libperf-y += comm.o -libperf-y += thread.o -libperf-y += thread_map.o -libperf-y += trace-event-parse.o -libperf-y += parse-events-flex.o -libperf-y += parse-events-bison.o -libperf-y += pmu.o -libperf-y += pmu-flex.o -libperf-y += pmu-bison.o -libperf-y += trace-event-read.o -libperf-y += trace-event-info.o -libperf-y += trace-event-scripting.o -libperf-y += trace-event.o -libperf-y += svghelper.o -libperf-y += sort.o -libperf-y += hist.o -libperf-y += util.o -libperf-y += xyarray.o -libperf-y += cpumap.o -libperf-y += cgroup.o -libperf-y += target.o -libperf-y += rblist.o -libperf-y += intlist.o -libperf-y += vdso.o -libperf-y += counts.o -libperf-y += stat.o -libperf-y += stat-shadow.o -libperf-y += stat-display.o -libperf-y += record.o -libperf-y += srcline.o -libperf-y += srccode.o -libperf-y += data.o -libperf-y += tsc.o -libperf-y += cloexec.o -libperf-y += call-path.o -libperf-y += rwsem.o -libperf-y += thread-stack.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o -libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ -libperf-$(CONFIG_AUXTRACE) += intel-pt.o -libperf-$(CONFIG_AUXTRACE) += intel-bts.o -libperf-$(CONFIG_AUXTRACE) += arm-spe.o -libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o -libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o +perf-y += annotate.o +perf-y += block-range.o +perf-y += build-id.o +perf-y += config.o +perf-y += ctype.o +perf-y += db-export.o +perf-y += env.o +perf-y += event.o +perf-y += evlist.o +perf-y += evsel.o +perf-y += evsel_fprintf.o +perf-y += find_bit.o +perf-y += get_current_dir_name.o +perf-y += kallsyms.o +perf-y += levenshtein.o +perf-y += llvm-utils.o +perf-y += mmap.o +perf-y += memswap.o +perf-y += parse-events.o +perf-y += perf_regs.o +perf-y += path.o +perf-y += print_binary.o +perf-y += rbtree.o +perf-y += libstring.o +perf-y += bitmap.o +perf-y += hweight.o +perf-y += smt.o +perf-y += strbuf.o +perf-y += string.o +perf-y += strlist.o +perf-y += strfilter.o +perf-y += top.o +perf-y += usage.o +perf-y += dso.o +perf-y += symbol.o +perf-y += symbol_fprintf.o +perf-y += color.o +perf-y += color_config.o +perf-y += metricgroup.o +perf-y += header.o +perf-y += callchain.o +perf-y += values.o +perf-y += debug.o +perf-y += machine.o +perf-y += map.o +perf-y += pstack.o +perf-y += session.o +perf-y += sample-raw.o +perf-y += s390-sample-raw.o +perf-$(CONFIG_TRACE) += syscalltbl.o +perf-y += ordered-events.o +perf-y += namespaces.o +perf-y += comm.o +perf-y += thread.o +perf-y += thread_map.o +perf-y += trace-event-parse.o +perf-y += parse-events-flex.o +perf-y += parse-events-bison.o +perf-y += pmu.o +perf-y += pmu-flex.o +perf-y += pmu-bison.o +perf-y += trace-event-read.o +perf-y += trace-event-info.o +perf-y += trace-event-scripting.o +perf-y += trace-event.o +perf-y += svghelper.o +perf-y += sort.o +perf-y += hist.o +perf-y += util.o +perf-y += 
xyarray.o +perf-y += cpumap.o +perf-y += cgroup.o +perf-y += target.o +perf-y += rblist.o +perf-y += intlist.o +perf-y += vdso.o +perf-y += counts.o +perf-y += stat.o +perf-y += stat-shadow.o +perf-y += stat-display.o +perf-y += record.o +perf-y += srcline.o +perf-y += srccode.o +perf-y += data.o +perf-y += tsc.o +perf-y += cloexec.o +perf-y += call-path.o +perf-y += rwsem.o +perf-y += thread-stack.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +perf-$(CONFIG_AUXTRACE) += intel-pt.o +perf-$(CONFIG_AUXTRACE) += intel-bts.o +perf-$(CONFIG_AUXTRACE) += arm-spe.o +perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o +perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o ifdef CONFIG_LIBOPENCSD -libperf-$(CONFIG_AUXTRACE) += cs-etm.o -libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +perf-$(CONFIG_AUXTRACE) += cs-etm.o +perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ endif -libperf-y += parse-branch-options.o -libperf-y += dump-insn.o -libperf-y += parse-regs-options.o -libperf-y += term.o -libperf-y += help-unknown-cmd.o -libperf-y += mem-events.o -libperf-y += vsprintf.o -libperf-y += drv_configs.o -libperf-y += units.o -libperf-y += time-utils.o -libperf-y += expr-bison.o -libperf-y += branch.o -libperf-y += mem2node.o - -libperf-$(CONFIG_LIBBPF) += bpf-loader.o -libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o -libperf-$(CONFIG_LIBELF) += symbol-elf.o -libperf-$(CONFIG_LIBELF) += probe-file.o -libperf-$(CONFIG_LIBELF) += probe-event.o +perf-y += parse-branch-options.o +perf-y += dump-insn.o +perf-y += parse-regs-options.o +perf-y += term.o +perf-y += help-unknown-cmd.o +perf-y += mem-events.o +perf-y += vsprintf.o +perf-y += units.o +perf-y += time-utils.o +perf-y += expr-bison.o +perf-y += branch.o +perf-y += mem2node.o + +perf-$(CONFIG_LIBBPF) += bpf-loader.o +perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o +perf-$(CONFIG_LIBELF) += symbol-elf.o +perf-$(CONFIG_LIBELF) += probe-file.o +perf-$(CONFIG_LIBELF) += probe-event.o ifndef CONFIG_LIBELF -libperf-y += symbol-minimal.o +perf-y += symbol-minimal.o endif ifndef CONFIG_SETNS -libperf-y += setns.o +perf-y += setns.o endif -libperf-$(CONFIG_DWARF) += probe-finder.o -libperf-$(CONFIG_DWARF) += dwarf-aux.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += probe-finder.o +perf-$(CONFIG_DWARF) += dwarf-aux.o +perf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o -libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o +perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o +perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o -libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o -libperf-y += scripting-engines/ +perf-y += scripting-engines/ -libperf-$(CONFIG_ZLIB) += zlib.o -libperf-$(CONFIG_LZMA) += lzma.o -libperf-y += demangle-java.o -libperf-y += demangle-rust.o +perf-$(CONFIG_ZLIB) += zlib.o +perf-$(CONFIG_LZMA) += lzma.o +perf-y += demangle-java.o +perf-y += demangle-rust.o ifdef CONFIG_JITDUMP -libperf-$(CONFIG_LIBELF) += jitdump.o -libperf-$(CONFIG_LIBELF) += genelf.o -libperf-$(CONFIG_DWARF) += genelf_debug.o +perf-$(CONFIG_LIBELF) += jitdump.o +perf-$(CONFIG_LIBELF) += genelf.o 
+perf-$(CONFIG_DWARF) += genelf_debug.o endif -libperf-y += perf-hooks.o +perf-y += perf-hooks.o -libperf-$(CONFIG_CXX) += c++/ +perf-$(CONFIG_LIBBPF) += bpf-event.o + +perf-$(CONFIG_CXX) += c++/ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 70de8f6b3aee..2468b8aa0b6b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -9,6 +9,7 @@ #include <errno.h> #include <inttypes.h> +#include <libgen.h> #include "util.h" #include "ui/ui.h" #include "sort.h" @@ -16,6 +17,7 @@ #include "color.h" #include "config.h" #include "cache.h" +#include "map.h" #include "symbol.h" #include "units.h" #include "debug.h" diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index fb6463730ba4..95053cab41fe 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -4,16 +4,24 @@ #include <stdbool.h> #include <stdint.h> +#include <stdio.h> #include <linux/types.h> -#include "symbol.h" -#include "hist.h" -#include "sort.h" #include <linux/list.h> #include <linux/rbtree.h> #include <pthread.h> #include <asm/bug.h> +#include "symbol_conf.h" +struct hist_browser_timer; +struct hist_entry; struct ins_ops; +struct map; +struct map_symbol; +struct addr_map_symbol; +struct option; +struct perf_sample; +struct perf_evsel; +struct symbol; struct ins { const char *name; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index f69961c4a4f3..267e54df511b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -27,6 +27,7 @@ #include <linux/bitops.h> #include <linux/log2.h> #include <linux/string.h> +#include <linux/time64.h> #include <sys/param.h> #include <stdlib.h> @@ -41,6 +42,7 @@ #include "pmu.h" #include "evsel.h" #include "cpumap.h" +#include "symbol.h" #include "thread_map.h" #include "asm/bug.h" #include "auxtrace.h" @@ -857,7 +859,7 @@ void auxtrace_buffer__free(struct auxtrace_buffer *buffer) void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, int code, int cpu, pid_t pid, pid_t tid, u64 ip, - const char *msg) + const char *msg, u64 timestamp) { size_t size; @@ -869,7 +871,9 @@ void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, auxtrace_error->cpu = cpu; auxtrace_error->pid = pid; auxtrace_error->tid = tid; + auxtrace_error->fmt = 1; auxtrace_error->ip = ip; + auxtrace_error->time = timestamp; strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); size = (void *)auxtrace_error->msg - (void *)auxtrace_error + @@ -1159,12 +1163,27 @@ static const char *auxtrace_error_name(int type) size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) { struct auxtrace_error_event *e = &event->auxtrace_error; + unsigned long long nsecs = e->time; + const char *msg = e->msg; int ret; ret = fprintf(fp, " %s error type %u", auxtrace_error_name(e->type), e->type); + + if (e->fmt && nsecs) { + unsigned long secs = nsecs / NSEC_PER_SEC; + + nsecs -= secs * NSEC_PER_SEC; + ret += fprintf(fp, " time %lu.%09llu", secs, nsecs); + } else { + ret += fprintf(fp, " time 0"); + } + + if (!e->fmt) + msg = (const char *)&e->time; + ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", - e->cpu, e->pid, e->tid, e->ip, e->code, e->msg); + e->cpu, e->pid, e->tid, e->ip, e->code, msg); return ret; } @@ -1278,9 +1297,9 @@ static int __auxtrace_mmap__read(struct perf_mmap *map, } /* padding 
must be written by fn() e.g. record__process_auxtrace() */ - padding = size & 7; + padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1); if (padding) - padding = 8 - padding; + padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding; memset(&ev, 0, sizeof(ev)); ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 8e50f96d4b23..c69bcd9a3091 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -40,6 +40,9 @@ struct record_opts; struct auxtrace_info_event; struct events_stats; +/* Auxtrace records must have the same alignment as perf event records */ +#define PERF_AUXTRACE_RECORD_ALIGNMENT 8 + enum auxtrace_type { PERF_AUXTRACE_UNKNOWN, PERF_AUXTRACE_INTEL_PT, @@ -516,7 +519,7 @@ void auxtrace_index__free(struct list_head *head); void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, int code, int cpu, pid_t pid, pid_t tid, u64 ip, - const char *msg); + const char *msg, u64 timestamp); int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index f1451c987eec..1be432657501 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "block-range.h" #include "annotate.h" +#include <assert.h> +#include <stdlib.h> struct { struct rb_root root; diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h index a5ba719d69fb..ec0fb534bf56 100644 --- a/tools/perf/util/block-range.h +++ b/tools/perf/util/block-range.h @@ -2,7 +2,11 @@ #ifndef __PERF_BLOCK_RANGE_H #define __PERF_BLOCK_RANGE_H -#include "symbol.h" +#include <stdbool.h> +#include <linux/rbtree.h> +#include <linux/types.h> + +struct symbol; /* * struct block_range - non-overlapping parts of basic blocks diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c new file mode 100644 index 000000000000..62dda96b0096 --- /dev/null +++ b/tools/perf/util/bpf-event.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdlib.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <linux/btf.h> +#include "bpf-event.h" +#include "debug.h" +#include "symbol.h" +#include "machine.h" + +#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) +{ + int ret = 0; + size_t i; + + for (i = 0; i < len; i++) + ret += snprintf(buf + ret, size - ret, "%02x", data[i]); + return ret; +} + +int machine__process_bpf_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_bpf_event(event, stdout); + return 0; +} + +/* + * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf + * program. One PERF_RECORD_BPF_EVENT is generated for the program. And + * one PERF_RECORD_KSYMBOL is generated for each sub program. + * + * Returns: + * 0 for success; + * -1 for failures; + * -2 for lack of kernel support. 
+ */ +static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + int fd, + union perf_event *event, + struct record_opts *opts) +{ + struct ksymbol_event *ksymbol_event = &event->ksymbol_event; + struct bpf_event *bpf_event = &event->bpf_event; + u32 sub_prog_cnt, i, func_info_rec_size = 0; + u8 (*prog_tags)[BPF_TAG_SIZE] = NULL; + struct bpf_prog_info info = { .type = 0, }; + u32 info_len = sizeof(info); + void *func_infos = NULL; + u64 *prog_addrs = NULL; + struct btf *btf = NULL; + u32 *prog_lens = NULL; + bool has_btf = false; + char errbuf[512]; + int err = 0; + + /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + + if (err) { + pr_debug("%s: failed to get BPF program info: %s, aborting\n", + __func__, str_error_r(errno, errbuf, sizeof(errbuf))); + return -1; + } + if (info_len < offsetof(struct bpf_prog_info, prog_tags)) { + pr_debug("%s: the kernel is too old, aborting\n", __func__); + return -2; + } + + /* number of ksyms, func_lengths, and tags should match */ + sub_prog_cnt = info.nr_jited_ksyms; + if (sub_prog_cnt != info.nr_prog_tags || + sub_prog_cnt != info.nr_jited_func_lens) + return -1; + + /* check BTF func info support */ + if (info.btf_id && info.nr_func_info && info.func_info_rec_size) { + /* btf func info number should be same as sub_prog_cnt */ + if (sub_prog_cnt != info.nr_func_info) { + pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__); + return -1; + } + if (btf__get_from_id(info.btf_id, &btf)) { + pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id); + return -1; + } + func_info_rec_size = info.func_info_rec_size; + func_infos = calloc(sub_prog_cnt, func_info_rec_size); + if (!func_infos) { + pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__); + return -1; + } + has_btf = true; + } + + /* + * We need address, length, and tag for each sub program. 
+ * Allocate memory and call bpf_obj_get_info_by_fd() again + */ + prog_addrs = calloc(sub_prog_cnt, sizeof(u64)); + if (!prog_addrs) { + pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__); + goto out; + } + prog_lens = calloc(sub_prog_cnt, sizeof(u32)); + if (!prog_lens) { + pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__); + goto out; + } + prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE); + if (!prog_tags) { + pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__); + goto out; + } + + memset(&info, 0, sizeof(info)); + info.nr_jited_ksyms = sub_prog_cnt; + info.nr_jited_func_lens = sub_prog_cnt; + info.nr_prog_tags = sub_prog_cnt; + info.jited_ksyms = ptr_to_u64(prog_addrs); + info.jited_func_lens = ptr_to_u64(prog_lens); + info.prog_tags = ptr_to_u64(prog_tags); + info_len = sizeof(info); + if (has_btf) { + info.nr_func_info = sub_prog_cnt; + info.func_info_rec_size = func_info_rec_size; + info.func_info = ptr_to_u64(func_infos); + } + + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("%s: failed to get BPF program info, aborting\n", __func__); + goto out; + } + + /* Synthesize PERF_RECORD_KSYMBOL */ + for (i = 0; i < sub_prog_cnt; i++) { + const struct bpf_func_info *finfo; + const char *short_name = NULL; + const struct btf_type *t; + int name_len; + + *ksymbol_event = (struct ksymbol_event){ + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = offsetof(struct ksymbol_event, name), + }, + .addr = prog_addrs[i], + .len = prog_lens[i], + .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, + .flags = 0, + }; + name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN, + "bpf_prog_"); + name_len += snprintf_hex(ksymbol_event->name + name_len, + KSYM_NAME_LEN - name_len, + prog_tags[i], BPF_TAG_SIZE); + if (has_btf) { + finfo = func_infos + i * info.func_info_rec_size; + t = btf__type_by_id(btf, finfo->type_id); + short_name = btf__name_by_offset(btf, t->name_off); + } else if (i == 0 && sub_prog_cnt == 1) { + /* no subprog */ + if (info.name[0]) + short_name = info.name; + } else + short_name = "F"; + if (short_name) + name_len += snprintf(ksymbol_event->name + name_len, + KSYM_NAME_LEN - name_len, + "_%s", short_name); + + ksymbol_event->header.size += PERF_ALIGN(name_len + 1, + sizeof(u64)); + + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + err = perf_tool__process_synth_event(tool, event, + machine, process); + } + + /* Synthesize PERF_RECORD_BPF_EVENT */ + if (opts->bpf_event) { + *bpf_event = (struct bpf_event){ + .header = { + .type = PERF_RECORD_BPF_EVENT, + .size = sizeof(struct bpf_event), + }, + .type = PERF_BPF_EVENT_PROG_LOAD, + .flags = 0, + .id = info.id, + }; + memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE); + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + err = perf_tool__process_synth_event(tool, event, + machine, process); + } + +out: + free(prog_tags); + free(prog_lens); + free(prog_addrs); + free(func_infos); + free(btf); + return err ? 
-1 : 0; +} + +int perf_event__synthesize_bpf_events(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + struct record_opts *opts) +{ + union perf_event *event; + __u32 id = 0; + int err; + int fd; + + event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN + machine->id_hdr_size); + if (!event) + return -1; + while (true) { + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + pr_debug("%s: can't get next program: %s%s", + __func__, strerror(errno), + errno == EINVAL ? " -- kernel too old?" : ""); + /* don't report error on old kernel or EPERM */ + err = (errno == EINVAL || errno == EPERM) ? 0 : -1; + break; + } + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + pr_debug("%s: failed to get fd for prog_id %u\n", + __func__, id); + continue; + } + + err = perf_event__synthesize_one_bpf_prog(tool, process, + machine, fd, + event, opts); + close(fd); + if (err) { + /* do not return error for old kernel */ + if (err == -2) + err = 0; + break; + } + } + free(event); + return err; +} diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h new file mode 100644 index 000000000000..7890067e1a37 --- /dev/null +++ b/tools/perf/util/bpf-event.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_BPF_EVENT_H +#define __PERF_BPF_EVENT_H + +#include <linux/compiler.h> +#include "event.h" + +struct machine; +union perf_event; +struct perf_sample; +struct perf_tool; +struct record_opts; + +#ifdef HAVE_LIBBPF_SUPPORT +int machine__process_bpf_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); + +int perf_event__synthesize_bpf_events(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine, + struct record_opts *opts); +#else +static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused) +{ + return 0; +} + +static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT +#endif diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 2f3eb6d293ee..6e68698aa0ad 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -15,6 +15,7 @@ #include <errno.h> #include "perf.h" #include "debug.h" +#include "evlist.h" #include "bpf-loader.h" #include "bpf-prologue.h" #include "probe-event.h" diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 62d245a90e1d..3f46856e3330 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -8,11 +8,7 @@ #include <linux/compiler.h> #include <linux/err.h> -#include <string.h> #include <bpf/libbpf.h> -#include "probe-event.h" -#include "evlist.h" -#include "debug.h" enum bpf_loader_errno { __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, @@ -44,6 +40,7 @@ enum bpf_loader_errno { }; struct perf_evsel; +struct perf_evlist; struct bpf_object; struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" @@ -87,6 +84,8 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); #else #include <errno.h> +#include <string.h> +#include "debug.h" static inline struct 
bpf_object * bpf__prepare_load(const char *filename __maybe_unused, diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 1e3c7c5cdc63..64f96b79f1d7 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -1,8 +1,31 @@ #ifndef _PERF_BRANCH_H #define _PERF_BRANCH_H 1 +#include <stdio.h> #include <stdint.h> -#include "../perf.h" +#include <linux/perf_event.h> +#include <linux/types.h> + +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 in_tx:1; + u64 abort:1; + u64 cycles:16; + u64 type:4; + u64 reserved:40; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; struct branch_type_stat { bool branch_to; @@ -13,8 +36,6 @@ struct branch_type_stat { u64 cross_2m; }; -struct branch_flags; - void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, u64 from, u64 to); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 04b1d53e4bf9..bff0d17920ed 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -15,6 +15,8 @@ #include <sys/types.h> #include "build-id.h" #include "event.h" +#include "namespaces.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include <linux/kernel.h> @@ -363,7 +365,8 @@ int perf_session__write_buildid_table(struct perf_session *session, if (err) return err; - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); err = machine__write_buildid_table(pos, fd); if (err) @@ -396,7 +399,8 @@ int dsos__hit_all(struct perf_session *session) if (err) return err; - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); err = machine__hit_all_dsos(pos); @@ -849,7 +853,8 @@ int perf_session__cache_build_ids(struct perf_session *session) ret = machine__cache_build_ids(&session->machines.host); - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__cache_build_ids(pos); } @@ -866,7 +871,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) struct rb_node *nd; bool ret = machine__read_build_ids(&session->machines.host, with_hits); - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__read_build_ids(pos, with_hits); } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f0c565164a97..93668f38f1ed 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -6,9 +6,10 @@ #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" -#include "namespaces.h" #include <linux/types.h> +struct nsinfo; + extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; struct feat_fd; diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build index 988fef1b11d7..613ecfd76527 100644 --- a/tools/perf/util/c++/Build +++ b/tools/perf/util/c++/Build @@ -1,2 +1,2 @@ -libperf-$(CONFIG_CLANGLLVM) += clang.o -libperf-$(CONFIG_CLANGLLVM) += 
clang-test.o +perf-$(CONFIG_CLANGLLVM) += clang.o +perf-$(CONFIG_CLANGLLVM) += clang-test.o diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index dc2212e12184..abb608b09269 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -23,8 +23,10 @@ #include "util.h" #include "sort.h" #include "machine.h" +#include "map.h" #include "callchain.h" #include "branch.h" +#include "symbol.h" #define CALLCHAIN_PARAM_DEFAULT \ .mode = CHAIN_GRAPH_ABS, \ @@ -1577,3 +1579,18 @@ int callchain_cursor__copy(struct callchain_cursor *dst, return rc; } + +/* + * Initialize a cursor before adding entries inside, but keep + * the previously allocated entries as a cache. + */ +void callchain_cursor_reset(struct callchain_cursor *cursor) +{ + struct callchain_cursor_node *node; + + cursor->nr = 0; + cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 99d38ac019b8..80e056a3d882 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -2,14 +2,14 @@ #ifndef __PERF_CALLCHAIN_H #define __PERF_CALLCHAIN_H -#include "../perf.h" #include <linux/list.h> #include <linux/rbtree.h> #include "event.h" -#include "map.h" -#include "symbol.h" +#include "map_symbol.h" #include "branch.h" +struct map; + #define HELP_PAD "\t\t\t\t" #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" @@ -188,20 +188,7 @@ int callchain_append(struct callchain_root *root, int callchain_merge(struct callchain_cursor *cursor, struct callchain_root *dst, struct callchain_root *src); -/* - * Initialize a cursor before adding entries inside, but keep - * the previously allocated entries as a cache. 
- */ -static inline void callchain_cursor_reset(struct callchain_cursor *cursor) -{ - struct callchain_cursor_node *node; - - cursor->nr = 0; - cursor->last = &cursor->first; - - for (node = cursor->first; node != NULL; node = node->next) - map__zput(node->map); -} +void callchain_cursor_reset(struct callchain_cursor *cursor); int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 39e628b8938e..39b8c4ec4e2e 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include "cache.h" -#include "config.h" #include <stdlib.h> #include <stdio.h> #include "color.h" @@ -10,44 +9,6 @@ int perf_use_color_default = -1; -int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) -{ - if (value) { - if (!strcasecmp(value, "never")) - return 0; - if (!strcasecmp(value, "always")) - return 1; - if (!strcasecmp(value, "auto")) - goto auto_color; - } - - /* Missing or explicit false to turn off colorization */ - if (!perf_config_bool(var, value)) - return 0; - - /* any normal truth value defaults to 'auto' */ - auto_color: - if (stdout_is_tty < 0) - stdout_is_tty = isatty(1); - if (stdout_is_tty || pager_in_use()) { - char *term = getenv("TERM"); - if (term && strcmp(term, "dumb")) - return 1; - } - return 0; -} - -int perf_color_default_config(const char *var, const char *value, - void *cb __maybe_unused) -{ - if (!strcmp(var, "color.ui")) { - perf_use_color_default = perf_config_colorbool(var, value, -1); - return 0; - } - - return 0; -} - static int __color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args, const char *trail) { diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 22777b1812ee..01f7bed21c9b 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -3,6 +3,7 @@ #define __PERF_COLOR_H #include <stdio.h> +#include <stdarg.h> /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ #define COLOR_MAXLEN 24 diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c new file mode 100644 index 000000000000..817dc56e7e95 --- /dev/null +++ b/tools/perf/util/color_config.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include "cache.h" +#include "config.h" +#include <stdlib.h> +#include <stdio.h> +#include "color.h" +#include <math.h> +#include <unistd.h> + +int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) +{ + if (value) { + if (!strcasecmp(value, "never")) + return 0; + if (!strcasecmp(value, "always")) + return 1; + if (!strcasecmp(value, "auto")) + goto auto_color; + } + + /* Missing or explicit false to turn off colorization */ + if (!perf_config_bool(var, value)) + return 0; + + /* any normal truth value defaults to 'auto' */ + auto_color: + if (stdout_is_tty < 0) + stdout_is_tty = isatty(1); + if (stdout_is_tty || pager_in_use()) { + char *term = getenv("TERM"); + if (term && strcmp(term, "dumb")) + return 1; + } + return 0; +} + +int perf_color_default_config(const char *var, const char *value, + void *cb __maybe_unused) +{ + if (!strcmp(var, "color.ui")) { + perf_use_color_default = perf_config_colorbool(var, value, -1); + return 0; + } + + return 0; +} diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 31279a7bd919..1066de92af12 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -6,6 +6,7 @@ 
#include <stdio.h> #include <string.h> #include <linux/refcount.h> +#include <linux/rbtree.h> #include "rwsem.h" struct comm_str { diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h index 3e5c438fe85e..f35d8fbfa2dd 100644 --- a/tools/perf/util/comm.h +++ b/tools/perf/util/comm.h @@ -2,9 +2,9 @@ #ifndef __PERF_COMM_H #define __PERF_COMM_H -#include "../perf.h" -#include <linux/rbtree.h> #include <linux/list.h> +#include <linux/types.h> +#include <stdbool.h> struct comm_str; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 1ea8f898f1a1..fa092511c52b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -13,6 +13,7 @@ #include <sys/param.h> #include "util.h" #include "cache.h" +#include "callchain.h" #include <subcmd/exec-cmd.h> #include "util/event.h" /* proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h new file mode 100644 index 000000000000..8cf4e40d322a --- /dev/null +++ b/tools/perf/util/cpu-set-sched.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Definitions taken from glibc for use with older systems, same licensing. +#ifndef _CPU_SET_SCHED_PERF_H +#define _CPU_SET_SCHED_PERF_H + +#include <features.h> +#include <sched.h> + +#ifndef CPU_EQUAL +#ifndef __CPU_EQUAL_S +#if __GNUC_PREREQ (2, 91) +# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ + (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0) +#else +# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ + (__extension__ \ + ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits; \ + const __cpu_mask *__arr2 = (cpusetp2)->__bits; \ + size_t __imax = (setsize) / sizeof (__cpu_mask); \ + size_t __i; \ + for (__i = 0; __i < __imax; ++__i) \ + if (__arr1[__i] != __arr2[__i]) \ + break; \ + __i == __imax; })) +#endif +#endif // __CPU_EQUAL_S + +#define CPU_EQUAL(cpusetp1, cpusetp2) \ + __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2) +#endif // CPU_EQUAL + +#ifndef CPU_OR +#ifndef __CPU_OP_S +#define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \ + (__extension__ \ + ({ cpu_set_t *__dest = (destset); \ + const __cpu_mask *__arr1 = (srcset1)->__bits; \ + const __cpu_mask *__arr2 = (srcset2)->__bits; \ + size_t __imax = (setsize) / sizeof (__cpu_mask); \ + size_t __i; \ + for (__i = 0; __i < __imax; ++__i) \ + ((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i]; \ + __dest; })) +#endif // __CPU_OP_S + +#define CPU_OR(destset, srcset1, srcset2) \ + __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |) +#endif // CPU_OR + +#endif // _CPU_SET_SCHED_PERF_H diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 383674f448fc..0bbc3feb0894 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -730,3 +730,13 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) buf[size - 1] = '\0'; return ptr - buf; } + +const struct cpu_map *cpu_map__online(void) /* thread unsafe */ +{ + static const struct cpu_map *online = NULL; + + if (!online) + online = cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + + return online; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index ed8999d1a640..f00ce624b9f7 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -29,6 +29,7 @@ int cpu_map__get_core_id(int cpu); int cpu_map__get_core(struct cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int 
cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); +const struct cpu_map *cpu_map__online(void); /* thread unsafe */ struct cpu_map *cpu_map__get(struct cpu_map *map); void cpu_map__put(struct cpu_map *map); diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build index bc22c39c727f..216cb17a3322 100644 --- a/tools/perf/util/cs-etm-decoder/Build +++ b/tools/perf/util/cs-etm-decoder/Build @@ -1 +1 @@ -libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o +perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8c155575c6c5..ba4c623cd8de 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -290,6 +290,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_buffer[i].instr_count = 0; decoder->packet_buffer[i].last_instr_taken_branch = false; decoder->packet_buffer[i].last_instr_size = 0; + decoder->packet_buffer[i].last_instr_type = 0; + decoder->packet_buffer[i].last_instr_subtype = 0; + decoder->packet_buffer[i].last_instr_cond = 0; + decoder->packet_buffer[i].flags = 0; + decoder->packet_buffer[i].exception_number = UINT32_MAX; + decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; decoder->packet_buffer[i].cpu = INT_MIN; } } @@ -300,14 +306,12 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, enum cs_etm_sample_type sample_type) { u32 et = 0; - struct int_node *inode = NULL; + int cpu; if (decoder->packet_count >= MAX_BUFFER - 1) return OCSD_RESP_FATAL_SYS_ERR; - /* Search the RB tree for the cpu associated with this traceID */ - inode = intlist__find(traceid_list, trace_chan_id); - if (!inode) + if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) return OCSD_RESP_FATAL_SYS_ERR; et = decoder->tail; @@ -317,12 +321,18 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].sample_type = sample_type; decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; - decoder->packet_buffer[et].cpu = *((int *)inode->priv); + decoder->packet_buffer[et].cpu = cpu; decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[et].instr_count = 0; decoder->packet_buffer[et].last_instr_taken_branch = false; decoder->packet_buffer[et].last_instr_size = 0; + decoder->packet_buffer[et].last_instr_type = 0; + decoder->packet_buffer[et].last_instr_subtype = 0; + decoder->packet_buffer[et].last_instr_cond = 0; + decoder->packet_buffer[et].flags = 0; + decoder->packet_buffer[et].exception_number = UINT32_MAX; + decoder->packet_buffer[et].trace_chan_id = trace_chan_id; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -366,6 +376,9 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, packet->start_addr = elem->st_addr; packet->end_addr = elem->en_addr; packet->instr_count = elem->num_instr_range; + packet->last_instr_type = elem->last_i_type; + packet->last_instr_subtype = elem->last_i_subtype; + packet->last_instr_cond = elem->last_instr_cond; switch (elem->last_i_type) { case OCSD_INSTR_BR: @@ -395,10 +408,20 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, static ocsd_datapath_resp_t cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) -{ - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, - 
CS_ETM_EXCEPTION); +{ int ret = 0; + struct cs_etm_packet *packet; + + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_EXCEPTION); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &decoder->packet_buffer[decoder->tail]; + packet->exception_number = elem->exception_number; + + return ret; } static ocsd_datapath_resp_t @@ -432,7 +455,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: - resp = cs_etm_decoder__buffer_exception(decoder, + resp = cs_etm_decoder__buffer_exception(decoder, elem, trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index a6407d41598f..3ab11dfa92ae 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -15,13 +15,6 @@ struct cs_etm_decoder; -struct cs_etm_buffer { - const unsigned char *buf; - size_t len; - u64 offset; - u64 ref_timestamp; -}; - enum cs_etm_sample_type { CS_ETM_EMPTY, CS_ETM_RANGE, @@ -43,8 +36,14 @@ struct cs_etm_packet { u64 start_addr; u64 end_addr; u32 instr_count; + u32 last_instr_type; + u32 last_instr_subtype; + u32 flags; + u32 exception_number; + u8 last_instr_cond; u8 last_instr_taken_branch; u8 last_instr_size; + u8 trace_chan_id; int cpu; }; @@ -99,9 +98,10 @@ enum { CS_ETM_PROTO_PTM, }; -enum { +enum cs_etm_decoder_operation { CS_ETM_OPERATION_PRINT = 1, CS_ETM_OPERATION_DECODE, + CS_ETM_OPERATION_MAX, }; int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 27a374ddf661..110804936fc3 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -12,6 +12,7 @@ #include <linux/log2.h> #include <linux/types.h> +#include <opencsd/ocsd_if_types.h> #include <stdlib.h> #include "auxtrace.h" @@ -24,6 +25,7 @@ #include "machine.h" #include "map.h" #include "perf.h" +#include "symbol.h" #include "thread.h" #include "thread_map.h" #include "thread-stack.h" @@ -63,13 +65,10 @@ struct cs_etm_queue { struct thread *thread; struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; - const struct cs_etm_state *state; union perf_event *event_buf; unsigned int queue_nr; pid_t pid, tid; int cpu; - u64 time; - u64 timestamp; u64 offset; u64 period_instructions; struct branch_stack *last_branch; @@ -77,11 +76,13 @@ struct cs_etm_queue { size_t last_branch_pos; struct cs_etm_packet *prev_packet; struct cs_etm_packet *packet; + const unsigned char *buf; + size_t buf_len, buf_used; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, - pid_t tid, u64 time_); + pid_t tid); /* PTMs ETMIDR [11:8] set to b0011 */ #define ETMIDR_PTM_VERSION 0x00000300 @@ -96,6 +97,34 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr) return CS_ETM_PROTO_ETMV3; } +static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) +{ + struct int_node *inode; + u64 *metadata; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + *magic = metadata[CS_ETM_MAGIC]; + return 0; +} + +int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) +{ + struct int_node *inode; + u64 *metadata; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + *cpu = (int)metadata[CS_ETM_CPU]; + 
return 0; +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -109,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string) fflush(stdout); } +static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx, + u32 etmidr) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); + t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; + t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; +} + +static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; + t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; +} + +static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm) +{ + int i; + u32 etmidr; + u64 architecture; + + for (i = 0; i < etm->num_cpu; i++) { + architecture = etm->metadata[i][CS_ETM_MAGIC]; + + switch (architecture) { + case __perf_cs_etmv3_magic: + etmidr = etm->metadata[i][CS_ETM_ETMIDR]; + cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); + break; + case __perf_cs_etmv4_magic: + cs_etm__set_trace_param_etmv4(t_params, etm, i); + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, + struct cs_etm_queue *etmq, + enum cs_etm_decoder_operation mode) +{ + int ret = -EINVAL; + + if (!(mode < CS_ETM_OPERATION_MAX)) + goto out; + + d_params->packet_printer = cs_etm__packet_dump; + d_params->operation = mode; + d_params->data = etmq; + d_params->formatted = true; + d_params->fsyncs = false; + d_params->hsyncs = false; + d_params->frame_aligned = true; + + ret = 0; +out: + return ret; +} + static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, struct auxtrace_buffer *buffer) { - int i, ret; + int ret; const char *color = PERF_COLOR_BLUE; struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; @@ -126,48 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); - for (i = 0; i < etm->num_cpu; i++) { - if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { - u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; - - t_params[i].protocol = - cs_etm__get_v7_protocol_version(etmidr); - t_params[i].etmv3.reg_ctrl = - etm->metadata[i][CS_ETM_ETMCR]; - t_params[i].etmv3.reg_trc_id = - etm->metadata[i][CS_ETM_ETMTRACEIDR]; - } else if (etm->metadata[i][CS_ETM_MAGIC] == - __perf_cs_etmv4_magic) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = - etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = - etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = - etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = - etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = - etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = 
- etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; - } - } + + if (!t_params) + return; + + if (cs_etm__init_trace_params(t_params, etm)) + goto out_free; /* Set decoder parameters to simply print the trace packets */ - d_params.packet_printer = cs_etm__packet_dump; - d_params.operation = CS_ETM_OPERATION_PRINT; - d_params.formatted = true; - d_params.fsyncs = false; - d_params.hsyncs = false; - d_params.frame_aligned = true; + if (cs_etm__init_decoder_params(&d_params, NULL, + CS_ETM_OPERATION_PRINT)) + goto out_free; decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - zfree(&t_params); - if (!decoder) - return; + goto out_free; do { size_t consumed; @@ -182,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, } while (buffer_used < buffer->size); cs_etm_decoder__free(decoder); + +out_free: + zfree(&t_params); } static int cs_etm__flush_events(struct perf_session *session, @@ -205,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session, if (ret < 0) return ret; - return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); + return cs_etm__process_timeless_queues(etm, -1); } static void cs_etm__free_queue(void *priv) @@ -251,7 +330,7 @@ static void cs_etm__free(struct perf_session *session) cs_etm__free_events(session); session->auxtrace = NULL; - /* First remove all traceID/CPU# nodes for the RB tree */ + /* First remove all traceID/metadata nodes for the RB tree */ intlist__for_each_entry_safe(inode, tmp, traceid_list) intlist__remove(traceid_list, inode); /* Then the RB tree itself */ @@ -297,7 +376,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, struct addr_location al; if (!etmq) - return -1; + return 0; machine = etmq->etm->machine; cpumode = cs_etm__cpu_mode(etmq, address); @@ -305,7 +384,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, thread = etmq->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) - return -EINVAL; + return 0; thread = etmq->etm->unknown_thread; } @@ -328,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, - unsigned int queue_nr) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) { - int i; struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; + struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; size_t szp = sizeof(struct cs_etm_packet); @@ -368,59 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (!etmq->event_buf) goto out_free; - etmq->etm = etm; - etmq->queue_nr = queue_nr; - etmq->pid = -1; - etmq->tid = -1; - etmq->cpu = -1; - /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); if (!t_params) goto out_free; - for (i = 0; i < etm->num_cpu; i++) { - if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { - u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; - - t_params[i].protocol = - cs_etm__get_v7_protocol_version(etmidr); - t_params[i].etmv3.reg_ctrl = - etm->metadata[i][CS_ETM_ETMCR]; - t_params[i].etmv3.reg_trc_id = - etm->metadata[i][CS_ETM_ETMTRACEIDR]; - } else if (etm->metadata[i][CS_ETM_MAGIC] == - __perf_cs_etmv4_magic) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = - etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = - etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = 
- etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = - etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = - etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = - etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; - } - } + if (cs_etm__init_trace_params(t_params, etm)) + goto out_free; - /* Set decoder parameters to simply print the trace packets */ - d_params.packet_printer = cs_etm__packet_dump; - d_params.operation = CS_ETM_OPERATION_DECODE; - d_params.formatted = true; - d_params.fsyncs = false; - d_params.hsyncs = false; - d_params.frame_aligned = true; - d_params.data = etmq; + /* Set decoder parameters to decode trace packets */ + if (cs_etm__init_decoder_params(&d_params, etmq, + CS_ETM_OPERATION_DECODE)) + goto out_free; etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - zfree(&t_params); - if (!etmq->decoder) goto out_free; @@ -433,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, cs_etm__mem_access)) goto out_free_decoder; - etmq->offset = 0; - etmq->period_instructions = 0; - + zfree(&t_params); return etmq; out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: + zfree(&t_params); zfree(&etmq->event_buf); zfree(&etmq->last_branch); zfree(&etmq->last_branch_rb); @@ -455,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, unsigned int queue_nr) { + int ret = 0; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - return 0; + goto out; - etmq = cs_etm__alloc_queue(etm, queue_nr); + etmq = cs_etm__alloc_queue(etm); - if (!etmq) - return -ENOMEM; + if (!etmq) { + ret = -ENOMEM; + goto out; + } queue->priv = etmq; - - if (queue->cpu != -1) - etmq->cpu = queue->cpu; - + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->cpu = queue->cpu; etmq->tid = queue->tid; + etmq->pid = -1; + etmq->offset = 0; + etmq->period_instructions = 0; - return 0; +out: + return ret; } static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) @@ -480,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) unsigned int i; int ret; + if (!etm->kernel_start) + etm->kernel_start = machine__kernel_start(etm->machine); + for (i = 0; i < etm->queues.nr_queues; i++) { ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); if (ret) @@ -637,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event, static int -cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +cs_etm__get_trace(struct cs_etm_queue *etmq) { struct auxtrace_buffer *aux_buffer = etmq->buffer; struct auxtrace_buffer *old_buffer = aux_buffer; @@ -651,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) if (!aux_buffer) { if (old_buffer) auxtrace_buffer__drop_data(old_buffer); - buff->len = 0; + etmq->buf_len = 0; return 0; } @@ -671,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) if (old_buffer) auxtrace_buffer__drop_data(old_buffer); - buff->offset = aux_buffer->offset; - buff->len = aux_buffer->size; - buff->buf = aux_buffer->data; - - buff->ref_timestamp = aux_buffer->reference; + etmq->buf_used = 0; + etmq->buf_len = aux_buffer->size; + etmq->buf = aux_buffer->data; - return buff->len; + return etmq->buf_len; } static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, @@ -719,7 +765,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.stream_id = etmq->etm->instructions_id; 
sample.period = period; sample.cpu = etmq->packet->cpu; - sample.flags = 0; + sample.flags = etmq->prev_packet->flags; sample.insn_len = 1; sample.cpumode = event->sample.header.misc; @@ -778,7 +824,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) sample.stream_id = etmq->etm->branches_id; sample.period = 1; sample.cpu = etmq->packet->cpu; - sample.flags = 0; + sample.flags = etmq->prev_packet->flags; sample.cpumode = event->sample.header.misc; /* @@ -1106,95 +1152,489 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) return 0; } +/* + * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue + * if need be. + * Returns: < 0 if error + * = 0 if no more auxtrace_buffer to read + * > 0 if the current buffer isn't empty yet + */ +static int cs_etm__get_data_block(struct cs_etm_queue *etmq) +{ + int ret; + + if (!etmq->buf_len) { + ret = cs_etm__get_trace(etmq); + if (ret <= 0) + return ret; + /* + * We cannot assume consecutive blocks in the data file + * are contiguous, reset the decoder to force re-sync. + */ + ret = cs_etm_decoder__reset(etmq->decoder); + if (ret) + return ret; + } + + return etmq->buf_len; +} + +static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, + struct cs_etm_packet *packet, + u64 end_addr) +{ + u16 instr16; + u32 instr32; + u64 addr; + + switch (packet->isa) { + case CS_ETM_ISA_T32: + /* + * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: + * + * b'15 b'8 + * +-----------------+--------+ + * | 1 1 0 1 1 1 1 1 | imm8 | + * +-----------------+--------+ + * + * According to the specifiction, it only defines SVC for T32 + * with 16 bits instruction and has no definition for 32bits; + * so below only read 2 bytes as instruction size for T32. + */ + addr = end_addr - 2; + cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); + if ((instr16 & 0xFF00) == 0xDF00) + return true; + + break; + case CS_ETM_ISA_A32: + /* + * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: + * + * b'31 b'28 b'27 b'24 + * +---------+---------+-------------------------+ + * | !1111 | 1 1 1 1 | imm24 | + * +---------+---------+-------------------------+ + */ + addr = end_addr - 4; + cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + if ((instr32 & 0x0F000000) == 0x0F000000 && + (instr32 & 0xF0000000) != 0xF0000000) + return true; + + break; + case CS_ETM_ISA_A64: + /* + * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: + * + * b'31 b'21 b'4 b'0 + * +-----------------------+---------+-----------+ + * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | + * +-----------------------+---------+-----------+ + */ + addr = end_addr - 4; + cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); + if ((instr32 & 0xFFE0001F) == 0xd4000001) + return true; + + break; + case CS_ETM_ISA_UNKNOWN: + default: + break; + } + + return false; +} + +static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) +{ + struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *prev_packet = etmq->prev_packet; + + if (magic == __perf_cs_etmv3_magic) + if (packet->exception_number == CS_ETMV3_EXC_SVC) + return true; + + /* + * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and + * HVC cases; need to check if it's SVC instruction based on + * packet address. 
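
The SVC checks in cs_etm__is_svc_instr() above reduce to fixed-mask comparisons against the last instruction bytes of the preceding range packet. A minimal standalone sketch of the same mask logic, outside perf and with hypothetical helper names and hard-coded opcodes, is:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Sketch: the same encoding masks used in the hunk above, applied to raw opcodes. */
	static bool is_svc_t32(uint16_t instr16)
	{
		return (instr16 & 0xFF00) == 0xDF00;		/* 1101 1111 imm8 */
	}

	static bool is_svc_a32(uint32_t instr32)
	{
		return (instr32 & 0x0F000000) == 0x0F000000 &&
		       (instr32 & 0xF0000000) != 0xF0000000;	/* cond field must not be 1111 */
	}

	static bool is_svc_a64(uint32_t instr32)
	{
		return (instr32 & 0xFFE0001F) == 0xD4000001;	/* SVC #imm16 */
	}

	int main(void)
	{
		printf("T32 0xDF05:      %d\n", is_svc_t32(0xDF05));	/* svc #5 -> 1 */
		printf("A32 0xEF000000:  %d\n", is_svc_a32(0xEF000000));	/* svc #0 -> 1 */
		printf("A64 0xD4000021:  %d\n", is_svc_a64(0xD4000021));	/* svc #1 -> 1 */
		return 0;
	}

In the patch itself the opcode is not passed in directly; it is fetched with cs_etm__mem_access() from end_addr - 2 (T32) or end_addr - 4 (A32/A64), which is why the helper needs the queue and the packet's end address.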
+ */ + if (magic == __perf_cs_etmv4_magic) { + if (packet->exception_number == CS_ETMV4_EXC_CALL && + cs_etm__is_svc_instr(etmq, prev_packet, + prev_packet->end_addr)) + return true; + } + + return false; +} + +static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) +{ + struct cs_etm_packet *packet = etmq->packet; + + if (magic == __perf_cs_etmv3_magic) + if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || + packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || + packet->exception_number == CS_ETMV3_EXC_PE_RESET || + packet->exception_number == CS_ETMV3_EXC_IRQ || + packet->exception_number == CS_ETMV3_EXC_FIQ) + return true; + + if (magic == __perf_cs_etmv4_magic) + if (packet->exception_number == CS_ETMV4_EXC_RESET || + packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || + packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || + packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || + packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || + packet->exception_number == CS_ETMV4_EXC_IRQ || + packet->exception_number == CS_ETMV4_EXC_FIQ) + return true; + + return false; +} + +static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) +{ + struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *prev_packet = etmq->prev_packet; + + if (magic == __perf_cs_etmv3_magic) + if (packet->exception_number == CS_ETMV3_EXC_SMC || + packet->exception_number == CS_ETMV3_EXC_HYP || + packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || + packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || + packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || + packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || + packet->exception_number == CS_ETMV3_EXC_GENERIC) + return true; + + if (magic == __perf_cs_etmv4_magic) { + if (packet->exception_number == CS_ETMV4_EXC_TRAP || + packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || + packet->exception_number == CS_ETMV4_EXC_INST_FAULT || + packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) + return true; + + /* + * For CS_ETMV4_EXC_CALL, except SVC other instructions + * (SMC, HVC) are taken as sync exceptions. + */ + if (packet->exception_number == CS_ETMV4_EXC_CALL && + !cs_etm__is_svc_instr(etmq, prev_packet, + prev_packet->end_addr)) + return true; + + /* + * ETMv4 has 5 bits for exception number; if the numbers + * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] + * they are implementation defined exceptions. + * + * For this case, simply take it as sync exception. + */ + if (packet->exception_number > CS_ETMV4_EXC_FIQ && + packet->exception_number <= CS_ETMV4_EXC_END) + return true; + } + + return false; +} + +static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) +{ + struct cs_etm_packet *packet = etmq->packet; + struct cs_etm_packet *prev_packet = etmq->prev_packet; + u64 magic; + int ret; + + switch (packet->sample_type) { + case CS_ETM_RANGE: + /* + * Immediate branch instruction without neither link nor + * return flag, it's normal branch instruction within + * the function. + */ + if (packet->last_instr_type == OCSD_INSTR_BR && + packet->last_instr_subtype == OCSD_S_INSTR_NONE) { + packet->flags = PERF_IP_FLAG_BRANCH; + + if (packet->last_instr_cond) + packet->flags |= PERF_IP_FLAG_CONDITIONAL; + } + + /* + * Immediate branch instruction with link (e.g. BL), this is + * branch instruction for function call. 
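
For CS_ETM_RANGE packets, the (last_instr_type, last_instr_subtype, last_instr_cond) triple recorded by the decoder is translated into perf branch flags by the chain of tests above. A condensed sketch of that mapping, using placeholder enum and flag values rather than the real OpenCSD/perf constants, might look like:

	#include <stdint.h>
	#include <stdio.h>

	/* Placeholder constants standing in for the OpenCSD/perf values. */
	enum instr_type    { INSTR_BR, INSTR_BR_INDIRECT, INSTR_OTHER };
	enum instr_subtype { SUBTYPE_NONE, SUBTYPE_BR_LINK, SUBTYPE_V7_IMPLIED_RET, SUBTYPE_V8_RET };

	#define FLAG_BRANCH      (1u << 0)
	#define FLAG_CALL        (1u << 1)
	#define FLAG_RETURN      (1u << 2)
	#define FLAG_CONDITIONAL (1u << 3)

	/* Sketch of the CS_ETM_RANGE flag derivation shown in the hunk above. */
	static uint32_t range_flags(enum instr_type type, enum instr_subtype sub, int cond)
	{
		uint32_t flags = 0;

		if (type == INSTR_BR && sub == SUBTYPE_NONE) {
			flags = FLAG_BRANCH;			/* plain immediate branch */
			if (cond)
				flags |= FLAG_CONDITIONAL;
		} else if (sub == SUBTYPE_BR_LINK) {
			flags = FLAG_BRANCH | FLAG_CALL;	/* BL / BLR: function call */
		} else if (type == INSTR_BR_INDIRECT) {
			flags = FLAG_BRANCH | FLAG_RETURN;	/* BR / RET / implied return */
		}

		return flags;
	}

	int main(void)
	{
		printf("BL     -> %#x\n", range_flags(INSTR_BR, SUBTYPE_BR_LINK, 0));
		printf("B.cond -> %#x\n", range_flags(INSTR_BR, SUBTYPE_NONE, 1));
		printf("RET    -> %#x\n", range_flags(INSTR_BR_INDIRECT, SUBTYPE_V8_RET, 0));
		return 0;
	}

The real code keeps the cases separate (as in the hunk) rather than collapsing them, because the exception and discontinuity cases also rewrite prev_packet->flags after the fact.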
+ */ + if (packet->last_instr_type == OCSD_INSTR_BR && + packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL; + + /* + * Indirect branch instruction with link (e.g. BLR), this is + * branch instruction for function call. + */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL; + + /* + * Indirect branch instruction with subtype of + * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for + * function return for A32/T32. + */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN; + + /* + * Indirect branch instruction without link (e.g. BR), usually + * this is used for function return, especially for functions + * within dynamic link lib. + */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_NONE) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN; + + /* Return instruction for function return. */ + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && + packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN; + + /* + * Decoder might insert a discontinuity in the middle of + * instruction packets, fixup prev_packet with flag + * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. + */ + if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) + prev_packet->flags |= PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_BEGIN; + + /* + * If the previous packet is an exception return packet + * and the return address just follows SVC instuction, + * it needs to calibrate the previous packet sample flags + * as PERF_IP_FLAG_SYSCALLRET. + */ + if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT) && + cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) + prev_packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_SYSCALLRET; + break; + case CS_ETM_DISCONTINUITY: + /* + * The trace is discontinuous, if the previous packet is + * instruction packet, set flag PERF_IP_FLAG_TRACE_END + * for previous packet. + */ + if (prev_packet->sample_type == CS_ETM_RANGE) + prev_packet->flags |= PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_TRACE_END; + break; + case CS_ETM_EXCEPTION: + ret = cs_etm__get_magic(packet->trace_chan_id, &magic); + if (ret) + return ret; + + /* The exception is for system call. */ + if (cs_etm__is_syscall(etmq, magic)) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_SYSCALLRET; + /* + * The exceptions are triggered by external signals from bus, + * interrupt controller, debug module, PE reset or halt. + */ + else if (cs_etm__is_async_exception(etmq, magic)) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT; + /* + * Otherwise, exception is caused by trap, instruction & + * data fault, or alignment errors. 
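
The CS_ETM_EXCEPTION case therefore selects one of three flag combinations depending on which classifier matches first. A minimal sketch of that decision, with hypothetical predicate arguments standing in for cs_etm__is_syscall()/is_async_exception()/is_sync_exception() and placeholder flag bits, is:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define FLAG_BRANCH     (1u << 0)
	#define FLAG_CALL       (1u << 1)
	#define FLAG_RETURN     (1u << 2)
	#define FLAG_SYSCALLRET (1u << 3)
	#define FLAG_ASYNC      (1u << 4)
	#define FLAG_INTERRUPT  (1u << 5)

	/*
	 * Sketch: the three-way split used for CS_ETM_EXCEPTION packets above.
	 * The booleans stand in for the real classifiers, which inspect the
	 * exception number (and, for ETMv4 CALL, the SVC opcode itself).
	 */
	static uint32_t exception_flags(bool is_syscall, bool is_async, bool is_sync)
	{
		if (is_syscall)
			return FLAG_BRANCH | FLAG_CALL | FLAG_SYSCALLRET;
		if (is_async)
			return FLAG_BRANCH | FLAG_CALL | FLAG_ASYNC | FLAG_INTERRUPT;
		if (is_sync)
			return FLAG_BRANCH | FLAG_CALL | FLAG_INTERRUPT;
		return 0;
	}

	int main(void)
	{
		printf("SVC exception -> %#x\n", exception_flags(true, false, false));
		printf("IRQ exception -> %#x\n", exception_flags(false, true, false));
		return 0;
	}

Whatever flags are chosen are then copied onto the previous range packet, since exception packets never generate samples on their own.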
+ */ + else if (cs_etm__is_sync_exception(etmq, magic)) + packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_CALL | + PERF_IP_FLAG_INTERRUPT; + + /* + * When the exception packet is inserted, since exception + * packet is not used standalone for generating samples + * and it's affiliation to the previous instruction range + * packet; so set previous range packet flags to tell perf + * it is an exception taken branch. + */ + if (prev_packet->sample_type == CS_ETM_RANGE) + prev_packet->flags = packet->flags; + break; + case CS_ETM_EXCEPTION_RET: + /* + * When the exception return packet is inserted, since + * exception return packet is not used standalone for + * generating samples and it's affiliation to the previous + * instruction range packet; so set previous range packet + * flags to tell perf it is an exception return branch. + * + * The exception return can be for either system call or + * other exception types; unfortunately the packet doesn't + * contain exception type related info so we cannot decide + * the exception type purely based on exception return packet. + * If we record the exception number from exception packet and + * reuse it for excpetion return packet, this is not reliable + * due the trace can be discontinuity or the interrupt can + * be nested, thus the recorded exception number cannot be + * used for exception return packet for these two cases. + * + * For exception return packet, we only need to distinguish the + * packet is for system call or for other types. Thus the + * decision can be deferred when receive the next packet which + * contains the return address, based on the return address we + * can read out the previous instruction and check if it's a + * system call instruction and then calibrate the sample flag + * as needed. + */ + if (prev_packet->sample_type == CS_ETM_RANGE) + prev_packet->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_RETURN | + PERF_IP_FLAG_INTERRUPT; + break; + case CS_ETM_EMPTY: + default: + break; + } + + return 0; +} + +static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) +{ + int ret = 0; + size_t processed = 0; + + /* + * Packets are decoded and added to the decoder's packet queue + * until the decoder packet processing callback has requested that + * processing stops or there is nothing left in the buffer. Normal + * operations that stop processing are a timestamp packet or a full + * decoder buffer queue. + */ + ret = cs_etm_decoder__process_data_block(etmq->decoder, + etmq->offset, + &etmq->buf[etmq->buf_used], + etmq->buf_len, + &processed); + if (ret) + goto out; + + etmq->offset += processed; + etmq->buf_used += processed; + etmq->buf_len -= processed; + +out: + return ret; +} + +static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) +{ + int ret; + + /* Process each packet in this chunk */ + while (1) { + ret = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (ret <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + /* + * Since packet addresses are swapped in packet + * handling within below switch() statements, + * thus setting sample flags must be called + * prior to switch() statement to use address + * information before packets swapping. + */ + ret = cs_etm__set_sample_flags(etmq); + if (ret < 0) + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. 
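
cs_etm__decode_data_block() above advances three counters in lockstep as the decoder library reports how many bytes it consumed. A standalone sketch of that bookkeeping, with a stub standing in for cs_etm_decoder__process_data_block() and illustrative names throughout, is:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct queue {
		uint64_t offset;	/* position in the AUX trace stream */
		const uint8_t *buf;	/* current auxtrace buffer */
		size_t buf_len;		/* bytes still to decode */
		size_t buf_used;	/* bytes already handed to the decoder */
	};

	/* Stub: pretend the decoder consumes at most 16 bytes per call. */
	static int fake_decode(const uint8_t *data, size_t len, size_t *processed)
	{
		(void)data;
		*processed = len < 16 ? len : 16;
		return 0;
	}

	static int decode_data_block(struct queue *q)
	{
		size_t processed = 0;
		int ret = fake_decode(&q->buf[q->buf_used], q->buf_len, &processed);

		if (ret)
			return ret;

		/* Same bookkeeping as the patch: all three counters move together. */
		q->offset += processed;
		q->buf_used += processed;
		q->buf_len -= processed;
		return 0;
	}

	int main(void)
	{
		uint8_t raw[40] = { 0 };
		struct queue q = { .buf = raw, .buf_len = sizeof(raw) };

		while (q.buf_len)
			if (decode_data_block(&q))
				break;

		printf("offset=%llu used=%zu left=%zu\n",
		       (unsigned long long)q.offset, q.buf_used, q.buf_len);
		return 0;
	}

Keeping buf/buf_len/buf_used inside struct cs_etm_queue is what lets the patch delete struct cs_etm_buffer: the queue itself now carries the consumption state between calls.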
+ */ + cs_etm__sample(etmq); + break; + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(etmq); + break; + case CS_ETM_DISCONTINUITY: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + case CS_ETM_EMPTY: + /* + * Should not receive empty packet, + * report error. + */ + pr_err("CS ETM Trace: empty packet\n"); + return -EINVAL; + default: + break; + } + } + + return ret; +} static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_buffer buffer; - size_t buffer_used, processed; int err = 0; - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - /* Go through each buffer in the queue and decode them one by one */ while (1) { - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); + err = cs_etm__get_data_block(etmq); if (err <= 0) return err; - /* - * We cannot assume consecutive blocks in the data file are - * contiguous, reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; /* Run trace decoder until buffer consumed or end of trace */ do { - processed = 0; - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, - &processed); + err = cs_etm__decode_data_block(etmq); if (err) return err; - etmq->offset += processed; - buffer_used += processed; - - /* Process each packet in this chunk */ - while (1) { - err = cs_etm_decoder__get_packet(etmq->decoder, - etmq->packet); - if (err <= 0) - /* - * Stop processing this chunk on - * end of data or error - */ - break; - - switch (etmq->packet->sample_type) { - case CS_ETM_RANGE: - /* - * If the packet contains an instruction - * range, generate instruction sequence - * events. - */ - cs_etm__sample(etmq); - break; - case CS_ETM_EXCEPTION: - case CS_ETM_EXCEPTION_RET: - /* - * If the exception packet is coming, - * make sure the previous instruction - * range packet to be handled properly. - */ - cs_etm__exception(etmq); - break; - case CS_ETM_DISCONTINUITY: - /* - * Discontinuity in trace, flush - * previous branch stack - */ - cs_etm__flush(etmq); - break; - case CS_ETM_EMPTY: - /* - * Should not receive empty packet, - * report error. - */ - pr_err("CS ETM Trace: empty packet\n"); - return -EINVAL; - default: - break; - } - } - } while (buffer.len > buffer_used); + /* + * Process each packet in this chunk, nothing to do if + * an error occurs other than hoping the next one will + * be better. 
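
After this refactor, cs_etm__run_decoder() is essentially a two-level loop over the three helpers introduced above. The control flow, reduced to a sketch with stubbed helpers (return conventions assumed from the comments: get_data_block returns < 0 on error, 0 when no data is left, > 0 when a buffer is available), is:

	#include <stdio.h>

	/* Stubs standing in for the cs-etm helpers; the shape of the loop is the point. */
	static int blocks_left = 3;

	static int get_data_block(void)       { return blocks_left-- > 0 ? 64 : 0; }
	static int decode_data_block(void)    { return 0; }
	static int process_decoder_queue(void){ return 0; }
	static int buf_len_remaining(void)    { return 0; }	/* one pass per block here */

	static int run_decoder(void)
	{
		int err;

		while (1) {
			err = get_data_block();
			if (err <= 0)
				return err;	/* 0: no more data, < 0: error */

			do {
				err = decode_data_block();
				if (err)
					return err;

				/* Queue-processing errors are tolerated, as in the patch. */
				err = process_decoder_queue();
			} while (buf_len_remaining());
		}
	}

	int main(void)
	{
		printf("run_decoder() -> %d\n", run_decoder());
		return 0;
	}

The same body previously lived inline in cs_etm__run_decoder(); splitting it out is what allows the sample-flag pass to run once per packet before the address-swapping switch.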
+ */ + err = cs_etm__process_decoder_queue(etmq); + + } while (etmq->buf_len); if (err == 0) /* Flush any remaining branch stack entries */ @@ -1205,7 +1645,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) } static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, - pid_t tid, u64 time_) + pid_t tid) { unsigned int i; struct auxtrace_queues *queues = &etm->queues; @@ -1215,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, struct cs_etm_queue *etmq = queue->priv; if (etmq && ((tid == -1) || (etmq->tid == tid))) { - etmq->time = time_; cs_etm__set_pid_tid_cpu(etm, queue); cs_etm__run_decoder(etmq); } @@ -1259,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session, if (event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, - event->fork.tid, - sample->time); + event->fork.tid); return 0; } @@ -1414,9 +1852,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event, 0xffffffff); /* - * Create an RB tree for traceID-CPU# tuple. Since the conversion has - * to be made for each packet that gets decoded, optimizing access in - * anything other than a sequential array is worth doing. + * Create an RB tree for traceID-metadata tuple. Since the conversion + * has to be made for each packet that gets decoded, optimizing access + * in anything other than a sequential array is worth doing. */ traceid_list = intlist__new(NULL); if (!traceid_list) { @@ -1482,8 +1920,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, err = -EINVAL; goto err_free_metadata; } - /* All good, associate the traceID with the CPU# */ - inode->priv = &metadata[j][CS_ETM_CPU]; + /* All good, associate the traceID with the metadata pointer */ + inode->priv = metadata[j]; } /* diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 37f8d48179ca..0e97c196147a 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -53,7 +53,51 @@ enum { CS_ETMV4_PRIV_MAX, }; -/* RB tree for quick conversion between traceID and CPUs */ +/* + * ETMv3 exception encoding number: + * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) + * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. + */ +enum { + CS_ETMV3_EXC_NONE = 0, + CS_ETMV3_EXC_DEBUG_HALT = 1, + CS_ETMV3_EXC_SMC = 2, + CS_ETMV3_EXC_HYP = 3, + CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4, + CS_ETMV3_EXC_JAZELLE_THUMBEE = 5, + CS_ETMV3_EXC_PE_RESET = 8, + CS_ETMV3_EXC_UNDEFINED_INSTR = 9, + CS_ETMV3_EXC_SVC = 10, + CS_ETMV3_EXC_PREFETCH_ABORT = 11, + CS_ETMV3_EXC_DATA_FAULT = 12, + CS_ETMV3_EXC_GENERIC = 13, + CS_ETMV3_EXC_IRQ = 14, + CS_ETMV3_EXC_FIQ = 15, +}; + +/* + * ETMv4 exception encoding number: + * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D) + * table 6-12 Possible values for the TYPE field in an Exception instruction + * trace packet, for ARMv7-A/R and ARMv8-A/R PEs. 
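
With inode->priv now pointing at the whole metadata block rather than just the CPU slot, any per-traceID field (CS_ETM_MAGIC, CS_ETM_CPU, ...) is one lookup away. A self-contained sketch of that access pattern, with a plain array standing in for the traceid_list intlist and only a subset of the metadata indices, is:

	#include <stdint.h>
	#include <stdio.h>

	/* Indices into the per-CPU metadata block (subset, mirroring cs-etm.h). */
	enum { CS_ETM_MAGIC, CS_ETM_CPU, CS_ETM_NR };

	#define ETMV4_MAGIC 0x4040404040404040ULL

	/* Stand-in for traceid_list: traceID -> metadata pointer. */
	static uint64_t metadata_cpu0[CS_ETM_NR] = { ETMV4_MAGIC, 0 };
	static uint64_t *traceid_map[256];

	static int get_cpu(uint8_t trace_chan_id, int *cpu)
	{
		uint64_t *metadata = traceid_map[trace_chan_id];

		if (!metadata)
			return -1;

		*cpu = (int)metadata[CS_ETM_CPU];
		return 0;
	}

	static int get_magic(uint8_t trace_chan_id, uint64_t *magic)
	{
		uint64_t *metadata = traceid_map[trace_chan_id];

		if (!metadata)
			return -1;

		*magic = metadata[CS_ETM_MAGIC];
		return 0;
	}

	int main(void)
	{
		int cpu;
		uint64_t magic;

		traceid_map[0x10] = metadata_cpu0;	/* traceID 0x10 traces CPU 0 */

		if (!get_cpu(0x10, &cpu) && !get_magic(0x10, &magic))
			printf("traceID 0x10: cpu=%d magic=%#llx\n",
			       cpu, (unsigned long long)magic);
		return 0;
	}

This is the same shape as cs_etm__get_cpu()/cs_etm__get_magic() in the patch, which is why the decoder can now recover both the CPU and the ETM version for a packet from its trace_chan_id alone.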
+ */ +enum { + CS_ETMV4_EXC_RESET = 0, + CS_ETMV4_EXC_DEBUG_HALT = 1, + CS_ETMV4_EXC_CALL = 2, + CS_ETMV4_EXC_TRAP = 3, + CS_ETMV4_EXC_SYSTEM_ERROR = 4, + CS_ETMV4_EXC_INST_DEBUG = 6, + CS_ETMV4_EXC_DATA_DEBUG = 7, + CS_ETMV4_EXC_ALIGNMENT = 10, + CS_ETMV4_EXC_INST_FAULT = 11, + CS_ETMV4_EXC_DATA_FAULT = 12, + CS_ETMV4_EXC_IRQ = 14, + CS_ETMV4_EXC_FIQ = 15, + CS_ETMV4_EXC_END = 31, +}; + +/* RB tree for quick conversion between traceID and metadata pointers */ struct intlist *traceid_list; #define KiB(x) ((x) * 1024) @@ -61,14 +105,15 @@ struct intlist *traceid_list; #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) -static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; -static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; +#define __perf_cs_etmv3_magic 0x3030303030303030ULL +#define __perf_cs_etmv4_magic 0x4040404040404040ULL #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) #ifdef HAVE_CSTRACE_SUPPORT int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); +int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); #else static inline int cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, @@ -76,6 +121,12 @@ cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, { return -1; } + +static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, + int *cpu __maybe_unused) +{ + return -1; +} #endif #endif diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 69fbb0a72d0c..de9b4769d06c 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -20,6 +20,7 @@ #include "thread.h" #include "comm.h" #include "symbol.h" +#include "map.h" #include "event.h" #include "util.h" #include "thread-stack.h" diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c deleted file mode 100644 index eec754243f4d..000000000000 --- a/tools/perf/util/drv_configs.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * drv_configs.h: Interface to apply PMU specific configuration - * Copyright (c) 2016-2018, Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#include "drv_configs.h" -#include "evlist.h" -#include "evsel.h" -#include "pmu.h" -#include <errno.h> - -static int -perf_evsel__apply_drv_configs(struct perf_evsel *evsel, - struct perf_evsel_config_term **err_term) -{ - bool found = false; - int err = 0; - struct perf_evsel_config_term *term; - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) - if (pmu->type == evsel->attr.type) { - found = true; - break; - } - - list_for_each_entry(term, &evsel->config_terms, list) { - if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) - continue; - - /* - * We have a configuration term, report an error if we - * can't find the PMU or if the PMU driver doesn't support - * cmd line driver configuration. 
- */ - if (!found || !pmu->set_drv_config) { - err = -EINVAL; - *err_term = term; - break; - } - - err = pmu->set_drv_config(term); - if (err) { - *err_term = term; - break; - } - } - - return err; -} - -int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, - struct perf_evsel **err_evsel, - struct perf_evsel_config_term **err_term) -{ - struct perf_evsel *evsel; - int err = 0; - - evlist__for_each_entry(evlist, evsel) { - err = perf_evsel__apply_drv_configs(evsel, err_term); - if (err) { - *err_evsel = evsel; - break; - } - } - - return err; -} diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h deleted file mode 100644 index 32bc9babc2e0..000000000000 --- a/tools/perf/util/drv_configs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * drv_configs.h: Interface to apply PMU specific configuration - * Copyright (c) 2016-2018, Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifndef __PERF_DRV_CONFIGS_H -#define __PERF_DRV_CONFIGS_H - -#include "drv_configs.h" -#include "evlist.h" -#include "evsel.h" - -int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, - struct perf_evsel **err_evsel, - struct perf_evsel_config_term **term); -#endif diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 62c8cf622607..ba58ba603b69 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -8,8 +8,11 @@ #include <unistd.h> #include <errno.h> #include <fcntl.h> +#include <libgen.h> #include "compress.h" +#include "namespaces.h" #include "path.h" +#include "map.h" #include "symbol.h" #include "srcline.h" #include "dso.h" @@ -1195,10 +1198,10 @@ struct dso *dso__new(const char *name) strcpy(dso->name, name); dso__set_long_name(dso, dso->name, false); dso__set_short_name(dso, dso->name, false); - dso->symbols = dso->symbol_names = RB_ROOT; + dso->symbols = dso->symbol_names = RB_ROOT_CACHED; dso->data.cache = RB_ROOT; - dso->inlined_nodes = RB_ROOT; - dso->srclines = RB_ROOT; + dso->inlined_nodes = RB_ROOT_CACHED; + dso->srclines = RB_ROOT_CACHED; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1467,7 +1470,7 @@ size_t dso__fprintf(struct dso *dso, FILE *fp) ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? 
"" : "NOT "); ret += dso__fprintf_buildid(dso, fp); ret += fprintf(fp, ")\n"); - for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 8c8a7abe809d..bb417c54c25a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -7,13 +7,14 @@ #include <linux/rbtree.h> #include <sys/types.h> #include <stdbool.h> +#include <stdio.h> #include "rwsem.h" -#include <linux/types.h> #include <linux/bitops.h> -#include "map.h" -#include "namespaces.h" #include "build-id.h" +struct machine; +struct map; + enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, DSO_BINARY_TYPE__GUEST_KALLSYMS, @@ -140,10 +141,10 @@ struct dso { struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root *root; /* root of rbtree that rb_node is in */ - struct rb_root symbols; - struct rb_root symbol_names; - struct rb_root inlined_nodes; - struct rb_root srclines; + struct rb_root_cached symbols; + struct rb_root_cached symbol_names; + struct rb_root_cached inlined_nodes; + struct rb_root_cached srclines; struct { u64 addr; struct symbol *symbol; @@ -235,7 +236,7 @@ bool dso__loaded(const struct dso *dso); static inline bool dso__has_symbols(const struct dso *dso) { - return !RB_EMPTY_ROOT(&dso->symbols); + return !RB_EMPTY_ROOT(&dso->symbols.rb_root); } bool dso__sorted_by_name(const struct dso *dso); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 937a5a4f71cc..ba7be74fad6e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -21,9 +21,13 @@ #include "thread.h" #include "thread_map.h" #include "sane_ctype.h" +#include "map.h" +#include "symbol.h" #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" +#include "session.h" +#include "bpf-event.h" #define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 @@ -45,6 +49,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_SWITCH] = "SWITCH", [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", [PERF_RECORD_NAMESPACES] = "NAMESPACES", + [PERF_RECORD_KSYMBOL] = "KSYMBOL", + [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -1329,6 +1335,22 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused, return machine__process_switch_event(machine, event); } +int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_ksymbol(machine, event, sample); +} + +int perf_event__process_bpf_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_bpf_event(machine, event, sample); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -1461,6 +1483,21 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); } +size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", + event->ksymbol_event.addr, 
event->ksymbol_event.len, + event->ksymbol_event.ksym_type, + event->ksymbol_event.flags, event->ksymbol_event.name); +} + +size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " bpf event with type %u, flags %u, id %u\n", + event->bpf_event.type, event->bpf_event.flags, + event->bpf_event.id); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1496,6 +1533,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_LOST: ret += perf_event__fprintf_lost(event, fp); break; + case PERF_RECORD_KSYMBOL: + ret += perf_event__fprintf_ksymbol(event, fp); + break; + case PERF_RECORD_BPF_EVENT: + ret += perf_event__fprintf_bpf_event(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index eb95f3384958..36ae7e92dab1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -5,6 +5,7 @@ #include <limits.h> #include <stdio.h> #include <linux/kernel.h> +#include <linux/bpf.h> #include "../perf.h" #include "build-id.h" @@ -84,6 +85,29 @@ struct throttle_event { u64 stream_id; }; +#ifndef KSYM_NAME_LEN +#define KSYM_NAME_LEN 256 +#endif + +struct ksymbol_event { + struct perf_event_header header; + u64 addr; + u32 len; + u16 ksym_type; + u16 flags; + char name[KSYM_NAME_LEN]; +}; + +struct bpf_event { + struct perf_event_header header; + u16 type; + u16 flags; + u32 id; + + /* for bpf_prog types */ + u8 tag[BPF_TAG_SIZE]; // prog tag +}; + #define PERF_SAMPLE_MASK \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ @@ -137,26 +161,7 @@ struct ip_callchain { u64 ips[0]; }; -struct branch_flags { - u64 mispred:1; - u64 predicted:1; - u64 in_tx:1; - u64 abort:1; - u64 cycles:16; - u64 type:4; - u64 reserved:40; -}; - -struct branch_entry { - u64 from; - u64 to; - struct branch_flags flags; -}; - -struct branch_stack { - u64 nr; - struct branch_entry entries[0]; -}; +struct branch_stack; enum { PERF_IP_FLAG_BRANCH = 1ULL << 0, @@ -527,8 +532,9 @@ struct auxtrace_error_event { u32 cpu; u32 pid; u32 tid; - u32 reserved__; /* For alignment */ + u32 fmt; u64 ip; + u64 time; char msg[MAX_AUXTRACE_ERROR_MSG]; }; @@ -651,6 +657,8 @@ union perf_event { struct stat_round_event stat_round; struct time_conv_event time_conv; struct feature_event feat; + struct ksymbol_event ksymbol_event; + struct bpf_event bpf_event; }; void perf_event__print_totals(void); @@ -748,6 +756,14 @@ int perf_event__process_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_ksymbol(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_bpf_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, @@ -811,6 +827,8 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union 
perf_event *event, FILE *fp); int kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8c902276d4b4..08cedb643ea6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks) + bool auxtrace_overwrite, int nr_cblocks, int affinity) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. */ - struct mmap_params mp = { .nr_cblocks = nr_cblocks }; + struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0); + return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 868294491194..744906dd4887 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -49,6 +49,9 @@ struct perf_evlist { struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + void (*trace_event_sample_raw)(struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample); u64 first_sample_time; u64 last_sample_time; }; @@ -162,7 +165,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks); + bool auxtrace_overwrite, int nr_cblocks, int affinity); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); @@ -314,5 +317,4 @@ void perf_evlist__force_leader(struct perf_evlist *evlist); struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, struct perf_evsel *evsel); - #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbc0466db368..684c893ca6bc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1035,6 +1035,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + attr->ksymbol = track && !perf_missing_features.ksymbol; + attr->bpf_event = track && opts->bpf_event && + !perf_missing_features.bpf_event; if (opts->record_namespaces) attr->namespaces = track; @@ -1652,6 +1655,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); PRINT_ATTRf(namespaces, p_unsigned); + PRINT_ATTRf(ksymbol, p_unsigned); + PRINT_ATTRf(bpf_event, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -1811,6 +1816,10 @@ fallback_missing_features: PERF_SAMPLE_BRANCH_NO_CYCLES); if 
(perf_missing_features.group_read && evsel->attr.inherit) evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); + if (perf_missing_features.ksymbol) + evsel->attr.ksymbol = 0; + if (perf_missing_features.bpf_event) + evsel->attr.bpf_event = 0; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1930,7 +1939,15 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.write_backward && evsel->attr.write_backward) { + if (!perf_missing_features.bpf_event && evsel->attr.bpf_event) { + perf_missing_features.bpf_event = true; + pr_debug2("switching off bpf_event\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) { + perf_missing_features.ksymbol = true; + pr_debug2("switching off ksymbol\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.write_backward && evsel->attr.write_backward) { perf_missing_features.write_backward = true; pr_debug2("switching off write_backward\n"); goto out_close; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 82a289ce8b0c..cc578e02e08f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -8,7 +8,7 @@ #include <linux/perf_event.h> #include <linux/types.h> #include "xyarray.h" -#include "symbol.h" +#include "symbol_conf.h" #include "cpumap.h" #include "counts.h" @@ -168,6 +168,8 @@ struct perf_missing_features { bool lbr_flags; bool write_backward; bool group_read; + bool ksymbol; + bool bpf_event; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dec6d218c31c..61ce197c5362 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -563,7 +563,6 @@ static int write_cmdline(struct feat_fd *ff, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" struct cpu_topo { - u32 cpu_nr; u32 core_sib; u32 thread_sib; char **core_siblings; @@ -679,7 +678,6 @@ static struct cpu_topo *build_cpu_topology(void) goto out_free; tp = addr; - tp->cpu_nr = nr; addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; @@ -1042,11 +1040,9 @@ static int write_cpuid(struct feat_fd *ff, int ret; ret = get_cpuid(buffer, sizeof(buffer)); - if (!ret) - goto write_it; + if (ret) + return -1; - return -1; -write_it: return do_write_string(ff, buffer); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8aad8330e392..669f961316f0 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include "callchain.h" #include "util.h" #include "build-id.h" #include "hist.h" @@ -11,6 +12,7 @@ #include "evsel.h" #include "annotate.h" #include "srcline.h" +#include "symbol.h" #include "thread.h" #include "ui/progress.h" #include <errno.h> @@ -209,7 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) void hists__output_recalc_col_len(struct hists *hists, int max_rows) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); struct hist_entry *n; int row = 0; @@ -296,7 +298,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) if (!he->leaf) { struct hist_entry *child; - struct rb_node *node = rb_first(&he->hroot_out); + struct rb_node *node = rb_first_cached(&he->hroot_out); while (node) { child = rb_entry(node, struct hist_entry, rb_node); node = rb_next(node); @@ -311,8 +313,8 @@ 
static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) static void hists__delete_entry(struct hists *hists, struct hist_entry *he) { - struct rb_root *root_in; - struct rb_root *root_out; + struct rb_root_cached *root_in; + struct rb_root_cached *root_out; if (he->parent_he) { root_in = &he->parent_he->hroot_in; @@ -325,8 +327,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) root_out = &hists->entries; } - rb_erase(&he->rb_node_in, root_in); - rb_erase(&he->rb_node, root_out); + rb_erase_cached(&he->rb_node_in, root_in); + rb_erase_cached(&he->rb_node, root_out); --hists->nr_entries; if (!he->filtered) @@ -337,7 +339,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); struct hist_entry *n; while (next) { @@ -353,7 +355,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) void hists__delete_entries(struct hists *hists) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_node *next = rb_first_cached(&hists->entries); struct hist_entry *n; while (next) { @@ -435,8 +437,8 @@ static int hist_entry__init(struct hist_entry *he, } INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); - he->hroot_in = RB_ROOT; - he->hroot_out = RB_ROOT; + he->hroot_in = RB_ROOT_CACHED; + he->hroot_out = RB_ROOT_CACHED; if (!symbol_conf.report_hierarchy) he->leaf = true; @@ -513,8 +515,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, int64_t cmp; u64 period = entry->stat.period; u64 weight = entry->stat.weight; + bool leftmost = true; - p = &hists->entries_in->rb_node; + p = &hists->entries_in->rb_root.rb_node; while (*p != NULL) { parent = *p; @@ -557,8 +560,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (cmp < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } he = hist_entry__new(entry, sample_self); @@ -570,7 +575,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, hists->entries_in); + rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost); out: if (sample_self) he_stat__add_cpumode_period(&he->stat, al->cpumode, period); @@ -1279,16 +1284,17 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) } static struct hist_entry *hierarchy_insert_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *he, struct hist_entry *parent_he, struct perf_hpp_list *hpp_list) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter, *new; struct perf_hpp_fmt *fmt; int64_t cmp; + bool leftmost = true; while (*p != NULL) { parent = *p; @@ -1308,8 +1314,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, if (cmp < 0) p = &parent->rb_left; - else + else { p = &parent->rb_right; + leftmost = false; + } } new = hist_entry__new(he, true); @@ -1343,12 +1351,12 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, } rb_link_node(&new->rb_node_in, parent, p); - rb_insert_color(&new->rb_node_in, root); + rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; } static int 
hists__hierarchy_insert_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *he) { struct perf_hpp_list_node *node; @@ -1395,13 +1403,14 @@ static int hists__hierarchy_insert_entry(struct hists *hists, } static int hists__collapse_insert_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *he) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; int64_t cmp; + bool leftmost = true; if (symbol_conf.report_hierarchy) return hists__hierarchy_insert_entry(hists, root, he); @@ -1432,19 +1441,21 @@ static int hists__collapse_insert_entry(struct hists *hists, if (cmp < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, root); + rb_insert_color_cached(&he->rb_node_in, root, leftmost); return 1; } -struct rb_root *hists__get_rotate_entries_in(struct hists *hists) +struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists) { - struct rb_root *root; + struct rb_root_cached *root; pthread_mutex_lock(&hists->lock); @@ -1467,7 +1478,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; struct hist_entry *n; int ret; @@ -1479,7 +1490,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) root = hists__get_rotate_entries_in(hists); - next = rb_first(root); + next = rb_first_cached(root); while (next) { if (session_done()) @@ -1487,7 +1498,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - rb_erase(&n->rb_node_in, root); + rb_erase_cached(&n->rb_node_in, root); ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); if (ret < 0) return -1; @@ -1558,7 +1569,7 @@ static void hierarchy_recalc_total_periods(struct hists *hists) struct rb_node *node; struct hist_entry *he; - node = rb_first(&hists->entries); + node = rb_first_cached(&hists->entries); hists->stats.total_period = 0; hists->stats.total_non_filtered_period = 0; @@ -1578,13 +1589,14 @@ static void hierarchy_recalc_total_periods(struct hists *hists) } } -static void hierarchy_insert_output_entry(struct rb_root *root, +static void hierarchy_insert_output_entry(struct rb_root_cached *root, struct hist_entry *he) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; struct perf_hpp_fmt *fmt; + bool leftmost = true; while (*p != NULL) { parent = *p; @@ -1592,12 +1604,14 @@ static void hierarchy_insert_output_entry(struct rb_root *root, if (hist_entry__sort(he, iter) > 0) p = &parent->rb_left; - else + else { p = &parent->rb_right; + leftmost = false; + } } rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + rb_insert_color_cached(&he->rb_node, root, leftmost); /* update column width of dynamic entry */ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { @@ -1608,16 +1622,16 @@ static void hierarchy_insert_output_entry(struct rb_root *root, static void hists__hierarchy_output_resort(struct hists *hists, struct ui_progress *prog, - struct rb_root *root_in, - struct rb_root 
*root_out, + struct rb_root_cached *root_in, + struct rb_root_cached *root_out, u64 min_callchain_hits, bool use_callchain) { struct rb_node *node; struct hist_entry *he; - *root_out = RB_ROOT; - node = rb_first(root_in); + *root_out = RB_ROOT_CACHED; + node = rb_first_cached(root_in); while (node) { he = rb_entry(node, struct hist_entry, rb_node_in); @@ -1660,15 +1674,16 @@ static void hists__hierarchy_output_resort(struct hists *hists, } } -static void __hists__insert_output_entry(struct rb_root *entries, +static void __hists__insert_output_entry(struct rb_root_cached *entries, struct hist_entry *he, u64 min_callchain_hits, bool use_callchain) { - struct rb_node **p = &entries->rb_node; + struct rb_node **p = &entries->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; struct perf_hpp_fmt *fmt; + bool leftmost = true; if (use_callchain) { if (callchain_param.mode == CHAIN_GRAPH_REL) { @@ -1689,12 +1704,14 @@ static void __hists__insert_output_entry(struct rb_root *entries, if (hist_entry__sort(he, iter) > 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, entries); + rb_insert_color_cached(&he->rb_node, entries, leftmost); perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { if (perf_hpp__is_dynamic_entry(fmt) && @@ -1704,9 +1721,10 @@ static void __hists__insert_output_entry(struct rb_root *entries, } static void output_resort(struct hists *hists, struct ui_progress *prog, - bool use_callchain, hists__resort_cb_t cb) + bool use_callchain, hists__resort_cb_t cb, + void *cb_arg) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *next; struct hist_entry *n; u64 callchain_total; @@ -1736,14 +1754,14 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, else root = hists->entries_in; - next = rb_first(root); - hists->entries = RB_ROOT; + next = rb_first_cached(root); + hists->entries = RB_ROOT_CACHED; while (next) { n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - if (cb && cb(n)) + if (cb && cb(n, cb_arg)) continue; __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); @@ -1757,7 +1775,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, } } -void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, + hists__resort_cb_t cb, void *cb_arg) { bool use_callchain; @@ -1768,18 +1787,23 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro use_callchain |= symbol_conf.show_branchflag_count; - output_resort(evsel__hists(evsel), prog, use_callchain, NULL); + output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg); +} + +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +{ + return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); } void hists__output_resort(struct hists *hists, struct ui_progress *prog) { - output_resort(hists, prog, symbol_conf.use_callchain, NULL); + output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL); } void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, hists__resort_cb_t cb) { - output_resort(hists, prog, symbol_conf.use_callchain, cb); + output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL); } static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) @@ 
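The output_resort() hunks above thread a caller-supplied cb_arg through to the resort callback, and the new perf_evsel__output_resort_cb() exposes that pair to callers; a non-zero return from the callback makes output_resort() skip the entry (the "if (cb && cb(n, cb_arg)) continue;" line). A minimal sketch of a caller, assuming it lives inside tools/perf with util/hist.h available; the threshold callback and both function names are illustrative, not part of this patch:

/* Hypothetical filter: only emit entries whose period meets a threshold. */
static int min_period_cb(struct hist_entry *he, void *arg)
{
	u64 *min_period = arg;

	/* Non-zero return tells output_resort() to skip this entry. */
	return he->stat.period < *min_period;
}

static void resort_with_threshold(struct perf_evsel *evsel, u64 min_period)
{
	/* No progress bar here; callers that have one pass it instead of NULL. */
	perf_evsel__output_resort_cb(evsel, NULL, min_period_cb, &min_period);
}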
-1798,7 +1822,7 @@ struct rb_node *rb_hierarchy_last(struct rb_node *node) struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); while (can_goto_child(he, HMD_NORMAL)) { - node = rb_last(&he->hroot_out); + node = rb_last(&he->hroot_out.rb_root); he = rb_entry(node, struct hist_entry, rb_node); } return node; @@ -1809,7 +1833,7 @@ struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_di struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); if (can_goto_child(he, hmd)) - node = rb_first(&he->hroot_out); + node = rb_first_cached(&he->hroot_out); else node = rb_next(node); @@ -1847,7 +1871,7 @@ bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) if (he->leaf) return false; - node = rb_first(&he->hroot_out); + node = rb_first_cached(&he->hroot_out); child = rb_entry(node, struct hist_entry, rb_node); while (node && child->filtered) { @@ -1965,7 +1989,7 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil hists__reset_filter_stats(hists); hists__reset_col_len(hists); - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (filter(hists, h)) @@ -1975,13 +1999,15 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil } } -static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) +static void resort_filtered_entry(struct rb_root_cached *root, + struct hist_entry *he) { - struct rb_node **p = &root->rb_node; + struct rb_node **p = &root->rb_root.rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - struct rb_root new_root = RB_ROOT; + struct rb_root_cached new_root = RB_ROOT_CACHED; struct rb_node *nd; + bool leftmost = true; while (*p != NULL) { parent = *p; @@ -1989,22 +2015,24 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) if (hist_entry__sort(he, iter) > 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + rb_insert_color_cached(&he->rb_node, root, leftmost); if (he->leaf || he->filtered) return; - nd = rb_first(&he->hroot_out); + nd = rb_first_cached(&he->hroot_out); while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); nd = rb_next(nd); - rb_erase(&h->rb_node, &he->hroot_out); + rb_erase_cached(&h->rb_node, &he->hroot_out); resort_filtered_entry(&new_root, h); } @@ -2015,14 +2043,14 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) { struct rb_node *nd; - struct rb_root new_root = RB_ROOT; + struct rb_root_cached new_root = RB_ROOT_CACHED; hists->stats.nr_non_filtered_samples = 0; hists__reset_filter_stats(hists); hists__reset_col_len(hists); - nd = rb_first(&hists->entries); + nd = rb_first_cached(&hists->entries); while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); int ret; @@ -2066,12 +2094,12 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a * resort output after applying a new filter since filter in a lower * hierarchy can change periods in a upper hierarchy. 
*/ - nd = rb_first(&hists->entries); + nd = rb_first_cached(&hists->entries); while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); nd = rb_next(nd); - rb_erase(&h->rb_node, &hists->entries); + rb_erase_cached(&h->rb_node, &hists->entries); resort_filtered_entry(&new_root, h); } @@ -2140,18 +2168,19 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered) static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; int64_t cmp; + bool leftmost = true; if (hists__has(hists, need_collapse)) root = &hists->entries_collapsed; else root = hists->entries_in; - p = &root->rb_node; + p = &root->rb_root.rb_node; while (*p != NULL) { parent = *p; @@ -2164,8 +2193,10 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (cmp < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } he = hist_entry__new(pair, true); @@ -2175,7 +2206,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (symbol_conf.cumulate_callchain) memset(he->stat_acc, 0, sizeof(he->stat)); rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, root); + rb_insert_color_cached(&he->rb_node_in, root, leftmost); hists__inc_stats(hists, he); he->dummy = true; } @@ -2184,15 +2215,16 @@ out: } static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, - struct rb_root *root, + struct rb_root_cached *root, struct hist_entry *pair) { struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; struct perf_hpp_fmt *fmt; + bool leftmost = true; - p = &root->rb_node; + p = &root->rb_root.rb_node; while (*p != NULL) { int64_t cmp = 0; @@ -2209,14 +2241,16 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, if (cmp < 0) p = &parent->rb_left; - else + else { p = &parent->rb_right; + leftmost = false; + } } he = hist_entry__new(pair, true); if (he) { rb_link_node(&he->rb_node_in, parent, p); - rb_insert_color(&he->rb_node_in, root); + rb_insert_color_cached(&he->rb_node_in, root, leftmost); he->dummy = true; he->hists = hists; @@ -2233,9 +2267,9 @@ static struct hist_entry *hists__find_entry(struct hists *hists, struct rb_node *n; if (hists__has(hists, need_collapse)) - n = hists->entries_collapsed.rb_node; + n = hists->entries_collapsed.rb_root.rb_node; else - n = hists->entries_in->rb_node; + n = hists->entries_in->rb_root.rb_node; while (n) { struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in); @@ -2252,10 +2286,10 @@ static struct hist_entry *hists__find_entry(struct hists *hists, return NULL; } -static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, +static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root, struct hist_entry *he) { - struct rb_node *n = root->rb_node; + struct rb_node *n = root->rb_root.rb_node; while (n) { struct hist_entry *iter; @@ -2280,13 +2314,13 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, return NULL; } -static void hists__match_hierarchy(struct rb_root *leader_root, - struct rb_root *other_root) +static void hists__match_hierarchy(struct rb_root_cached *leader_root, + struct rb_root_cached *other_root) { struct rb_node *nd; struct hist_entry *pos, *pair; - for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) { pos 
= rb_entry(nd, struct hist_entry, rb_node_in); pair = hists__find_hierarchy_entry(other_root, pos); @@ -2302,7 +2336,7 @@ static void hists__match_hierarchy(struct rb_root *leader_root, */ void hists__match(struct hists *leader, struct hists *other) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *nd; struct hist_entry *pos, *pair; @@ -2317,7 +2351,7 @@ void hists__match(struct hists *leader, struct hists *other) else root = leader->entries_in; - for (nd = rb_first(root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); pair = hists__find_entry(other, pos); @@ -2328,13 +2362,13 @@ void hists__match(struct hists *leader, struct hists *other) static int hists__link_hierarchy(struct hists *leader_hists, struct hist_entry *parent, - struct rb_root *leader_root, - struct rb_root *other_root) + struct rb_root_cached *leader_root, + struct rb_root_cached *other_root) { struct rb_node *nd; struct hist_entry *pos, *leader; - for (nd = rb_first(other_root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); if (hist_entry__has_pairs(pos)) { @@ -2377,7 +2411,7 @@ static int hists__link_hierarchy(struct hists *leader_hists, */ int hists__link(struct hists *leader, struct hists *other) { - struct rb_root *root; + struct rb_root_cached *root; struct rb_node *nd; struct hist_entry *pos, *pair; @@ -2393,7 +2427,7 @@ int hists__link(struct hists *leader, struct hists *other) else root = other->entries_in; - for (nd = rb_first(root); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node_in); if (!hist_entry__has_pairs(pos)) { @@ -2566,10 +2600,10 @@ int perf_hist_config(const char *var, const char *value) int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) { memset(hists, 0, sizeof(*hists)); - hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; + hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED; hists->entries_in = &hists->entries_in_array[0]; - hists->entries_collapsed = RB_ROOT; - hists->entries = RB_ROOT; + hists->entries_collapsed = RB_ROOT_CACHED; + hists->entries = RB_ROOT_CACHED; pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; hists->hpp_list = hpp_list; @@ -2577,14 +2611,14 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) return 0; } -static void hists__delete_remaining_entries(struct rb_root *root) +static void hists__delete_remaining_entries(struct rb_root_cached *root) { struct rb_node *node; struct hist_entry *he; - while (!RB_EMPTY_ROOT(root)) { - node = rb_first(root); - rb_erase(node, root); + while (!RB_EMPTY_ROOT(&root->rb_root)) { + node = rb_first_cached(root); + rb_erase_cached(node, root); he = rb_entry(node, struct hist_entry, rb_node_in); hist_entry__delete(he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 664b5eda8d51..4af27fbab24f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -2,9 +2,9 @@ #ifndef __PERF_HIST_H #define __PERF_HIST_H +#include <linux/rbtree.h> #include <linux/types.h> #include <pthread.h> -#include "callchain.h" #include "evsel.h" #include "header.h" #include "color.h" @@ -13,6 +13,9 @@ struct hist_entry; struct hist_entry_ops; struct addr_location; +struct map_symbol; +struct mem_info; +struct branch_info; struct symbol; enum hist_filter { @@ -70,10 +73,10 
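All of the hists/machine/rblist conversions in this series follow the same rb_root_cached idiom: remember whether the new node only ever descended to the left, pass that hint to rb_insert_color_cached(), and read or drain the tree with rb_first_cached()/rb_erase_cached(). A self-contained sketch of the pattern against the rbtree API these hunks use; the item struct, its key, and the helper names are illustrative:

#include <linux/rbtree.h>
#include <linux/types.h>
#include <stdbool.h>
#include <stdlib.h>

struct item {
	struct rb_node	rb_node;
	u64		key;
};

static void item__insert(struct rb_root_cached *root, struct item *new)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;

	while (*p != NULL) {
		struct item *pos = rb_entry(*p, struct item, rb_node);

		parent = *p;
		if (new->key < pos->key) {
			p = &(*p)->rb_left;
		} else {
			p = &(*p)->rb_right;
			leftmost = false;	/* went right at least once */
		}
	}

	rb_link_node(&new->rb_node, parent, p);
	/* The leftmost hint keeps the cached first-node pointer up to date. */
	rb_insert_color_cached(&new->rb_node, root, leftmost);
}

static void items__delete_all(struct rb_root_cached *root)
{
	struct rb_node *next = rb_first_cached(root);

	while (next) {
		struct item *pos = rb_entry(next, struct item, rb_node);

		next = rb_next(&pos->rb_node);
		rb_erase_cached(&pos->rb_node, root);
		free(pos);
	}
}

Only the leftmost node is cached, which is why the hunks that need the last entry, such as rb_last(&he->hroot_out.rb_root) above, still reach through the embedded rb_root: there is no cached-last helper.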
@@ struct thread; struct dso; struct hists { - struct rb_root entries_in_array[2]; - struct rb_root *entries_in; - struct rb_root entries; - struct rb_root entries_collapsed; + struct rb_root_cached entries_in_array[2]; + struct rb_root_cached *entries_in; + struct rb_root_cached entries; + struct rb_root_cached entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; u64 callchain_period; @@ -160,8 +163,10 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); -typedef int (*hists__resort_cb_t)(struct hist_entry *he); +typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); +void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, + hists__resort_cb_t cb, void *cb_arg); void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, @@ -230,7 +235,7 @@ static __pure inline bool hists__has_callchains(struct hists *hists) int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); -struct rb_root *hists__get_rotate_entries_in(struct hists *hists); +struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists); struct perf_hpp { char *buf; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ee6ca65f81f4..0c0180c67574 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -27,6 +27,8 @@ #include "evsel.h" #include "evlist.h" #include "machine.h" +#include "map.h" +#include "symbol.h" #include "session.h" #include "util.h" #include "thread.h" @@ -142,7 +144,7 @@ static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, - sample->tid, 0, "Lost trace data"); + sample->tid, 0, "Lost trace data", sample->time); err = perf_session__deliver_synth_event(bts->session, &event, NULL); if (err) @@ -372,7 +374,7 @@ static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, - "Failed to get instruction"); + "Failed to get instruction", 0); err = perf_session__deliver_synth_event(bts->session, &event, NULL); if (err) diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 1b704fbea9de..23bf788f84b9 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -1,4 +1,4 @@ -libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o +perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 4503f3ca45ab..6e03db142091 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -26,6 +26,7 @@ #include "../cache.h" #include "../util.h" +#include "../auxtrace.h" #include "intel-pt-insn-decoder.h" #include "intel-pt-pkt-decoder.h" @@ -867,7 
+868,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) ret = intel_pt_get_packet(decoder->buf, decoder->len, &decoder->packet); - if (ret == INTEL_PT_NEED_MORE_BYTES && + if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { ret = intel_pt_get_split_packet(decoder); if (ret < 0) @@ -1394,7 +1395,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); - decoder->cbr = 0; decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; @@ -2575,6 +2575,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) } } +#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1) + +/** + * adj_for_padding - adjust overlap to account for padding. + * @buf_b: second buffer + * @buf_a: first buffer + * @len_a: size of first buffer + * + * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap + * accordingly. + * + * Return: A pointer into @buf_b from where non-overlapped data starts + */ +static unsigned char *adj_for_padding(unsigned char *buf_b, + unsigned char *buf_a, size_t len_a) +{ + unsigned char *p = buf_b - MAX_PADDING; + unsigned char *q = buf_a + len_a - MAX_PADDING; + int i; + + for (i = MAX_PADDING; i; i--, p++, q++) { + if (*p != *q) + break; + } + + return p; +} + /** * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data * using TSC. @@ -2625,8 +2653,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, /* Same TSC, so buffers are consecutive */ if (!cmp && rem_b >= rem_a) { + unsigned char *start; + *consecutive = true; - return buf_b + len_b - (rem_b - rem_a); + start = buf_b + len_b - (rem_b - rem_a); + return adj_for_padding(start, buf_a, len_a); } if (cmp < 0) return buf_b; /* tsc_a < tsc_b => no overlap */ @@ -2689,7 +2720,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, found = memmem(buf_a, len_a, buf_b, len_a); if (found) { *consecutive = true; - return buf_b + len_a; + return adj_for_padding(buf_b + len_a, buf_a, len_a); } /* Try again at next PSB in buffer 'a' */ diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2e72373ec6df..3b497bab4324 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1411,7 +1411,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, - pid_t pid, pid_t tid, u64 ip) + pid_t pid, pid_t tid, u64 ip, u64 timestamp) { union perf_event event; char msg[MAX_AUXTRACE_ERROR_MSG]; @@ -1420,7 +1420,7 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg); + code, cpu, pid, tid, ip, msg, timestamp); err = perf_session__deliver_synth_event(pt->session, &event, NULL); if (err) @@ -1430,6 +1430,18 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, return err; } +static int intel_ptq_synth_error(struct intel_pt_queue *ptq, + const struct intel_pt_state *state) +{ + struct intel_pt *pt = ptq->pt; + u64 tm = ptq->timestamp; + + tm = pt->timeless_decoding ? 
0 : tsc_to_perf_time(tm, &pt->tc); + + return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, + ptq->tid, state->from_ip, tm); +} + static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) { struct auxtrace_queue *queue; @@ -1676,10 +1688,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) intel_pt_next_tid(pt, ptq); } if (pt->synth_opts.errors) { - err = intel_pt_synth_error(pt, state->err, - ptq->cpu, ptq->pid, - ptq->tid, - state->from_ip); + err = intel_ptq_synth_error(ptq, state); if (err) return err; } @@ -1804,7 +1813,7 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) { return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, - sample->pid, sample->tid, 0); + sample->pid, sample->tid, 0, sample->time); } static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 85bab8735fa9..5c19ee001299 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -45,7 +45,7 @@ static inline unsigned int intlist__nr_entries(const struct intlist *ilist) /* For intlist iteration */ static inline struct int_node *intlist__first(struct intlist *ilist) { - struct rb_node *rn = rb_first(&ilist->rblist.entries); + struct rb_node *rn = rb_first_cached(&ilist->rblist.entries); return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; } static inline struct int_node *intlist__next(struct int_node *in) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index bf249552a9b0..eda28d3570bc 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -2,6 +2,7 @@ #include <sys/sysmacros.h> #include <sys/types.h> #include <errno.h> +#include <libgen.h> #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 7b1f06567521..1403dec189b4 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -3,12 +3,13 @@ #define __PERF_KVM_STAT_H #include "../perf.h" -#include "evsel.h" -#include "evlist.h" -#include "session.h" #include "tool.h" #include "stat.h" +struct perf_evsel; +struct perf_evlist; +struct perf_session; + struct event_key { #define INVALID_KEY (~0ULL) u64 key; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 143f7057d581..61959aba7e27 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -10,6 +10,7 @@ #include "hist.h" #include "machine.h" #include "map.h" +#include "symbol.h" #include "sort.h" #include "strlist.h" #include "thread.h" @@ -21,6 +22,7 @@ #include "unwind.h" #include "linux/hash.h" #include "asm/bug.h" +#include "bpf-event.h" #include "sane_ctype.h" #include <symbol/kallsyms.h> @@ -41,7 +43,7 @@ static void machine__threads_init(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - threads->entries = RB_ROOT; + threads->entries = RB_ROOT_CACHED; init_rwsem(&threads->lock); threads->nr = 0; INIT_LIST_HEAD(&threads->dead); @@ -179,7 +181,7 @@ void machine__delete_threads(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; down_write(&threads->lock); - nd = rb_first(&threads->entries); + nd = rb_first_cached(&threads->entries); while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); @@ -222,7 +224,7 @@ void 
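The adj_for_padding() helper added to the Intel PT decoder above trims up to PERF_AUXTRACE_RECORD_ALIGNMENT - 1 bytes that were appended to the older buffer as padding but never appear in the newer one, so the non-overlapped data in the new buffer can start a few bytes earlier than the naive overlap calculation says. A stand-alone toy (buffer contents and sizes invented for illustration) showing the walk-back:

#include <stdio.h>
#include <stddef.h>

#define PERF_AUXTRACE_RECORD_ALIGNMENT	8
#define MAX_PADDING	(PERF_AUXTRACE_RECORD_ALIGNMENT - 1)

/* Same walk as adj_for_padding() in the decoder hunk above. */
static unsigned char *adj_for_padding(unsigned char *buf_b,
				      unsigned char *buf_a, size_t len_a)
{
	unsigned char *p = buf_b - MAX_PADDING;
	unsigned char *q = buf_a + len_a - MAX_PADDING;
	int i;

	for (i = MAX_PADDING; i; i--, p++, q++) {
		if (*p != *q)
			break;
	}
	return p;
}

int main(void)
{
	/* Old buffer: payload 1..8 plus three bytes of zero padding. */
	unsigned char buf_a[] = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0 };
	/* New buffer: same payload, then genuinely new data, no padding. */
	unsigned char buf_b[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
	/*
	 * The naive answer would be buf_b + sizeof(buf_a) (offset 11).
	 * adj_for_padding() stops at the first byte of buf_b that does not
	 * match buf_a's tail and reports offset 8, right after the payload.
	 */
	unsigned char *start = adj_for_padding(buf_b + sizeof(buf_a),
					       buf_a, sizeof(buf_a));

	printf("non-overlapped data starts at offset %ld\n",
	       (long)(start - buf_b));
	return 0;
}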
machine__delete(struct machine *machine) void machines__init(struct machines *machines) { machine__init(&machines->host, "", HOST_KERNEL_ID); - machines->guests = RB_ROOT; + machines->guests = RB_ROOT_CACHED; } void machines__exit(struct machines *machines) @@ -234,9 +236,10 @@ void machines__exit(struct machines *machines) struct machine *machines__add(struct machines *machines, pid_t pid, const char *root_dir) { - struct rb_node **p = &machines->guests.rb_node; + struct rb_node **p = &machines->guests.rb_root.rb_node; struct rb_node *parent = NULL; struct machine *pos, *machine = malloc(sizeof(*machine)); + bool leftmost = true; if (machine == NULL) return NULL; @@ -251,12 +254,14 @@ struct machine *machines__add(struct machines *machines, pid_t pid, pos = rb_entry(parent, struct machine, rb_node); if (pid < pos->pid) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&machine->rb_node, parent, p); - rb_insert_color(&machine->rb_node, &machines->guests); + rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost); return machine; } @@ -267,7 +272,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec) machines->host.comm_exec = comm_exec; - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *machine = rb_entry(nd, struct machine, rb_node); machine->comm_exec = comm_exec; @@ -276,7 +281,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec) struct machine *machines__find(struct machines *machines, pid_t pid) { - struct rb_node **p = &machines->guests.rb_node; + struct rb_node **p = &machines->guests.rb_root.rb_node; struct rb_node *parent = NULL; struct machine *machine; struct machine *default_machine = NULL; @@ -339,7 +344,7 @@ void machines__process_guests(struct machines *machines, { struct rb_node *nd; - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); process(pos, data); } @@ -352,7 +357,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) machines->host.id_hdr_size = id_hdr_size; - for (node = rb_first(&machines->guests); node; node = rb_next(node)) { + for (node = rb_first_cached(&machines->guests); node; + node = rb_next(node)) { machine = rb_entry(node, struct machine, rb_node); machine->id_hdr_size = id_hdr_size; } @@ -465,9 +471,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &threads->entries.rb_node; + struct rb_node **p = &threads->entries.rb_root.rb_node; struct rb_node *parent = NULL; struct thread *th; + bool leftmost = true; th = threads__get_last_match(threads, machine, pid, tid); if (th) @@ -485,8 +492,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine, if (tid < th->tid) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } if (!create) @@ -495,7 +504,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &threads->entries); + rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost); /* * We have to initialize map_groups separately @@ -506,7 +515,7 @@ static struct thread 
*____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &threads->entries); + rb_erase_cached(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -681,6 +690,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static int machine__process_ksymbol_register(struct machine *machine, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct symbol *sym; + struct map *map; + + map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr); + if (!map) { + map = dso__new_map(event->ksymbol_event.name); + if (!map) + return -ENOMEM; + + map->start = event->ksymbol_event.addr; + map->pgoff = map->start; + map->end = map->start + event->ksymbol_event.len; + map_groups__insert(&machine->kmaps, map); + } + + sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len, + 0, 0, event->ksymbol_event.name); + if (!sym) + return -ENOMEM; + dso__insert_symbol(map->dso, sym); + return 0; +} + +static int machine__process_ksymbol_unregister(struct machine *machine, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct map *map; + + map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr); + if (map) + map_groups__remove(&machine->kmaps, map); + + return 0; +} + +int machine__process_ksymbol(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample) +{ + if (dump_trace) + perf_event__fprintf_ksymbol(event, stdout); + + if (event->ksymbol_event.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) + return machine__process_ksymbol_unregister(machine, event, + sample); + return machine__process_ksymbol_register(machine, event, sample); +} + static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename) { const char *dup_filename; @@ -744,7 +806,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) struct rb_node *nd; size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret += __dsos__fprintf(&pos->dsos.head, fp); } @@ -764,7 +826,7 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, struct rb_node *nd; size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm); - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm); } @@ -804,7 +866,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) ret = fprintf(fp, "Threads: %u\n", threads->nr); - for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&threads->entries); nd; + nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); @@ -1107,7 +1170,7 @@ failure: void machines__destroy_kernel_maps(struct machines *machines) { - struct rb_node *next = rb_first(&machines->guests); + struct rb_node *next = rb_first_cached(&machines->guests); machine__destroy_kernel_maps(&machines->host); @@ -1115,7 +1178,7 @@ void machines__destroy_kernel_maps(struct machines *machines) struct machine *pos = 
rb_entry(next, struct machine, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &machines->guests); + rb_erase_cached(&pos->rb_node, &machines->guests); machine__delete(pos); } } @@ -1680,7 +1743,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) down_write(&threads->lock); - rb_erase_init(&th->rb_node, &threads->entries); + rb_erase_cached(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); --threads->nr; /* @@ -1812,6 +1875,10 @@ int machine__process_event(struct machine *machine, union perf_event *event, case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: ret = machine__process_switch_event(machine, event); break; + case PERF_RECORD_KSYMBOL: + ret = machine__process_ksymbol(machine, event, sample); break; + case PERF_RECORD_BPF_EVENT: + ret = machine__process_bpf_event(machine, event, sample); break; default: ret = -1; break; @@ -2453,7 +2520,8 @@ int machine__for_each_thread(struct machine *machine, for (i = 0; i < THREADS__TABLE_SIZE; i++) { threads = &machine->threads[i]; - for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&threads->entries); nd; + nd = rb_next(nd)) { thread = rb_entry(nd, struct thread, rb_node); rc = fn(thread, priv); if (rc != 0) @@ -2480,7 +2548,7 @@ int machines__for_each_thread(struct machines *machines, if (rc != 0) return rc; - for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *machine = rb_entry(nd, struct machine, rb_node); rc = machine__for_each_thread(machine, fn, priv); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a5d1da60f751..f70ab98a7bde 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -4,7 +4,7 @@ #include <sys/types.h> #include <linux/rbtree.h> -#include "map.h" +#include "map_groups.h" #include "dso.h" #include "event.h" #include "rwsem.h" @@ -29,11 +29,11 @@ struct vdso_info; #define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) struct threads { - struct rb_root entries; - struct rw_semaphore lock; - unsigned int nr; - struct list_head dead; - struct thread *last_match; + struct rb_root_cached entries; + struct rw_semaphore lock; + unsigned int nr; + struct list_head dead; + struct thread *last_match; }; struct machine { @@ -130,6 +130,9 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_ksymbol(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); @@ -137,7 +140,7 @@ typedef void (*machine__process_t)(struct machine *machine, void *data); struct machines { struct machine host; - struct rb_root guests; + struct rb_root_cached guests; }; void machines__init(struct machines *machines); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6751301a755c..fbeb0c6efaa6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -286,8 +286,8 @@ void map__put(struct map *map) void map__fixup_start(struct map *map) { - struct rb_root *symbols = &map->dso->symbols; - struct rb_node *nd = rb_first(symbols); + struct rb_root_cached *symbols = &map->dso->symbols; + struct rb_node *nd = 
rb_first_cached(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); map->start = sym->start; @@ -296,8 +296,8 @@ void map__fixup_start(struct map *map) void map__fixup_end(struct map *map) { - struct rb_root *symbols = &map->dso->symbols; - struct rb_node *nd = rb_last(symbols); + struct rb_root_cached *symbols = &map->dso->symbols; + struct rb_node *nd = rb_last(&symbols->rb_root); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); map->end = sym->end; @@ -557,6 +557,12 @@ void map_groups__init(struct map_groups *mg, struct machine *machine) refcount_set(&mg->refcnt, 1); } +void map_groups__insert(struct map_groups *mg, struct map *map) +{ + maps__insert(&mg->maps, map); + map->groups = mg; +} + static void __maps__purge(struct maps *maps) { struct rb_root *root = &maps->entries; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 09282aa45c80..0e20749f2c55 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -6,12 +6,10 @@ #include <linux/compiler.h> #include <linux/list.h> #include <linux/rbtree.h> -#include <pthread.h> #include <stdio.h> #include <string.h> #include <stdbool.h> #include <linux/types.h> -#include "rwsem.h" struct dso; struct ip_callchain; @@ -48,38 +46,7 @@ struct map { refcount_t refcnt; }; -#define KMAP_NAME_LEN 256 - -struct kmap { - struct ref_reloc_sym *ref_reloc_sym; - struct map_groups *kmaps; - char name[KMAP_NAME_LEN]; -}; - -struct maps { - struct rb_root entries; - struct rb_root names; - struct rw_semaphore lock; -}; - -struct map_groups { - struct maps maps; - struct machine *machine; - refcount_t refcnt; -}; - -struct map_groups *map_groups__new(struct machine *machine); -void map_groups__delete(struct map_groups *mg); -bool map_groups__empty(struct map_groups *mg); - -static inline struct map_groups *map_groups__get(struct map_groups *mg) -{ - if (mg) - refcount_inc(&mg->refcnt); - return mg; -} - -void map_groups__put(struct map_groups *mg); +struct kmap; struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); @@ -174,18 +141,7 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); -struct srccode_state { - char *srcfile; - unsigned line; -}; - -static inline void srccode_state_init(struct srccode_state *state) -{ - state->srcfile = NULL; - state->line = 0; -} - -void srccode_state_free(struct srccode_state *state); +struct srccode_state; int map__fprintf_srccode(struct map *map, u64 addr, FILE *fp, struct srccode_state *state); @@ -198,61 +154,9 @@ void map__fixup_end(struct map *map); void map__reloc_vmlinux(struct map *map); -void maps__insert(struct maps *maps, struct map *map); -void maps__remove(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); -struct map *maps__first(struct maps *maps); -struct map *map__next(struct map *map); -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, - struct map **mapp); -void map_groups__init(struct map_groups *mg, struct machine *machine); -void map_groups__exit(struct map_groups *mg); -int map_groups__clone(struct thread *thread, - struct map_groups *parent); -size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); - int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr); -static inline void map_groups__insert(struct map_groups *mg, struct map *map) -{ - maps__insert(&mg->maps, map); - map->groups = 
mg; -} - -static inline void map_groups__remove(struct map_groups *mg, struct map *map) -{ - maps__remove(&mg->maps, map); -} - -static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) -{ - return maps__find(&mg->maps, addr); -} - -struct map *map_groups__first(struct map_groups *mg); - -static inline struct map *map_groups__next(struct map *map) -{ - return map__next(map); -} - -struct symbol *map_groups__find_symbol(struct map_groups *mg, - u64 addr, struct map **mapp); - -struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - const char *name, - struct map **mapp); - -struct addr_map_symbol; - -int map_groups__find_ams(struct addr_map_symbol *ams); - -int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, - FILE *fp); - -struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); - bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h new file mode 100644 index 000000000000..4dcda33e0fdf --- /dev/null +++ b/tools/perf/util/map_groups.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_MAP_GROUPS_H +#define __PERF_MAP_GROUPS_H + +#include <linux/refcount.h> +#include <linux/rbtree.h> +#include <stdio.h> +#include <stdbool.h> +#include <linux/types.h> +#include "rwsem.h" + +struct ref_reloc_sym; +struct machine; +struct map; +struct thread; + +struct maps { + struct rb_root entries; + struct rb_root names; + struct rw_semaphore lock; +}; + +void maps__insert(struct maps *maps, struct map *map); +void maps__remove(struct maps *maps, struct map *map); +struct map *maps__find(struct maps *maps, u64 addr); +struct map *maps__first(struct maps *maps); +struct map *map__next(struct map *map); +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); + +struct map_groups { + struct maps maps; + struct machine *machine; + refcount_t refcnt; +}; + +#define KMAP_NAME_LEN 256 + +struct kmap { + struct ref_reloc_sym *ref_reloc_sym; + struct map_groups *kmaps; + char name[KMAP_NAME_LEN]; +}; + +struct map_groups *map_groups__new(struct machine *machine); +void map_groups__delete(struct map_groups *mg); +bool map_groups__empty(struct map_groups *mg); + +static inline struct map_groups *map_groups__get(struct map_groups *mg) +{ + if (mg) + refcount_inc(&mg->refcnt); + return mg; +} + +void map_groups__put(struct map_groups *mg); +void map_groups__init(struct map_groups *mg, struct machine *machine); +void map_groups__exit(struct map_groups *mg); +int map_groups__clone(struct thread *thread, struct map_groups *parent); +size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); + +void map_groups__insert(struct map_groups *mg, struct map *map); + +static inline void map_groups__remove(struct map_groups *mg, struct map *map) +{ + maps__remove(&mg->maps, map); +} + +static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) +{ + return maps__find(&mg->maps, addr); +} + +struct map *map_groups__first(struct map_groups *mg); + +static inline struct map *map_groups__next(struct map *map) +{ + return map__next(map); +} + +struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp); +struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp); + +struct addr_map_symbol; + +int map_groups__find_ams(struct addr_map_symbol *ams); + +int 
map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); + +struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); + +#endif // __PERF_MAP_GROUPS_H diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h new file mode 100644 index 000000000000..5a1aed9f6bb4 --- /dev/null +++ b/tools/perf/util/map_symbol.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_MAP_SYMBOL +#define __PERF_MAP_SYMBOL 1 + +#include <linux/types.h> + +struct map; +struct symbol; + +struct map_symbol { + struct map *map; + struct symbol *sym; +}; + +struct addr_map_symbol { + struct map *map; + struct symbol *sym; + u64 addr; + u64 al_addr; + u64 phys_addr; +}; +#endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index a28f9b5cc4ff..b8d864ed4afe 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw) + bool raw, bool details) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; @@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, if (asprintf(&s, "%s\n%*s%s]", pe->metric_name, 8, "[", pe->desc) < 0) return; + + if (details) { + if (asprintf(&s, "%s\n%*s%s]", + s, 8, "[", pe->metric_expr) < 0) + return; + } } if (!s) @@ -352,7 +358,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, else if (metrics && !raw) printf("\nMetrics:\n\n"); - for (node = rb_first(&groups.entries); node; node = next) { + for (node = rb_first_cached(&groups.entries); node; node = next) { struct mep *me = container_of(node, struct mep, nd); if (metricgroups) diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 8a155dba0581..5c52097a5c63 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt, const char *str, struct rblist *metric_events); -void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +void metricgroup__print(bool metrics, bool groups, char *filter, + bool raw, bool details); bool metricgroup__has_metric(const char *metric); #endif diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 8fc39311a30d..cdc7740fc181 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -10,6 +10,9 @@ #include <sys/mman.h> #include <inttypes.h> #include <asm/bug.h> +#ifdef HAVE_LIBNUMA_SUPPORT +#include <numaif.h> +#endif #include "debug.h" #include "event.h" #include "mmap.h" @@ -154,9 +157,72 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb } #ifdef HAVE_AIO_SUPPORT + +#ifdef HAVE_LIBNUMA_SUPPORT +static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +{ + map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (map->aio.data[idx] == MAP_FAILED) { + map->aio.data[idx] = NULL; + return -1; + } + + return 0; +} + +static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +{ + if (map->aio.data[idx]) { + munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); + map->aio.data[idx] = NULL; + } +} + +static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity) +{ + void *data; + size_t mmap_len; + unsigned long node_mask; + + if (affinity 
!= PERF_AFFINITY_SYS && cpu__max_node() > 1) { + data = map->aio.data[idx]; + mmap_len = perf_mmap__mmap_len(map); + node_mask = 1UL << cpu__get_node(cpu); + if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { + pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", + data, data + mmap_len, cpu__get_node(cpu)); + return -1; + } + } + + return 0; +} +#else +static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) +{ + map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); + if (map->aio.data[idx] == NULL) + return -1; + + return 0; +} + +static void perf_mmap__aio_free(struct perf_mmap *map, int idx) +{ + zfree(&(map->aio.data[idx])); +} + +static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused, + int cpu __maybe_unused, int affinity __maybe_unused) +{ + return 0; +} +#endif + static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) { - int delta_max, i, prio; + int delta_max, i, prio, ret; map->aio.nr_cblocks = mp->nr_cblocks; if (map->aio.nr_cblocks) { @@ -177,11 +243,14 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) } delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); for (i = 0; i < map->aio.nr_cblocks; ++i) { - map->aio.data[i] = malloc(perf_mmap__mmap_len(map)); - if (!map->aio.data[i]) { + ret = perf_mmap__aio_alloc(map, i); + if (ret == -1) { pr_debug2("failed to allocate data buffer area, error %m"); return -1; } + ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity); + if (ret == -1) + return -1; /* * Use cblock.aio_fildes value different from -1 * to denote started aio write operation on the @@ -210,7 +279,7 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) int i; for (i = 0; i < map->aio.nr_cblocks; ++i) - zfree(&map->aio.data[i]); + perf_mmap__aio_free(map, i); if (map->aio.data) zfree(&map->aio.data); zfree(&map->aio.cblocks); @@ -314,6 +383,32 @@ void perf_mmap__munmap(struct perf_mmap *map) auxtrace_mmap__munmap(&map->auxtrace_mmap); } +static void build_node_mask(int node, cpu_set_t *mask) +{ + int c, cpu, nr_cpus; + const struct cpu_map *cpu_map = NULL; + + cpu_map = cpu_map__online(); + if (!cpu_map) + return; + + nr_cpus = cpu_map__nr(cpu_map); + for (c = 0; c < nr_cpus; c++) { + cpu = cpu_map->map[c]; /* map c index to online cpu index */ + if (cpu__get_node(cpu) == node) + CPU_SET(cpu, mask); + } +} + +static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) +{ + CPU_ZERO(&map->affinity_mask); + if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) + build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); + else if (mp->affinity == PERF_AFFINITY_CPU) + CPU_SET(map->cpu, &map->affinity_mask); +} + int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) { /* @@ -343,6 +438,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c map->fd = fd; map->cpu = cpu; + perf_mmap__setup_affinity_mask(map, mp); + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, &mp->auxtrace_mp, map->base, fd)) return -1; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index aeb6942fdb00..e566c19b242b 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -38,6 +38,7 @@ struct perf_mmap { int nr_cblocks; } aio; #endif + cpu_set_t affinity_mask; }; /* @@ -69,7 +70,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask, nr_cblocks; + int prot, mask, nr_cblocks, affinity; struct auxtrace_mmap_params auxtrace_mp; }; diff --git 
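perf_mmap__aio_bind() above relies on mbind(2) to pin the freshly mmap'd AIO data buffer to the NUMA node of the mmap's CPU when libnuma support is built in. A stand-alone sketch of that mmap-then-mbind sequence; it assumes <numaif.h> from libnuma is installed and hard-codes node 0 and a 4 MiB buffer for the demo, whereas the patch derives the node from the mmap's CPU:

#include <numaif.h>	/* mbind(), MPOL_BIND; provided by libnuma */
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	size_t len = 4 * 1024 * 1024;
	int node = 0;				/* demo: bind to node 0 */
	unsigned long node_mask = 1UL << node;
	void *buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Ask the kernel to back this range with pages from 'node' only. */
	if (mbind(buf, len, MPOL_BIND, &node_mask,
		  sizeof(node_mask) * 8, 0)) {
		perror("mbind");
		return 1;
	}

	printf("bound %zu bytes to node %d\n", len, node);
	munmap(buf, len);
	return 0;
}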
a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 920e1e6551dd..4dcc01b2532c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only); + metricgroup__print(true, true, NULL, name_only, details_flag); } int parse_events__is_hardcoded_term(struct parse_events_term *term) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index da8fe57691b8..44819bdb037d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -311,7 +311,7 @@ value_sym '/' event_config '/' $$ = list; } | -value_sym sep_slash_dc +value_sym sep_slash_slash_dc { struct list_head *list; int type = $1 >> 16; @@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE sep_dc: ':' | -sep_slash_dc: '/' | ':' | +sep_slash_slash_dc: '/' '/' | ':' | %% diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 11a234740632..51d437f55d18 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -29,8 +29,6 @@ struct perf_pmu_format { struct list_head list; }; -#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" - int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 76fecec7b3f9..47253c3daf55 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -6,9 +6,10 @@ #include <linux/compiler.h> #include <linux/perf_event.h> #include <stdbool.h> -#include "evsel.h" #include "parse-events.h" +struct perf_evsel_config_term; + enum { PERF_PMU_FORMAT_VALUE_CONFIG, PERF_PMU_FORMAT_VALUE_CONFIG1, @@ -16,6 +17,7 @@ enum { }; #define PERF_PMU_FORMAT_BITS 64 +#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" struct perf_event_attr; @@ -29,7 +31,6 @@ struct perf_pmu { struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ struct list_head list; /* ELEM */ - int (*set_drv_config) (struct perf_evsel_config_term *term); }; struct perf_pmu_info { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 18a59fba97ff..0030f9b9bf7e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -35,11 +35,14 @@ #include "util.h" #include "event.h" +#include "namespaces.h" #include "strlist.h" #include "strfilter.h" #include "debug.h" #include "cache.h" #include "color.h" +#include "map.h" +#include "map_groups.h" #include "symbol.h" #include "thread.h" #include <api/fs/fs.h> @@ -3528,7 +3531,8 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, /* Show all (filtered) symbols */ setup_pager(); - for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&map->dso->symbol_names); nd; + nd = rb_next(nd)) { struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); if (strfilter__compare(_filter, pos->sym.name)) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 15a98c3a2a2f..05c8d571a901 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -4,8 +4,9 @@ #include <linux/compiler.h> #include <stdbool.h> -#include "intlist.h" -#include "namespaces.h" + +struct intlist; +struct nsinfo; /* Probe related configurations */ struct probe_conf { diff --git a/tools/perf/util/probe-file.c 
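Moving EVENT_SOURCE_DEVICE_PATH from pmu.c into pmu.h (above) lets other compilation units build sysfs paths for PMU devices from the same macro. A minimal stand-alone sketch that lists the PMUs by scanning that directory; it assumes sysfs is mounted at /sys, whereas perf itself resolves the mount point at run time:

#include <dirent.h>
#include <stdio.h>

#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"

int main(void)
{
	char path[256];
	struct dirent *de;
	DIR *dir;

	snprintf(path, sizeof(path), "/sys%s", EVENT_SOURCE_DEVICE_PATH);
	dir = opendir(path);
	if (!dir) {
		perror(path);
		return 1;
	}
	while ((de = readdir(dir)) != NULL) {
		if (de->d_name[0] != '.')	/* skip . and .. */
			printf("%s\n", de->d_name);
	}
	closedir(dir);
	return 0;
}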
b/tools/perf/util/probe-file.c index 0b1195cad0e5..4062bc4412a9 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -20,6 +20,7 @@ #include <sys/types.h> #include <sys/uio.h> #include <unistd.h> +#include "namespaces.h" #include "util.h" #include "event.h" #include "strlist.h" diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index a920f702a74d..376e86cb4c3c 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -140,12 +140,12 @@ struct __name##_sorted *__name = __name##_sorted__new /* For 'struct intlist' */ #define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ - DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ + DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ __ilist->rblist.nr_entries) /* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ - __machine->threads[hash_bucket].nr) +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ + DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \ + __machine->threads[hash_bucket].nr) #endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0efc3258c648..11e07fab20dc 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -13,8 +13,9 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) { - struct rb_node **p = &rblist->entries.rb_node; + struct rb_node **p = &rblist->entries.rb_root.rb_node; struct rb_node *parent = NULL, *new_node; + bool leftmost = true; while (*p != NULL) { int rc; @@ -24,8 +25,10 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) rc = rblist->node_cmp(parent, new_entry); if (rc > 0) p = &(*p)->rb_left; - else if (rc < 0) + else if (rc < 0) { p = &(*p)->rb_right; + leftmost = false; + } else return -EEXIST; } @@ -35,7 +38,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) return -ENOMEM; rb_link_node(new_node, parent, p); - rb_insert_color(new_node, &rblist->entries); + rb_insert_color_cached(new_node, &rblist->entries, leftmost); ++rblist->nr_entries; return 0; @@ -43,7 +46,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) { - rb_erase(rb_node, &rblist->entries); + rb_erase_cached(rb_node, &rblist->entries); --rblist->nr_entries; rblist->node_delete(rblist, rb_node); } @@ -52,8 +55,9 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, const void *entry, bool create) { - struct rb_node **p = &rblist->entries.rb_node; + struct rb_node **p = &rblist->entries.rb_root.rb_node; struct rb_node *parent = NULL, *new_node = NULL; + bool leftmost = true; while (*p != NULL) { int rc; @@ -63,8 +67,10 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, rc = rblist->node_cmp(parent, entry); if (rc > 0) p = &(*p)->rb_left; - else if (rc < 0) + else if (rc < 0) { p = &(*p)->rb_right; + leftmost = false; + } else return parent; } @@ -73,7 +79,8 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, new_node = rblist->node_new(rblist, entry); if (new_node) { rb_link_node(new_node, parent, p); - rb_insert_color(new_node, &rblist->entries); + rb_insert_color_cached(new_node, + &rblist->entries, leftmost); ++rblist->nr_entries; } } @@ -94,7 +101,7 @@ struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry) void 
rblist__init(struct rblist *rblist) { if (rblist != NULL) { - rblist->entries = RB_ROOT; + rblist->entries = RB_ROOT_CACHED; rblist->nr_entries = 0; } @@ -103,7 +110,7 @@ void rblist__init(struct rblist *rblist) void rblist__exit(struct rblist *rblist) { - struct rb_node *pos, *next = rb_first(&rblist->entries); + struct rb_node *pos, *next = rb_first_cached(&rblist->entries); while (next) { pos = next; @@ -124,7 +131,8 @@ struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx) { struct rb_node *node; - for (node = rb_first(&rblist->entries); node; node = rb_next(node)) { + for (node = rb_first_cached(&rblist->entries); node; + node = rb_next(node)) { if (!idx--) return node; } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 76df15c27f5f..14b232a4d0b6 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -20,7 +20,7 @@ */ struct rblist { - struct rb_root entries; + struct rb_root_cached entries; unsigned int nr_entries; int (*node_cmp)(struct rb_node *rbn, const void *entry); diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h new file mode 100644 index 000000000000..d4356030b504 --- /dev/null +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for s390 CPU measurement counter set diagnostic facility + * + * Copyright IBM Corp. 2019 + Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> + * Thomas Richter <tmricht@linux.ibm.com> + */ +#ifndef S390_CPUMCF_KERNEL_H +#define S390_CPUMCF_KERNEL_H + +#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ +#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ + +struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int set:16; /* 16-23 Counter set identifier */ + unsigned int ctr:16; /* 24-39 Number of stored counters */ + unsigned int res1:16; /* 40-63 Reserved */ +}; + +struct cf_trailer_entry { /* CPU-M CF trailer for raw traces (64 byte) */ + /* 0 - 7 */ + union { + struct { + unsigned int clock_base:1; /* TOD clock base */ + unsigned int speed:1; /* CPU speed */ + /* Measurement alerts */ + unsigned int mtda:1; /* Loss of MT ctr. data alert */ + unsigned int caca:1; /* Counter auth. 
change alert */ + unsigned int lcda:1; /* Loss of counter data alert */ + }; + unsigned long flags; /* 0-63 All indicators */ + }; + /* 8 - 15 */ + unsigned int cfvn:16; /* 64-79 Ctr First Version */ + unsigned int csvn:16; /* 80-95 Ctr Second Version */ + unsigned int cpu_speed:32; /* 96-127 CPU speed */ + /* 16 - 23 */ + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ + /* 24 - 55 */ + union { + struct { + unsigned long progusage1; + unsigned long progusage2; + unsigned long progusage3; + unsigned long tod_base; + }; + unsigned long progusage[4]; + }; + /* 56 - 63 */ + unsigned int mach_type:16; /* Machine type */ + unsigned int res1:16; /* Reserved */ + unsigned int res2:32; /* Reserved */ +}; + +#define CPUMF_CTR_SET_BASIC 0 /* Basic Counter Set */ +#define CPUMF_CTR_SET_USER 1 /* Problem-State Counter Set */ +#define CPUMF_CTR_SET_CRYPTO 2 /* Crypto-Activity Counter Set */ +#define CPUMF_CTR_SET_EXT 3 /* Extended Counter Set */ +#define CPUMF_CTR_SET_MT_DIAG 4 /* MT-diagnostic Counter Set */ +#endif diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 68b2570304ec..c215704931dc 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -162,6 +162,7 @@ #include "auxtrace.h" #include "s390-cpumsf.h" #include "s390-cpumsf-kernel.h" +#include "s390-cpumcf-kernel.h" #include "config.h" struct s390_cpumsf { @@ -184,8 +185,58 @@ struct s390_cpumsf_queue { struct auxtrace_buffer *buffer; int cpu; FILE *logfile; + FILE *logfile_ctr; }; +/* Check if the raw data should be dumped to file. If this is the case and + * the file to dump to has not been opened for writing, do so. + * + * Return 0 on success and greater zero on error so processing continues. + */ +static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, + struct perf_sample *sample) +{ + struct s390_cpumsf_queue *sfq; + struct auxtrace_queue *q; + int rc = 0; + + if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu) + return rc; + + q = &sf->queues.queue_array[sample->cpu]; + sfq = q->priv; + if (!sfq) /* Queue not yet allocated */ + return rc; + + if (!sfq->logfile_ctr) { + char *name; + + rc = (sf->logdir) + ? 
asprintf(&name, "%s/aux.ctr.%02x", + sf->logdir, sample->cpu) + : asprintf(&name, "aux.ctr.%02x", sample->cpu); + if (rc > 0) + sfq->logfile_ctr = fopen(name, "w"); + if (sfq->logfile_ctr == NULL) { + pr_err("Failed to open counter set log file %s, " + "continue...\n", name); + rc = 1; + } + free(name); + } + + if (sfq->logfile_ctr) { + /* See comment above for -4 */ + size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1, + sfq->logfile_ctr); + if (n != 1) { + pr_err("Failed to write counter set data\n"); + rc = 1; + } + } + return rc; +} + /* Display s390 CPU measurement facility basic-sampling data entry */ static bool s390_cpumsf_basic_show(const char *color, size_t pos, struct hws_basic_entry *basic) @@ -301,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type, *dsdes = 85; *bsdes = 32; break; + case 2964: + case 2965: + *dsdes = 112; + *bsdes = 32; + break; default: /* Illegal trailer entry */ return false; @@ -768,7 +824,7 @@ static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) } static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, - pid_t pid, pid_t tid, u64 ip) + pid_t pid, pid_t tid, u64 ip, u64 timestamp) { char msg[MAX_AUXTRACE_ERROR_MSG]; union perf_event event; @@ -776,7 +832,7 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, - code, cpu, pid, tid, ip, msg); + code, cpu, pid, tid, ip, msg, timestamp); err = perf_session__deliver_synth_event(sf->session, &event, NULL); if (err) @@ -788,11 +844,12 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) { return s390_cpumsf_synth_error(sf, 1, sample->cpu, - sample->pid, sample->tid, 0); + sample->pid, sample->tid, 0, + sample->time); } static int -s390_cpumsf_process_event(struct perf_session *session __maybe_unused, +s390_cpumsf_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool) @@ -801,6 +858,8 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused, struct s390_cpumsf, auxtrace); u64 timestamp = sample->time; + struct perf_evsel *ev_bc000; + int err = 0; if (dump_trace) @@ -811,6 +870,16 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused, return -EINVAL; } + if (event->header.type == PERF_RECORD_SAMPLE && + sample->raw_size) { + /* Handle event with raw data */ + ev_bc000 = perf_evlist__event2evsel(session->evlist, event); + if (ev_bc000 && + ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG) + err = s390_cpumcf_dumpctr(sf, sample); + return err; + } + if (event->header.type == PERF_RECORD_AUX && event->aux.flags & PERF_AUX_FLAG_TRUNCATED) return s390_cpumsf_lost(sf, sample); @@ -891,9 +960,15 @@ static void s390_cpumsf_free_queues(struct perf_session *session) struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) queues->queue_array[i].priv; - if (sfq != NULL && sfq->logfile) { - fclose(sfq->logfile); - sfq->logfile = NULL; + if (sfq != NULL) { + if (sfq->logfile) { + fclose(sfq->logfile); + sfq->logfile = NULL; + } + if (sfq->logfile_ctr) { + fclose(sfq->logfile_ctr); + sfq->logfile_ctr = NULL; + } } zfree(&queues->queue_array[i].priv); } diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c new file mode 100644 index 000000000000..6650f599ed9c --- 
/dev/null +++ b/tools/perf/util/s390-sample-raw.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2019 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Architecture specific trace_event function. Save event's bc000 raw data + * to file. File name is aux.ctr.## where ## stands for the CPU number the + * sample was taken from. + */ + +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + +#include <sys/stat.h> +#include <linux/compiler.h> +#include <asm/byteorder.h> + +#include "debug.h" +#include "util.h" +#include "auxtrace.h" +#include "session.h" +#include "evlist.h" +#include "config.h" +#include "color.h" +#include "sample-raw.h" +#include "s390-cpumcf-kernel.h" +#include "pmu-events/pmu-events.h" + +static size_t ctrset_size(struct cf_ctrset_entry *set) +{ + return sizeof(*set) + set->ctr * sizeof(u64); +} + +static bool ctrset_valid(struct cf_ctrset_entry *set) +{ + return set->def == S390_CPUMCF_DIAG_DEF; +} + +/* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte + * aligned and might have trailing padding bytes. + * Display the raw data on screen. + */ +static bool s390_cpumcfdg_testctr(struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *buf = sample->raw_data; + struct cf_trailer_entry *te; + struct cf_ctrset_entry *cep, ce; + + if (!len) + return false; + while (offset < len) { + cep = (struct cf_ctrset_entry *)(buf + offset); + ce.def = be16_to_cpu(cep->def); + ce.set = be16_to_cpu(cep->set); + ce.ctr = be16_to_cpu(cep->ctr); + ce.res1 = be16_to_cpu(cep->res1); + + if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) { + /* Raw data for counter sets are always multiple of 8 + * bytes. Prepending a 4 bytes size field to the + * raw data block in the sample causes the perf tool + * to append 4 padding bytes to make the raw data part + * of the sample a multiple of eight bytes again. + * + * If the last entry (trailer) is 4 bytes off the raw + * area data end, all is good. + */ + if (len - offset - sizeof(*te) == 4) + break; + pr_err("Invalid counter set entry at %zd\n", offset); + return false; + } + offset += ctrset_size(&ce); + } + return true; +} + +/* Dump event bc000 on screen, already tested on correctness. */ +static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, + struct cf_trailer_entry *tep) +{ + struct cf_trailer_entry te; + + te.flags = be64_to_cpu(tep->flags); + te.cfvn = be16_to_cpu(tep->cfvn); + te.csvn = be16_to_cpu(tep->csvn); + te.cpu_speed = be32_to_cpu(tep->cpu_speed); + te.timestamp = be64_to_cpu(tep->timestamp); + te.progusage1 = be64_to_cpu(tep->progusage1); + te.progusage2 = be64_to_cpu(tep->progusage2); + te.progusage3 = be64_to_cpu(tep->progusage3); + te.tod_base = be64_to_cpu(tep->tod_base); + te.mach_type = be16_to_cpu(tep->mach_type); + te.res1 = be16_to_cpu(tep->res1); + te.res2 = be32_to_cpu(tep->res2); + + color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" + " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + offset, te.clock_base ? 'T' : ' ', + te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', + te.caca ? 'C' : ' ', te.lcda ? 
'L' : ' ', + te.cfvn, te.csvn, te.cpu_speed, te.timestamp); + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + " Type:%x\n\n", + te.progusage1, te.progusage2, te.progusage3, + te.tod_base, te.mach_type); +} + +/* Return starting number of a counter set */ +static int get_counterset_start(int setnr) +{ + switch (setnr) { + case CPUMF_CTR_SET_BASIC: /* Basic counter set */ + return 0; + case CPUMF_CTR_SET_USER: /* Problem state counter set */ + return 32; + case CPUMF_CTR_SET_CRYPTO: /* Crypto counter set */ + return 64; + case CPUMF_CTR_SET_EXT: /* Extended counter set */ + return 128; + case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ + return 448; + default: + return -1; + } +} + +/* Scan the PMU table and extract the logical name of a counter from the + * PMU events table. Input is the counter set and counter number with in the + * set. Construct the event number and use this as key. If they match return + * the name of this counter. + * If no match is found a NULL pointer is returned. + */ +static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) +{ + int rc, event_nr, wanted = get_counterset_start(set) + nr; + + if (map) { + struct pmu_event *evp = map->table; + + for (; evp->name || evp->event || evp->desc; ++evp) { + if (evp->name == NULL || evp->event == NULL) + continue; + rc = sscanf(evp->event, "event=%x", &event_nr); + if (rc == 1 && event_nr == wanted) + return evp->name; + } + } + return NULL; +} + +static void s390_cpumcfdg_dump(struct perf_sample *sample) +{ + size_t i, len = sample->raw_size, offset = 0; + unsigned char *buf = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct cf_ctrset_entry *cep, ce; + struct pmu_events_map *map; + struct perf_pmu pmu; + u64 *p; + + memset(&pmu, 0, sizeof(pmu)); + map = perf_pmu__find_map(&pmu); + while (offset < len) { + cep = (struct cf_ctrset_entry *)(buf + offset); + + ce.def = be16_to_cpu(cep->def); + ce.set = be16_to_cpu(cep->set); + ce.ctr = be16_to_cpu(cep->ctr); + ce.res1 = be16_to_cpu(cep->res1); + + if (!ctrset_valid(&ce)) { /* Print trailer */ + s390_cpumcfdg_dumptrail(color, offset, + (struct cf_trailer_entry *)cep); + return; + } + + color_fprintf(stdout, color, " [%#08zx] Counterset:%d" + " Counters:%d\n", offset, ce.set, ce.ctr); + for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) { + const char *ev_name = get_counter_name(ce.set, i, map); + + color_fprintf(stdout, color, + "\tCounter:%03d %s Value:%#018lx\n", i, + ev_name ?: "<unknown>", be64_to_cpu(*p)); + } + offset += ctrset_size(&ce); + } +} + +/* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events + * and if the event was triggered by a counter set diagnostic event display + * its raw data. + * The function is only invoked when the dump flag -D is set. 
+ */ +void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, union perf_event *event, + struct perf_sample *sample) +{ + struct perf_evsel *ev_bc000; + + if (event->header.type != PERF_RECORD_SAMPLE) + return; + + ev_bc000 = perf_evlist__event2evsel(evlist, event); + if (ev_bc000 == NULL || + ev_bc000->attr.config != PERF_EVENT_CPUM_CF_DIAG) + return; + + /* Display raw data on screen */ + if (!s390_cpumcfdg_testctr(sample)) { + pr_err("Invalid counter set data encountered\n"); + return; + } + s390_cpumcfdg_dump(sample); +} diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c new file mode 100644 index 000000000000..c21e1311fb0f --- /dev/null +++ b/tools/perf/util/sample-raw.c @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <string.h> +#include "evlist.h" +#include "env.h" +#include "sample-raw.h" + +/* + * Check platform the perf data file was created on and perform platform + * specific interpretation. + */ +void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist) +{ + const char *arch_pf = perf_env__arch(evlist->env); + + if (arch_pf && !strcmp("s390", arch_pf)) + evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw; +} diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h new file mode 100644 index 000000000000..95d445c87e93 --- /dev/null +++ b/tools/perf/util/sample-raw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SAMPLE_RAW_H +#define __SAMPLE_RAW_H 1 + +struct perf_evlist; +union perf_event; +struct perf_sample; + +void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample); + +void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist); +#endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 82d28c67e0f3..7b342ce38d99 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,5 +1,5 @@ -libperf-$(CONFIG_LIBPERL) += trace-event-perl.o -libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o +perf-$(CONFIG_LIBPERL) += trace-event-perl.o +perf-$(CONFIG_LIBPYTHON) += trace-event-python.o CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b93f36b887b5..5f06378a482b 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -37,6 +37,8 @@ #include "../../perf.h" #include "../callchain.h" #include "../machine.h" +#include "../map.h" +#include "../symbol.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 87ef16a1b17e..0e17db41b49b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -44,6 +44,8 @@ #include "../thread-stack.h" #include "../trace-event.h" #include "../call-path.h" +#include "map.h" +#include "symbol.h" #include "thread_map.h" #include "cpumap.h" #include "print_binary.h" @@ -733,8 +735,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); 
pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); - pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( - (const char *)&evsel->attr, sizeof(evsel->attr))); + pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->attr, sizeof(evsel->attr))); pydict_set_item_string_decref(dict_sample, "pid", _PyLong_FromLong(sample->pid)); @@ -1494,34 +1495,40 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; + PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else wchar_t **command_line; #endif - char buf[PATH_MAX]; + /* + * Use a non-const name variable to cope with python 2.6's + * PyImport_AppendInittab prototype + */ + char buf[PATH_MAX], name[19] = "perf_trace_context"; int i, err = 0; FILE *fp; #if PY_MAJOR_VERSION < 3 + initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; #else + initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); #endif + PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 - initperf_trace_context(); PySys_SetArgv(argc + 1, (char **)command_line); #else - PyInit_perf_trace_context(); PySys_SetArgv(argc + 1, command_line); #endif diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5456c84c7dd1..18fb9c8cbf9c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -13,6 +13,8 @@ #include "evlist.h" #include "evsel.h" #include "memswap.h" +#include "map.h" +#include "symbol.h" #include "session.h" #include "tool.h" #include "sort.h" @@ -23,6 +25,7 @@ #include "auxtrace.h" #include "thread.h" #include "thread-stack.h" +#include "sample-raw.h" #include "stat.h" #include "arch/common.h" @@ -147,6 +150,8 @@ struct perf_session *perf_session__new(struct perf_data *data, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } + + perf_evlist__init_trace_event_sample_raw(session->evlist); } } else { session->machines.host.env = &perf_env; @@ -376,6 +381,10 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->itrace_start = perf_event__process_itrace_start; if (tool->context_switch == NULL) tool->context_switch = perf_event__process_switch; + if (tool->ksymbol == NULL) + tool->ksymbol = perf_event__process_ksymbol; + if (tool->bpf_event == NULL) + tool->bpf_event = perf_event__process_bpf_event; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -694,7 +703,10 @@ static void perf_event__auxtrace_error_swap(union perf_event *event, event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); + event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt); event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); + if (event->auxtrace_error.fmt) + event->auxtrace_error.time = bswap_64(event->auxtrace_error.time); } static void perf_event__thread_map_swap(union perf_event *event, @@ -1065,6 +1077,8 @@ static void dump_event(struct 
perf_evlist *evlist, union perf_event *event, file_offset, event->header.size, event->header.type); trace_event(event); + if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw) + evlist->trace_event_sample_raw(evlist, event, sample); if (sample) perf_evlist__print_tstamp(evlist, event, sample); @@ -1305,6 +1319,10 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: return tool->context_switch(tool, event, sample, machine); + case PERF_RECORD_KSYMBOL: + return tool->ksymbol(tool, event, sample, machine); + case PERF_RECORD_BPF_EVENT: + return tool->bpf_event(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -1820,38 +1838,35 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif -static int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, - u64 file_size) +struct reader { + int fd; + u64 data_size; + u64 data_offset; +}; + +static int +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) { - struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; - int fd = perf_data__fd(session->data); + u64 data_size = rd->data_size; u64 head, page_offset, file_offset, file_pos, size; - int err, mmap_prot, mmap_flags, map_idx = 0; + int err = 0, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; char *buf, *mmaps[NUM_MMAPS]; union perf_event *event; - struct ui_progress prog; s64 skip; - perf_tool__fill_defaults(tool); - - page_offset = page_size * (data_offset / page_size); + page_offset = page_size * (rd->data_offset / page_size); file_offset = page_offset; - head = data_offset - page_offset; + head = rd->data_offset - page_offset; - if (data_size == 0) - goto out; - - if (data_offset + data_size < file_size) - file_size = data_offset + data_size; + ui_progress__init_size(prog, data_size, "Processing events..."); - ui_progress__init_size(&prog, file_size, "Processing events..."); + data_size += rd->data_offset; mmap_size = MMAP_SIZE; - if (mmap_size > file_size) { - mmap_size = file_size; + if (mmap_size > data_size) { + mmap_size = data_size; session->one_mmap = true; } @@ -1865,12 +1880,12 @@ static int __perf_session__process_events(struct perf_session *session, mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); err = -errno; - goto out_err; + goto out; } mmaps[map_idx] = buf; map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); @@ -1902,7 +1917,7 @@ more: file_offset + head, event->header.size, event->header.type); err = -EINVAL; - goto out_err; + goto out; } if (skip) @@ -1911,15 +1926,40 @@ more: head += size; file_pos += size; - ui_progress__update(&prog, size); + ui_progress__update(prog, size); if (session_done()) goto out; - if (file_pos < file_size) + if (file_pos < data_size) goto more; out: + return err; +} + +static int __perf_session__process_events(struct perf_session *session) +{ + struct reader rd = { + .fd = perf_data__fd(session->data), + .data_size = session->header.data_size, + .data_offset = session->header.data_offset, + }; + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; + struct ui_progress prog; + int err; + + perf_tool__fill_defaults(tool); + + if (rd.data_size == 0) 
+ return -1; + + ui_progress__init_size(&prog, rd.data_size, "Processing events..."); + + err = reader__process_events(&rd, session, &prog); + if (err) + goto out_err; /* do the final flush for ordered samples */ err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) @@ -1944,20 +1984,13 @@ out_err: int perf_session__process_events(struct perf_session *session) { - u64 size = perf_data__size(session->data); - int err; - if (perf_session__register_idle_thread(session) < 0) return -ENOMEM; - if (!perf_data__is_pipe(session->data)) - err = __perf_session__process_events(session, - session->header.data_offset, - session->header.data_size, size); - else - err = __perf_session__process_pipe_events(session); + if (perf_data__is_pipe(session->data)) + return __perf_session__process_pipe_events(session); - return err; + return __perf_session__process_events(session); } bool perf_session__has_traces(struct perf_session *session, const char *msg) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 64d1f36dee99..5b5a167b43ce 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,5 +1,3 @@ -#!/usr/bin/python - from os import getenv from subprocess import Popen, PIPE from re import sub @@ -55,9 +53,14 @@ ext_sources = [f.strip() for f in open('util/python-ext-sources') # use full paths with source files ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) +extra_libraries = [] +if '-DHAVE_LIBNUMA_SUPPORT' in cflags: + extra_libraries = [ 'numa' ] + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], + libraries = extra_libraries, extra_compile_args = cflags, extra_objects = [libtraceevent, libapikfs], ) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6c1a83768eb0..2b6c1ccb878c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -6,6 +6,7 @@ #include "sort.h" #include "hist.h" #include "comm.h" +#include "map.h" #include "symbol.h" #include "thread.h" #include "evsel.h" @@ -428,8 +429,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct map *map = he->ms.map; - struct perf_evsel *evsel = hists_to_evsel(he->hists); struct annotation *notes; double ipc = 0.0, coverage = 0.0; char tmp[64]; @@ -437,11 +436,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - if (!sym->annotate2 && symbol__annotate2(sym, map, evsel, - &annotation__default_options, NULL) < 0) { - return 0; - } - notes = symbol__annotation(sym); if (notes->hit_cycles) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 130fe37fe2df..2fbee0b1011c 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -9,7 +9,8 @@ #include <linux/list.h> #include "cache.h" #include <linux/rbtree.h> -#include "symbol.h" +#include "map_symbol.h" +#include "symbol_conf.h" #include "string.h" #include "callchain.h" #include "values.h" @@ -145,8 +146,8 @@ struct hist_entry { union { /* this is for hierarchical entry structure */ struct { - struct rb_root hroot_in; - struct rb_root hroot_out; + struct rb_root_cached hroot_in; + struct rb_root_cached hroot_out; }; /* non-leaf entries */ struct rb_root sorted_chain; /* leaf entry has callchains */ }; diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h index e500a746d5f1..1b5ed769779c 100644 --- a/tools/perf/util/srccode.h +++ b/tools/perf/util/srccode.h @@ -1,6 +1,19 @@ #ifndef SRCCODE_H #define 
SRCCODE_H 1 +struct srccode_state { + char *srcfile; + unsigned line; +}; + +static inline void srccode_state_init(struct srccode_state *state) +{ + state->srcfile = NULL; + state->line = 0; +} + +void srccode_state_free(struct srccode_state *state); + /* Result is not 0 terminated */ char *find_sourceline(char *fn, unsigned line, int *lenp); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index dc86597d0cc4..00f215580b5a 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -594,11 +594,12 @@ struct srcline_node { struct rb_node rb_node; }; -void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) +void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline) { - struct rb_node **p = &tree->rb_node; + struct rb_node **p = &tree->rb_root.rb_node; struct rb_node *parent = NULL; struct srcline_node *i, *node; + bool leftmost = true; node = zalloc(sizeof(struct srcline_node)); if (!node) { @@ -614,16 +615,18 @@ void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) i = rb_entry(parent, struct srcline_node, rb_node); if (addr < i->addr) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&node->rb_node, parent, p); - rb_insert_color(&node->rb_node, tree); + rb_insert_color_cached(&node->rb_node, tree, leftmost); } -char *srcline__tree_find(struct rb_root *tree, u64 addr) +char *srcline__tree_find(struct rb_root_cached *tree, u64 addr) { - struct rb_node *n = tree->rb_node; + struct rb_node *n = tree->rb_root.rb_node; while (n) { struct srcline_node *i = rb_entry(n, struct srcline_node, @@ -640,15 +643,15 @@ char *srcline__tree_find(struct rb_root *tree, u64 addr) return NULL; } -void srcline__tree_delete(struct rb_root *tree) +void srcline__tree_delete(struct rb_root_cached *tree) { struct srcline_node *pos; - struct rb_node *next = rb_first(tree); + struct rb_node *next = rb_first_cached(tree); while (next) { pos = rb_entry(next, struct srcline_node, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, tree); + rb_erase_cached(&pos->rb_node, tree); free_srcline(pos->srcline); zfree(&pos); } @@ -682,28 +685,32 @@ void inline_node__delete(struct inline_node *node) free(node); } -void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) +void inlines__tree_insert(struct rb_root_cached *tree, + struct inline_node *inlines) { - struct rb_node **p = &tree->rb_node; + struct rb_node **p = &tree->rb_root.rb_node; struct rb_node *parent = NULL; const u64 addr = inlines->addr; struct inline_node *i; + bool leftmost = true; while (*p != NULL) { parent = *p; i = rb_entry(parent, struct inline_node, rb_node); if (addr < i->addr) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&inlines->rb_node, parent, p); - rb_insert_color(&inlines->rb_node, tree); + rb_insert_color_cached(&inlines->rb_node, tree, leftmost); } -struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) +struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr) { - struct rb_node *n = tree->rb_node; + struct rb_node *n = tree->rb_root.rb_node; while (n) { struct inline_node *i = rb_entry(n, struct inline_node, @@ -720,15 +727,15 @@ struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) return NULL; } -void inlines__tree_delete(struct rb_root *tree) +void inlines__tree_delete(struct rb_root_cached *tree) { struct inline_node *pos; - struct rb_node *next = rb_first(tree); + struct 
rb_node *next = rb_first_cached(tree); while (next) { pos = rb_entry(next, struct inline_node, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, tree); + rb_erase_cached(&pos->rb_node, tree); inline_node__delete(pos); } } diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 5762212dc342..b11a0aaaa676 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -19,11 +19,11 @@ void free_srcline(char *srcline); char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); /* insert the srcline into the DSO, which will take ownership */ -void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); +void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline); /* find previously inserted srcline */ -char *srcline__tree_find(struct rb_root *tree, u64 addr); +char *srcline__tree_find(struct rb_root_cached *tree, u64 addr); /* delete all srclines within the tree */ -void srcline__tree_delete(struct rb_root *tree); +void srcline__tree_delete(struct rb_root_cached *tree); #define SRCLINE_UNKNOWN ((char *) "??:0") @@ -46,10 +46,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, void inline_node__delete(struct inline_node *node); /* insert the inline node list into the DSO, which will take ownership */ -void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines); +void inlines__tree_insert(struct rb_root_cached *tree, + struct inline_node *inlines); /* find previously inserted inline node list */ -struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr); +struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr); /* delete all nodes within the tree of inline_node s */ -void inlines__tree_delete(struct rb_root *tree); +void inlines__tree_delete(struct rb_root_cached *tree); #endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 665ee374fc01..6d043c78f3c2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -2,6 +2,7 @@ #include <inttypes.h> #include <linux/time64.h> #include <math.h> +#include "color.h" #include "evlist.h" #include "evsel.h" #include "stat.h" diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 3c22c58b3e90..83d8094be4fe 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -168,7 +168,7 @@ static void reset_stat(struct runtime_stat *st) struct rb_node *pos, *next; rblist = &st->value_list; - next = rb_first(&rblist->entries); + next = rb_first_cached(&rblist->entries); while (next) { pos = next; next = rb_next(pos); diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index d58f1e08b170..7e82c71dcc42 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -57,7 +57,7 @@ static inline unsigned int strlist__nr_entries(const struct strlist *slist) /* For strlist iteration */ static inline struct str_node *strlist__first(struct strlist *slist) { - struct rb_node *rn = rb_first(&slist->rblist.entries); + struct rb_node *rn = rb_first_cached(&slist->rblist.entries); return rn ? 
rb_entry(rn, struct str_node, rb_node) : NULL; } static inline struct str_node *strlist__next(struct str_node *sn) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index dca7dfae69ad..4ad106a5f2c0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,8 @@ #include <unistd.h> #include <inttypes.h> +#include "map.h" +#include "map_groups.h" #include "symbol.h" #include "demangle-java.h" #include "demangle-rust.h" diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7119df77dc0b..17edbd4f6f85 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -3,6 +3,7 @@ #include "util.h" #include <errno.h> +#include <unistd.h> #include <stdio.h> #include <fcntl.h> #include <string.h> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48efad6d0f90..758bf5f74e6e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -17,6 +17,7 @@ #include "util.h" #include "debug.h" #include "machine.h" +#include "map.h" #include "symbol.h" #include "strlist.h" #include "intlist.h" @@ -163,7 +164,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) return arch__choose_best_symbol(syma, symb); } -void symbols__fixup_duplicate(struct rb_root *symbols) +void symbols__fixup_duplicate(struct rb_root_cached *symbols) { struct rb_node *nd; struct symbol *curr, *next; @@ -171,7 +172,7 @@ void symbols__fixup_duplicate(struct rb_root *symbols) if (symbol_conf.allow_aliases) return; - nd = rb_first(symbols); + nd = rb_first_cached(symbols); while (nd) { curr = rb_entry(nd, struct symbol, rb_node); @@ -186,20 +187,20 @@ again: continue; if (choose_best_symbol(curr, next) == SYMBOL_A) { - rb_erase(&next->rb_node, symbols); + rb_erase_cached(&next->rb_node, symbols); symbol__delete(next); goto again; } else { nd = rb_next(&curr->rb_node); - rb_erase(&curr->rb_node, symbols); + rb_erase_cached(&curr->rb_node, symbols); symbol__delete(curr); } } } -void symbols__fixup_end(struct rb_root *symbols) +void symbols__fixup_end(struct rb_root_cached *symbols) { - struct rb_node *nd, *prevnd = rb_first(symbols); + struct rb_node *nd, *prevnd = rb_first_cached(symbols); struct symbol *curr, *prev; if (prevnd == NULL) @@ -282,25 +283,27 @@ void symbol__delete(struct symbol *sym) free(((void *)sym) - symbol_conf.priv_size); } -void symbols__delete(struct rb_root *symbols) +void symbols__delete(struct rb_root_cached *symbols) { struct symbol *pos; - struct rb_node *next = rb_first(symbols); + struct rb_node *next = rb_first_cached(symbols); while (next) { pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, symbols); + rb_erase_cached(&pos->rb_node, symbols); symbol__delete(pos); } } -void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) +void __symbols__insert(struct rb_root_cached *symbols, + struct symbol *sym, bool kernel) { - struct rb_node **p = &symbols->rb_node; + struct rb_node **p = &symbols->rb_root.rb_node; struct rb_node *parent = NULL; const u64 ip = sym->start; struct symbol *s; + bool leftmost = true; if (kernel) { const char *name = sym->name; @@ -318,26 +321,28 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) s = rb_entry(parent, struct symbol, rb_node); if (ip < s->start) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&sym->rb_node, parent, p); - rb_insert_color(&sym->rb_node, symbols); + 
rb_insert_color_cached(&sym->rb_node, symbols, leftmost); } -void symbols__insert(struct rb_root *symbols, struct symbol *sym) +void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym) { __symbols__insert(symbols, sym, false); } -static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) +static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip) { struct rb_node *n; if (symbols == NULL) return NULL; - n = symbols->rb_node; + n = symbols->rb_root.rb_node; while (n) { struct symbol *s = rb_entry(n, struct symbol, rb_node); @@ -353,9 +358,9 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) return NULL; } -static struct symbol *symbols__first(struct rb_root *symbols) +static struct symbol *symbols__first(struct rb_root_cached *symbols) { - struct rb_node *n = rb_first(symbols); + struct rb_node *n = rb_first_cached(symbols); if (n) return rb_entry(n, struct symbol, rb_node); @@ -363,9 +368,9 @@ static struct symbol *symbols__first(struct rb_root *symbols) return NULL; } -static struct symbol *symbols__last(struct rb_root *symbols) +static struct symbol *symbols__last(struct rb_root_cached *symbols) { - struct rb_node *n = rb_last(symbols); + struct rb_node *n = rb_last(&symbols->rb_root); if (n) return rb_entry(n, struct symbol, rb_node); @@ -383,11 +388,12 @@ static struct symbol *symbols__next(struct symbol *sym) return NULL; } -static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) +static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym) { - struct rb_node **p = &symbols->rb_node; + struct rb_node **p = &symbols->rb_root.rb_node; struct rb_node *parent = NULL; struct symbol_name_rb_node *symn, *s; + bool leftmost = true; symn = container_of(sym, struct symbol_name_rb_node, sym); @@ -396,19 +402,21 @@ static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) s = rb_entry(parent, struct symbol_name_rb_node, rb_node); if (strcmp(sym->name, s->sym.name) < 0) p = &(*p)->rb_left; - else + else { p = &(*p)->rb_right; + leftmost = false; + } } rb_link_node(&symn->rb_node, parent, p); - rb_insert_color(&symn->rb_node, symbols); + rb_insert_color_cached(&symn->rb_node, symbols, leftmost); } -static void symbols__sort_by_name(struct rb_root *symbols, - struct rb_root *source) +static void symbols__sort_by_name(struct rb_root_cached *symbols, + struct rb_root_cached *source) { struct rb_node *nd; - for (nd = rb_first(source); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); symbols__insert_by_name(symbols, pos); } @@ -431,7 +439,7 @@ int symbol__match_symbol_name(const char *name, const char *str, return arch__compare_symbol_names(name, str); } -static struct symbol *symbols__find_by_name(struct rb_root *symbols, +static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols, const char *name, enum symbol_tag_include includes) { @@ -441,7 +449,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, if (symbols == NULL) return NULL; - n = symbols->rb_node; + n = symbols->rb_root.rb_node; while (n) { int cmp; @@ -644,7 +652,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, { struct symbol *sym; struct dso *dso = arg; - struct rb_root *root = &dso->symbols; + struct rb_root_cached *root = &dso->symbols; if (!symbol_type__filter(type)) return 0; @@ -681,14 +689,14 @@ static int 
map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct struct map *curr_map; struct symbol *pos; int count = 0; - struct rb_root old_root = dso->symbols; - struct rb_root *root = &dso->symbols; - struct rb_node *next = rb_first(root); + struct rb_root_cached old_root = dso->symbols; + struct rb_root_cached *root = &dso->symbols; + struct rb_node *next = rb_first_cached(root); if (!kmaps) return -1; - *root = RB_ROOT; + *root = RB_ROOT_CACHED; while (next) { char *module; @@ -696,8 +704,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); - rb_erase_init(&pos->rb_node, &old_root); - + rb_erase_cached(&pos->rb_node, &old_root); + RB_CLEAR_NODE(&pos->rb_node); module = strchr(pos->name, '\t'); if (module) *module = '\0'; @@ -710,6 +718,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct } pos->start -= curr_map->start - curr_map->pgoff; + if (pos->end > curr_map->end) + pos->end = curr_map->end; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; symbols__insert(&curr_map->dso->symbols, pos); @@ -734,8 +744,8 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, struct map *curr_map = initial_map; struct symbol *pos; int count = 0, moved = 0; - struct rb_root *root = &dso->symbols; - struct rb_node *next = rb_first(root); + struct rb_root_cached *root = &dso->symbols; + struct rb_node *next = rb_first_cached(root); int kernel_range = 0; bool x86_64; @@ -849,7 +859,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, } add_symbol: if (curr_map != initial_map) { - rb_erase(&pos->rb_node, root); + rb_erase_cached(&pos->rb_node, root); symbols__insert(&curr_map->dso->symbols, pos); ++moved; } else @@ -857,7 +867,7 @@ add_symbol: continue; discard_symbol: - rb_erase(&pos->rb_node, root); + rb_erase_cached(&pos->rb_node, root); symbol__delete(pos); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 14d9d438e7e2..9a8fe012910a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,16 +5,13 @@ #include <linux/types.h> #include <stdbool.h> #include <stdint.h> -#include "map.h" -#include "../perf.h" #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> -#include <byteswap.h> -#include <libgen.h> -#include "build-id.h" -#include "event.h" +#include "map_symbol.h" +#include "branch.h" #include "path.h" +#include "symbol_conf.h" #ifdef HAVE_LIBELF_SUPPORT #include <libelf.h> @@ -24,6 +21,10 @@ #include "dso.h" +struct map; +struct map_groups; +struct option; + /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: @@ -68,7 +69,7 @@ struct symbol { }; void symbol__delete(struct symbol *sym); -void symbols__delete(struct rb_root *symbols); +void symbols__delete(struct rb_root_cached *symbols); /* symbols__for_each_entry - iterate over symbols (rb_root) * @@ -77,7 +78,7 @@ void symbols__delete(struct rb_root *symbols); * @nd: the 'struct rb_node *' to use as a temporary storage */ #define symbols__for_each_entry(symbols, pos, nd) \ - for (nd = rb_first(symbols); \ + for (nd = rb_first_cached(symbols); \ nd && (pos = rb_entry(nd, struct symbol, rb_node)); \ nd = rb_next(nd)) @@ -89,69 +90,6 @@ static inline size_t symbol__size(const struct symbol *sym) struct strlist; struct intlist; -struct symbol_conf { - unsigned short priv_size; - bool try_vmlinux_path, - 
init_annotation, - force, - ignore_vmlinux, - ignore_vmlinux_buildid, - show_kernel_path, - use_modules, - allow_aliases, - sort_by_name, - show_nr_samples, - show_total_period, - use_callchain, - cumulate_callchain, - show_branchflag_count, - exclude_other, - show_cpu_utilization, - initialized, - kptr_restrict, - event_group, - demangle, - demangle_kernel, - filter_relative, - show_hist_headers, - branch_callstack, - has_filter, - show_ref_callgraph, - hide_unresolved, - raw_trace, - report_hierarchy, - inline_name; - const char *vmlinux_name, - *kallsyms_name, - *source_prefix, - *field_sep, - *graph_function; - const char *default_guest_vmlinux_name, - *default_guest_kallsyms, - *default_guest_modules; - const char *guestmount; - const char *dso_list_str, - *comm_list_str, - *pid_list_str, - *tid_list_str, - *sym_list_str, - *col_width_list_str, - *bt_stop_list_str; - struct strlist *dso_list, - *comm_list, - *sym_list, - *dso_from_list, - *dso_to_list, - *sym_from_list, - *sym_to_list, - *bt_stop_list; - struct intlist *pid_list, - *tid_list; - const char *symfs; -}; - -extern struct symbol_conf symbol_conf; - struct symbol_name_rb_node { struct rb_node rb_node; struct symbol sym; @@ -178,19 +116,6 @@ struct ref_reloc_sym { u64 unrelocated_addr; }; -struct map_symbol { - struct map *map; - struct symbol *sym; -}; - -struct addr_map_symbol { - struct map *map; - struct symbol *sym; - u64 addr; - u64 al_addr; - u64 phys_addr; -}; - struct branch_info { struct addr_map_symbol from; struct addr_map_symbol to; @@ -310,10 +235,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); -void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); -void symbols__insert(struct rb_root *symbols, struct symbol *sym); -void symbols__fixup_duplicate(struct rb_root *symbols); -void symbols__fixup_end(struct rb_root *symbols); +void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, + bool kernel); +void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); +void symbols__fixup_duplicate(struct rb_root_cached *symbols); +void symbols__fixup_end(struct rb_root_cached *symbols); void map_groups__fixup_end(struct map_groups *mg); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h new file mode 100644 index 000000000000..fffea68c1203 --- /dev/null +++ b/tools/perf/util/symbol_conf.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SYMBOL_CONF +#define __PERF_SYMBOL_CONF 1 + +#include <stdbool.h> + +struct strlist; +struct intlist; + +struct symbol_conf { + unsigned short priv_size; + bool try_vmlinux_path, + init_annotation, + force, + ignore_vmlinux, + ignore_vmlinux_buildid, + show_kernel_path, + use_modules, + allow_aliases, + sort_by_name, + show_nr_samples, + show_total_period, + use_callchain, + cumulate_callchain, + show_branchflag_count, + exclude_other, + show_cpu_utilization, + initialized, + kptr_restrict, + event_group, + demangle, + demangle_kernel, + filter_relative, + show_hist_headers, + branch_callstack, + has_filter, + show_ref_callgraph, + hide_unresolved, + raw_trace, + report_hierarchy, + inline_name; + const char *vmlinux_name, + *kallsyms_name, + *source_prefix, + *field_sep, + *graph_function; + const char *default_guest_vmlinux_name, + *default_guest_kallsyms, + *default_guest_modules; + const char *guestmount; + const 
char *dso_list_str, + *comm_list_str, + *pid_list_str, + *tid_list_str, + *sym_list_str, + *col_width_list_str, + *bt_stop_list_str; + struct strlist *dso_list, + *comm_list, + *sym_list, + *dso_from_list, + *dso_to_list, + *sym_from_list, + *sym_to_list, + *bt_stop_list; + struct intlist *pid_list, + *tid_list; + const char *symfs; +}; + +extern struct symbol_conf symbol_conf; + +#endif // __PERF_SYMBOL_CONF diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index ed0205cc7942..02e89b02c2ce 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -3,6 +3,7 @@ #include <inttypes.h> #include <stdio.h> +#include "map.h" #include "symbol.h" size_t symbol__fprintf(struct symbol *sym, FILE *fp) @@ -64,7 +65,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, struct rb_node *nd; struct symbol_name_rb_node *pos; - for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); fprintf(fp, "%s\n", pos->sym.name); } diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index d52f27f373ce..f52c0f90915d 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -38,6 +38,7 @@ * @cp: call path * @no_call: a 'call' was not seen * @trace_end: a 'call' but trace ended + * @non_call: a branch but not a 'call' to the start of a different symbol */ struct thread_stack_entry { u64 ret_addr; @@ -47,6 +48,7 @@ struct thread_stack_entry { struct call_path *cp; bool no_call; bool trace_end; + bool non_call; }; /** @@ -268,6 +270,8 @@ static int thread_stack__call_return(struct thread *thread, cr.flags |= CALL_RETURN_NO_CALL; if (no_return) cr.flags |= CALL_RETURN_NO_RETURN; + if (tse->non_call) + cr.flags |= CALL_RETURN_NON_CALL; return crp->process(&cr, crp->data); } @@ -493,6 +497,9 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, struct thread_stack_entry *tse; int err; + if (!cp) + return -ENOMEM; + if (ts->cnt == ts->sz) { err = thread_stack__grow(ts); if (err) @@ -507,6 +514,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->cp = cp; tse->no_call = no_call; tse->trace_end = trace_end; + tse->non_call = false; return 0; } @@ -528,14 +536,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, timestamp, ref, false); } - if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { + if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && + !ts->stack[ts->cnt - 1].non_call) { return thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); } else { size_t i = ts->cnt - 1; while (i--) { - if (ts->stack[i].ret_addr != ret_addr) + if (ts->stack[i].ret_addr != ret_addr || + ts->stack[i].non_call) continue; i += 1; while (ts->cnt > i) { @@ -576,8 +586,6 @@ static int thread_stack__bottom(struct thread_stack *ts, cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, ts->kernel_start); - if (!cp) - return -ENOMEM; return thread_stack__push_cp(ts, ip, sample->time, ref, cp, true, false); @@ -590,36 +598,36 @@ static int thread_stack__no_call_return(struct thread *thread, struct addr_location *to_al, u64 ref) { struct call_path_root *cpr = ts->crp->cpr; + struct call_path *root = &cpr->call_path; + struct symbol *fsym = from_al->sym; + struct symbol *tsym = to_al->sym; struct call_path *cp, *parent; u64 ks = ts->kernel_start; + u64 addr = sample->addr; + u64 tm = sample->time; + u64 ip = 
sample->ip; int err; - if (sample->ip >= ks && sample->addr < ks) { + if (ip >= ks && addr < ks) { /* Return to userspace, so pop all kernel addresses */ while (thread_stack__in_kernel(ts)) { err = thread_stack__call_return(thread, ts, --ts->cnt, - sample->time, ref, - true); + tm, ref, true); if (err) return err; } /* If the stack is empty, push the userspace address */ if (!ts->cnt) { - cp = call_path__findnew(cpr, &cpr->call_path, - to_al->sym, sample->addr, - ts->kernel_start); - if (!cp) - return -ENOMEM; - return thread_stack__push_cp(ts, 0, sample->time, ref, - cp, true, false); + cp = call_path__findnew(cpr, root, tsym, addr, ks); + return thread_stack__push_cp(ts, 0, tm, ref, cp, true, + false); } - } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { + } else if (thread_stack__in_kernel(ts) && ip < ks) { /* Return to userspace, so pop all kernel addresses */ while (thread_stack__in_kernel(ts)) { err = thread_stack__call_return(thread, ts, --ts->cnt, - sample->time, ref, - true); + tm, ref, true); if (err) return err; } @@ -628,21 +636,16 @@ static int thread_stack__no_call_return(struct thread *thread, if (ts->cnt) parent = ts->stack[ts->cnt - 1].cp; else - parent = &cpr->call_path; + parent = root; /* This 'return' had no 'call', so push and pop top of stack */ - cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, - ts->kernel_start); - if (!cp) - return -ENOMEM; + cp = call_path__findnew(cpr, parent, fsym, ip, ks); - err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, - true, false); + err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); if (err) return err; - return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, - to_al->sym); + return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym); } static int thread_stack__trace_begin(struct thread *thread, @@ -680,8 +683,6 @@ static int thread_stack__trace_end(struct thread_stack *ts, cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, ts->kernel_start); - if (!cp) - return -ENOMEM; ret_addr = sample->ip + sample->insn_len; @@ -745,8 +746,6 @@ int thread_stack__process(struct thread *thread, struct comm *comm, cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, to_al->sym, sample->addr, ts->kernel_start); - if (!cp) - return -ENOMEM; err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, false, trace_end); } else if (sample->flags & PERF_IP_FLAG_RETURN) { @@ -765,6 +764,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, err = thread_stack__trace_begin(thread, ts, sample->time, ref); } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { err = thread_stack__trace_end(ts, sample, ref); + } else if (sample->flags & PERF_IP_FLAG_BRANCH && + from_al->sym != to_al->sym && to_al->sym && + to_al->addr == to_al->sym->start) { + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp; + + /* + * The compiler might optimize a call/ret combination by making + * it a jmp. Make that visible by recording on the stack a + * branch to the start of a different symbol. Note, that means + * when a ret pops the stack, all jmps must be popped off first. 
+ */ + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, + to_al->sym, sample->addr, + ts->kernel_start); + err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, + false); + if (!err) + ts->stack[ts->cnt - 1].non_call = true; } return err; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 1f626f4a1c40..b7c04e19ad41 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -35,10 +35,13 @@ struct call_path; * * CALL_RETURN_NO_CALL: 'return' but no matching 'call' * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' + * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different + * symbol */ enum { CALL_RETURN_NO_CALL = 1 << 0, CALL_RETURN_NO_RETURN = 1 << 1, + CALL_RETURN_NON_CALL = 1 << 2, }; /** diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index c83372329f89..4c179fef442d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -12,6 +12,7 @@ #include "debug.h" #include "namespaces.h" #include "comm.h" +#include "symbol.h" #include "unwind.h" #include <api/fs/fs.h> diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 712dd48cc0ca..8276ffeec556 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -5,14 +5,18 @@ #include <linux/refcount.h> #include <linux/rbtree.h> #include <linux/list.h> +#include <stdio.h> #include <unistd.h> #include <sys/types.h> -#include "symbol.h" -#include "map.h" +#include "srccode.h" +#include "symbol_conf.h" #include <strlist.h> #include <intlist.h> #include "rwsem.h" +struct addr_location; +struct map; +struct namespaces_event; struct thread_stack; struct unwind_libunwind_ops; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 56e4ca54020a..250391672f9f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -53,7 +53,10 @@ struct perf_tool { itrace_start, context_switch, throttle, - unthrottle; + unthrottle, + ksymbol, + bpf_event; + event_attr_op attr; event_attr_op event_update; event_op2 tracing_data; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 5eff9bfc5758..407d0167b942 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -8,6 +8,8 @@ #include "unwind.h" #include "unwind-libdw.h" #include "machine.h" +#include "map.h" +#include "symbol.h" #include "thread.h" #include <linux/types.h> #include "event.h" diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 79f521a552cf..f3c666a84e4d 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -34,6 +34,7 @@ #include "session.h" #include "perf_regs.h" #include "unwind.h" +#include "map.h" #include "symbol.h" #include "util.h" #include "debug.h" diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index b029a5e9ae49..9778b3133b77 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "unwind.h" +#include "map.h" #include "thread.h" #include "session.h" #include "debug.h" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 093352e93d50..320b0fef249a 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -2,6 +2,7 @@ #include "../perf.h" #include "util.h" #include "debug.h" +#include "namespaces.h" #include <api/fs/fs.h> #include <sys/mman.h> #include <sys/stat.h> diff --git a/tools/perf/util/vdso.c 
b/tools/perf/util/vdso.c index 3702cba11d7d..5031b7b22bbd 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -11,6 +11,7 @@ #include "vdso.h" #include "util.h" +#include "map.h" #include "symbol.h" #include "machine.h" #include "thread.h" diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 902ce6384f57..512ad7c09b13 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -6,7 +6,6 @@ #include <sys/mman.h> #include <zlib.h> #include <linux/compiler.h> -#include <unistd.h> #include "util/compress.h" #include "util/util.h"
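
The change that repeats across the rblist.c, rb_resort.h, srcline.c, sort.h and symbol.c hunks above is the move from struct rb_root to struct rb_root_cached: the cached root remembers the leftmost node, so rb_first_cached() is O(1), and every insertion has to report whether the new node became the new leftmost. Below is a minimal sketch of that insertion pattern, assuming the rbtree API from tools/include/linux/rbtree.h; the demo_node type and the demo_* helpers are made up for illustration and are not part of the patch. A root is initialised with RB_ROOT_CACHED instead of RB_ROOT, and removals go through rb_erase_cached().

#include <linux/rbtree.h>
#include <linux/types.h>
#include <stdbool.h>
#include <stddef.h>

struct demo_node {                       /* hypothetical node type, not from the patch */
	struct rb_node rb_node;
	u64 key;
};

void demo_insert(struct rb_root_cached *root, struct demo_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent = NULL;
	bool leftmost = true;            /* stays true only if we never descend right */

	while (*p) {
		struct demo_node *cur = rb_entry(*p, struct demo_node, rb_node);

		parent = *p;
		if (node->key < cur->key) {
			p = &(*p)->rb_left;
		} else {
			p = &(*p)->rb_right;
			leftmost = false;
		}
	}
	rb_link_node(&node->rb_node, parent, p);
	rb_insert_color_cached(&node->rb_node, root, leftmost);
}

/* The cached root keeps a pointer to the leftmost node, so the smallest
 * key is available without walking down the tree as plain rb_first() does.
 */
struct demo_node *demo_first(struct rb_root_cached *root)
{
	struct rb_node *nd = rb_first_cached(root);

	return nd ? rb_entry(nd, struct demo_node, rb_node) : NULL;
}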
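For the new s390-sample-raw.c dumper, the raw data of a PERF_EVENT_CPUM_CF_DIAG sample is a big-endian byte stream of counter sets: an 8-byte cf_ctrset_entry header (def/set/ctr/res1) followed by ctr 64-bit counter values, repeated until a trailer entry whose first half-word is not the 0xfeef magic. The sketch below walks such a buffer on the host side, using glibc's <endian.h> conversions rather than the kernel byteorder macros; walk_ctrsets() is a hypothetical helper and deliberately ignores the 4-byte padding quirk that s390_cpumcfdg_testctr() checks for.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>

#define S390_CPUMCF_DIAG_DEF 0xfeef     /* counter set header magic, as in the patch */

void walk_ctrsets(const unsigned char *buf, size_t len)
{
	size_t offset = 0;

	while (offset + 8 <= len) {
		uint16_t def, set, ctr;
		uint64_t val;
		size_t i, size;

		/* 8-byte set header: def(16) set(16) ctr(16) res1(16), big endian */
		memcpy(&def, buf + offset, 2);
		memcpy(&set, buf + offset + 2, 2);
		memcpy(&ctr, buf + offset + 4, 2);
		def = be16toh(def);
		set = be16toh(set);
		ctr = be16toh(ctr);

		if (def != S390_CPUMCF_DIAG_DEF)
			break;                  /* trailer (or padding) reached */

		size = 8 + (size_t)ctr * 8;     /* header plus ctr 64-bit counters */
		if (offset + size > len)
			break;                  /* truncated counter set */

		printf("set %u: %u counters\n", set, ctr);
		for (i = 0; i < ctr; i++) {
			memcpy(&val, buf + offset + 8 + i * 8, 8);
			printf("  counter %zu = %#llx\n", i,
			       (unsigned long long)be64toh(val));
		}
		offset += size;
	}
}

Counter names are then resolved the same way get_counter_name() does it: the event number is the counter-set base (0, 32, 64, 128 or 448) plus the counter index within the set, and that value is matched against the "event=0x.." strings in the pmu-events table.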
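The trace-event-python.c hunk stops calling initperf_trace_context()/PyInit_perf_trace_context() after Py_Initialize() and instead registers the module with PyImport_AppendInittab() beforehand, so the embedded interpreter can satisfy "import perf_trace_context" on its own. A minimal, Python 3 only sketch of that embedding pattern follows; the demo_ctx module is invented for the example, and the program has to be compiled and linked against libpython (for instance with the flags reported by python3-config).

#include <Python.h>

static PyObject *demo_hello(PyObject *self, PyObject *args)
{
	(void)self; (void)args;
	Py_RETURN_NONE;
}

static PyMethodDef demo_methods[] = {
	{ "hello", demo_hello, METH_NOARGS, "no-op placeholder" },
	{ NULL, NULL, 0, NULL }
};

static struct PyModuleDef demo_module = {
	PyModuleDef_HEAD_INIT, "demo_ctx", NULL, -1, demo_methods
};

static PyObject *PyInit_demo_ctx(void)
{
	return PyModule_Create(&demo_module);
}

int main(void)
{
	/* Built-in modules must be registered before Py_Initialize(), which is
	 * why the patch moves the PyImport_AppendInittab() call up front.
	 */
	PyImport_AppendInittab("demo_ctx", PyInit_demo_ctx);
	Py_Initialize();
	PyRun_SimpleString("import demo_ctx\ndemo_ctx.hello()");
	Py_Finalize();
	return 0;
}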