38 files changed, 701 insertions, 185 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..929655db5084 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? + pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..5081b4cdad0d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -177,7 +177,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -186,10 +186,8 @@ struct event_constraint intel_skl_event_constraints[] = { }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; @@ -225,14 +223,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). 
+ * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -258,7 +293,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -1332,6 +1367,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3437,6 +3495,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3587,6 +3652,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; @@ -3805,6 +3871,12 @@ __init int intel_pmu_init(void) 
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; @@ -3917,16 +3989,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 99c4bab123cd..e30eef4f29a6 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void) int i; for (i = 0; i < rapl_pmus->maxpkg; i++) - kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..65490589e52e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -882,7 +882,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +903,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. - */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. 
+ */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..66c3a3657a10 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b2625867ebd1..7336e55c248c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..e2d7285a2dac 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..e346572841a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git 
a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2ed0ec1353f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a827cecd62f..7921f4f20a58 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -517,6 +517,11 @@ struct swevent_hlist { struct perf_cgroup; struct ring_buffer; +struct pmu_event_list { + raw_spinlock_t lock; + struct list_head list; +}; + /** * struct perf_event - performance event kernel representation: */ @@ -675,6 +680,7 @@ struct perf_event { int cgrp_defer_enabled; #endif + struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1074,7 +1080,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); -extern int get_callchain_buffers(void); +extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; @@ -1326,6 +1332,13 @@ struct perf_pmu_events_attr { const char *event_str; }; +struct perf_pmu_events_ht_attr { + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; +}; + ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 36ce552cf6a9..c66a485a24ac 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -276,6 +276,9 @@ enum perf_event_read_format { /* * Hardware event_id to monitor via a performance monitoring event: + * + * @sample_max_stack: Max number of frame pointers in a callchain, + * should be < /proc/sys/kernel/perf_event_max_stack */ struct perf_event_attr 
{ @@ -385,7 +388,8 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; - __u32 __reserved_2; /* align to __u64 */ + __u16 sample_max_stack; + __u16 __reserved_2; /* align to __u64 */ }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 080a2dfb5800..bf4495fcd25d 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (err) goto free_smap; - err = get_callchain_buffers(); + err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) goto free_smap; diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 179ef4640964..e9fdb5203de5 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -104,7 +104,7 @@ fail: return -ENOMEM; } -int get_callchain_buffers(void) +int get_callchain_buffers(int event_max_stack) { int err = 0; int count; @@ -121,6 +121,15 @@ int get_callchain_buffers(void) /* If the allocation failed, give up */ if (!callchain_cpus_entries) err = -ENOMEM; + /* + * If requesting per event more than the global cap, + * return a different error to help userspace figure + * this out. + * + * And also do it here so that we have &callchain_mutex held. + */ + if (event_max_stack > sysctl_perf_event_max_stack) + err = -EOVERFLOW; goto exit; } @@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) bool user = !event->attr.exclude_callchain_user; /* Disallow cross-task user callchains. */ bool crosstask = event->ctx->task && event->ctx->task != current; + const u32 max_stack = event->attr.sample_max_stack; if (!kernel && !user) return NULL; - return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true); + return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); } struct perf_callchain_entry * diff --git a/kernel/events/core.c b/kernel/events/core.c index 274450efea90..05b923e2111a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -335,6 +335,7 @@ static atomic_t perf_sched_count; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(int, perf_sched_cb_usages); +static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -396,6 +397,13 @@ int perf_proc_update_handler(struct ctl_table *table, int write, if (ret || !write) return ret; + /* + * If throttling is disabled don't allow the write: + */ + if (sysctl_perf_cpu_time_max_percent == 100 || + sysctl_perf_cpu_time_max_percent == 0) + return -EINVAL; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); @@ -3665,6 +3673,39 @@ static void free_event_rcu(struct rcu_head *head) static void ring_buffer_attach(struct perf_event *event, struct ring_buffer *rb); +static void detach_sb_event(struct perf_event *event) +{ + struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); + + raw_spin_lock(&pel->lock); + list_del_rcu(&event->sb_list); + raw_spin_unlock(&pel->lock); +} + +static bool is_sb_event(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + if (event->parent) + return false; + + if (event->attach_state & PERF_ATTACH_TASK) + return false; + + if (attr->mmap || attr->mmap_data || attr->mmap2 || + attr->comm || attr->comm_exec || + attr->task || + 
attr->context_switch) + return true; + return false; +} + +static void unaccount_pmu_sb_event(struct perf_event *event) +{ + if (is_sb_event(event)) + detach_sb_event(event); +} + static void unaccount_event_cpu(struct perf_event *event, int cpu) { if (event->parent) @@ -3728,6 +3769,8 @@ static void unaccount_event(struct perf_event *event) } unaccount_event_cpu(event, event->cpu); + + unaccount_pmu_sb_event(event); } static void perf_sched_delayed(struct work_struct *work) @@ -5856,11 +5899,11 @@ perf_event_read_event(struct perf_event *event, perf_output_end(&handle); } -typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data); +typedef void (perf_iterate_f)(struct perf_event *event, void *data); static void -perf_event_aux_ctx(struct perf_event_context *ctx, - perf_event_aux_output_cb output, +perf_iterate_ctx(struct perf_event_context *ctx, + perf_iterate_f output, void *data, bool all) { struct perf_event *event; @@ -5877,52 +5920,55 @@ perf_event_aux_ctx(struct perf_event_context *ctx, } } -static void -perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data, - struct perf_event_context *task_ctx) +static void perf_iterate_sb_cpu(perf_iterate_f output, void *data) { - rcu_read_lock(); - preempt_disable(); - perf_event_aux_ctx(task_ctx, output, data, false); - preempt_enable(); - rcu_read_unlock(); + struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events); + struct perf_event *event; + + list_for_each_entry_rcu(event, &pel->list, sb_list) { + if (event->state < PERF_EVENT_STATE_INACTIVE) + continue; + if (!event_filter_match(event)) + continue; + output(event, data); + } } +/* + * Iterate all events that need to receive side-band events. + * + * For new callers; ensure that account_pmu_sb_event() includes + * your event, otherwise it might not get delivered. + */ static void -perf_event_aux(perf_event_aux_output_cb output, void *data, +perf_iterate_sb(perf_iterate_f output, void *data, struct perf_event_context *task_ctx) { - struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - struct pmu *pmu; int ctxn; + rcu_read_lock(); + preempt_disable(); + /* - * If we have task_ctx != NULL we only notify - * the task context itself. The task_ctx is set - * only for EXIT events before releasing task + * If we have task_ctx != NULL we only notify the task context itself. + * The task_ctx is set only for EXIT events before releasing task * context. 
*/ if (task_ctx) { - perf_event_aux_task_ctx(output, data, task_ctx); - return; + perf_iterate_ctx(task_ctx, output, data, false); + goto done; } - rcu_read_lock(); - list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); - if (cpuctx->unique_pmu != pmu) - goto next; - perf_event_aux_ctx(&cpuctx->ctx, output, data, false); - ctxn = pmu->task_ctx_nr; - if (ctxn < 0) - goto next; + perf_iterate_sb_cpu(output, data); + + for_each_task_context_nr(ctxn) { ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); if (ctx) - perf_event_aux_ctx(ctx, output, data, false); -next: - put_cpu_ptr(pmu->pmu_cpu_context); + perf_iterate_ctx(ctx, output, data, false); } +done: + preempt_enable(); rcu_read_unlock(); } @@ -5971,7 +6017,7 @@ void perf_event_exec(void) perf_event_enable_on_exec(ctxn); - perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL, + perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); } rcu_read_unlock(); @@ -6015,9 +6061,9 @@ static int __perf_pmu_output_stop(void *info) }; rcu_read_lock(); - perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false); + perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false); if (cpuctx->task_ctx) - perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop, + perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop, &ro, false); rcu_read_unlock(); @@ -6146,7 +6192,7 @@ static void perf_event_task(struct task_struct *task, }, }; - perf_event_aux(perf_event_task_output, + perf_iterate_sb(perf_event_task_output, &task_event, task_ctx); } @@ -6225,7 +6271,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - perf_event_aux(perf_event_comm_output, + perf_iterate_sb(perf_event_comm_output, comm_event, NULL); } @@ -6456,7 +6502,7 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; - perf_event_aux(perf_event_mmap_output, + perf_iterate_sb(perf_event_mmap_output, mmap_event, NULL); @@ -6539,7 +6585,7 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma) if (!ctx) continue; - perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true); + perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true); } rcu_read_unlock(); } @@ -6726,7 +6772,7 @@ static void perf_event_switch(struct task_struct *task, }, }; - perf_event_aux(perf_event_switch_output, + perf_iterate_sb(perf_event_switch_output, &switch_event, NULL); } @@ -8648,6 +8694,28 @@ unlock: return pmu; } +static void attach_sb_event(struct perf_event *event) +{ + struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); + + raw_spin_lock(&pel->lock); + list_add_rcu(&event->sb_list, &pel->list); + raw_spin_unlock(&pel->lock); +} + +/* + * We keep a list of all !task (and therefore per-cpu) events + * that need to receive side-band records. + * + * This avoids having to scan all the various PMU per-cpu contexts + * looking for them. 
+ */ +static void account_pmu_sb_event(struct perf_event *event) +{ + if (is_sb_event(event)) + attach_sb_event(event); +} + static void account_event_cpu(struct perf_event *event, int cpu) { if (event->parent) @@ -8728,6 +8796,8 @@ static void account_event(struct perf_event *event) enabled: account_event_cpu(event, event->cpu); + + account_pmu_sb_event(event); } /* @@ -8876,7 +8946,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!event->parent) { if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { - err = get_callchain_buffers(); + err = get_callchain_buffers(attr->sample_max_stack); if (err) goto err_addr_filters; } @@ -9198,6 +9268,9 @@ SYSCALL_DEFINE5(perf_event_open, return -EINVAL; } + if (!attr.sample_max_stack) + attr.sample_max_stack = sysctl_perf_event_max_stack; + /* * In cgroup mode, the pid argument is used to pass the fd * opened to the cgroup directory in cgroupfs. The cpu argument @@ -9271,7 +9344,7 @@ SYSCALL_DEFINE5(perf_event_open, if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto err_alloc; } } @@ -10233,6 +10306,9 @@ static void __init perf_event_init_all_cpus(void) swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu)); + + INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu)); + raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu)); } } diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c index 0e636c4339b8..b0a035fc87b3 100644 --- a/tools/lib/api/fd/array.c +++ b/tools/lib/api/fd/array.c @@ -85,7 +85,8 @@ int fdarray__add(struct fdarray *fda, int fd, short revents) } int fdarray__filter(struct fdarray *fda, short revents, - void (*entry_destructor)(struct fdarray *fda, int fd)) + void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), + void *arg) { int fd, nr = 0; @@ -95,7 +96,7 @@ int fdarray__filter(struct fdarray *fda, short revents, for (fd = 0; fd < fda->nr; ++fd) { if (fda->entries[fd].revents & revents) { if (entry_destructor) - entry_destructor(fda, fd); + entry_destructor(fda, fd, arg); continue; } diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index 45db01818f45..e87fd800fa8d 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -34,7 +34,8 @@ void fdarray__delete(struct fdarray *fda); int fdarray__add(struct fdarray *fda, int fd, short revents); int fdarray__poll(struct fdarray *fda, int timeout); int fdarray__filter(struct fdarray *fda, short revents, - void (*entry_destructor)(struct fdarray *fda, int fd)); + void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), + void *arg); int fdarray__grow(struct fdarray *fda, int extra); int fdarray__fprintf(struct fdarray *fda, FILE *fp); diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 3d1bb802dbf4..3db3db9278be 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -30,3 +30,4 @@ config.mak.autogen *.pyo .config-detected util/intel-pt-decoder/inat-tables.c +arch/*/include/generated/ diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 357f1b13b5ae..2e5567c94e09 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -62,6 +62,8 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion tc; int err; + if (!pc) + return 0; err = perf_read_tsc_conversion(pc, &tc); if (err == -EOPNOTSUPP) return 0; diff --git a/tools/perf/builtin-record.c 
b/tools/perf/builtin-record.c index dc3fcb597e4c..d4cf1b0c88f9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -655,6 +655,13 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused return 0; } +static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) +{ + if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base) + return rec->evlist->mmap[0].base; + return NULL; +} + static int record__synthesize(struct record *rec) { struct perf_session *session = rec->session; @@ -692,7 +699,7 @@ static int record__synthesize(struct record *rec) } } - err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool, + err = perf_event__synth_time_conv(record__pick_pc(rec), tool, process_synthesized_event, machine); if (err) goto out; diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index c809463edbe5..59dbd0550c51 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -36,7 +36,7 @@ int test__fdarray__filter(int subtest __maybe_unused) } fdarray__init_revents(fda, POLLIN); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); if (nr_fds != fda->nr_alloc) { pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything", nr_fds, fda->nr_alloc); @@ -44,7 +44,7 @@ int test__fdarray__filter(int subtest __maybe_unused) } fdarray__init_revents(fda, POLLHUP); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); if (nr_fds != 0) { pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds", nr_fds, fda->nr_alloc); @@ -57,7 +57,7 @@ int test__fdarray__filter(int subtest __maybe_unused) pr_debug("\nfiltering all but fda->entries[2]:"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 1) { pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds); @@ -78,7 +78,7 @@ int test__fdarray__filter(int subtest __maybe_unused) pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 2) { pr_debug("\nfdarray__filter()=%d != 2, should have left just two events", diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 67e5966503b2..67f986c8c378 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -144,7 +144,32 @@ static int asnprintf(char **strp, size_t size, const char *fmt, ...) 
return ret; } -static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, + size_t size) +{ + bool is_alloc = !!bf; + bool retry_old = true; + + asnprintf(&bf, size, "%s/%s/%s/kallsyms", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); +retry: + if (!access(bf, F_OK)) + return bf; + if (is_alloc) + free(bf); + if (retry_old) { + /* Try old style kallsyms cache */ + asnprintf(&bf, size, "%s/%s/%s", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); + retry_old = false; + goto retry; + } + + return NULL; +} + +static char *build_id_cache__linkname(const char *sbuild_id, char *bf, + size_t size) { char *tmp = bf; int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, @@ -154,23 +179,52 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) return bf; } +static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso) +{ + return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : "elf"); +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { - char build_id_hex[SBUILD_ID_SIZE]; + bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); + bool is_vdso = dso__is_vdso((struct dso *)dso); + char sbuild_id[SBUILD_ID_SIZE]; + char *linkname; + bool alloc = (bf == NULL); + int ret; if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - return build_id__filename(build_id_hex, bf, size); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + linkname = build_id_cache__linkname(sbuild_id, NULL, 0); + if (!linkname) + return NULL; + + /* Check if old style build_id cache */ + if (is_regular_file(linkname)) + ret = asnprintf(&bf, size, "%s", linkname); + else + ret = asnprintf(&bf, size, "%s/%s", linkname, + build_id_cache__basename(is_kallsyms, is_vdso)); + if (ret < 0 || (!alloc && size < (unsigned int)ret)) + bf = NULL; + free(linkname); + + return bf; } bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) { - char *id_name, *ch; + char *id_name = NULL, *ch; struct stat sb; + char sbuild_id[SBUILD_ID_SIZE]; + + if (!dso->has_build_id) + goto err; - id_name = dso__build_id_filename(dso, bf, size); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + id_name = build_id_cache__linkname(sbuild_id, NULL, 0); if (!id_name) goto err; if (access(id_name, F_OK)) @@ -194,18 +248,14 @@ bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) if (ch - 3 < bf) goto err; + free(id_name); return strncmp(".ko", ch - 3, 3) == 0; err: - /* - * If dso__build_id_filename work, get id_name again, - * because id_name points to bf and is broken. - */ - if (id_name) - id_name = dso__build_id_filename(dso, bf, size); pr_err("Invalid build id: %s\n", id_name ? : dso->long_name ? : dso->short_name ? : "[unknown]"); + free(id_name); return false; } @@ -341,7 +391,8 @@ void disable_buildid_cache(void) } static char *build_id_cache__dirname_from_path(const char *name, - bool is_kallsyms, bool is_vdso) + bool is_kallsyms, bool is_vdso, + const char *sbuild_id) { char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; @@ -352,8 +403,9 @@ static char *build_id_cache__dirname_from_path(const char *name, return NULL; } - if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname) < 0) + if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? 
DSO__NAME_VDSO : realname, + sbuild_id ? "/" : "", sbuild_id ?: "") < 0) filename = NULL; if (!slash) @@ -368,7 +420,8 @@ int build_id_cache__list_build_ids(const char *pathname, char *dir_name; int ret = 0; - dir_name = build_id_cache__dirname_from_path(pathname, false, false); + dir_name = build_id_cache__dirname_from_path(pathname, false, false, + NULL); if (!dir_name) return -ENOMEM; @@ -385,7 +438,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, { const size_t size = PATH_MAX; char *realname = NULL, *filename = NULL, *dir_name = NULL, - *linkname = zalloc(size), *targetname, *tmp; + *linkname = zalloc(size), *tmp; int err = -1; if (!is_kallsyms) { @@ -394,14 +447,22 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, + is_vdso, sbuild_id); if (!dir_name) goto out_free; + /* Remove old style build-id cache */ + if (is_regular_file(dir_name)) + if (unlink(dir_name)) + goto out_free; + if (mkdir_p(dir_name, 0755)) goto out_free; - if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + /* Save the allocated buildid dirname */ + if (asprintf(&filename, "%s/%s", dir_name, + build_id_cache__basename(is_kallsyms, is_vdso)) < 0) { filename = NULL; goto out_free; } @@ -415,7 +476,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - if (!build_id__filename(sbuild_id, linkname, size)) + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; tmp = strrchr(linkname, '/'); *tmp = '\0'; @@ -424,10 +485,10 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; *tmp = '/'; - targetname = filename + strlen(buildid_dir) - 5; - memcpy(targetname, "../..", 5); + tmp = dir_name + strlen(buildid_dir) - 5; + memcpy(tmp, "../..", 5); - if (symlink(targetname, linkname) == 0) + if (symlink(tmp, linkname) == 0) err = 0; out_free: if (!is_kallsyms) @@ -452,7 +513,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, bool build_id_cache__cached(const char *sbuild_id) { bool ret = false; - char *filename = build_id__filename(sbuild_id, NULL, 0); + char *filename = build_id_cache__linkname(sbuild_id, NULL, 0); if (filename && !access(filename, F_OK)) ret = true; @@ -471,7 +532,7 @@ int build_id_cache__remove_s(const char *sbuild_id) if (filename == NULL || linkname == NULL) goto out_free; - if (!build_id__filename(sbuild_id, linkname, size)) + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; if (access(linkname, F_OK)) @@ -489,7 +550,7 @@ int build_id_cache__remove_s(const char *sbuild_id) tmp = strrchr(linkname, '/') + 1; snprintf(tmp, size - (tmp - linkname), "%s", filename); - if (unlink(linkname)) + if (rm_rf(linkname)) goto out_free; err = 0; @@ -501,7 +562,7 @@ out_free: static int dso__cache_build_id(struct dso *dso, struct machine *machine) { - bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; + bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; char nm[PATH_MAX]; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 64af3e20610d..e5435f46e48e 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -14,6 +14,8 @@ struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int 
filename__sprintf_build_id(const char *pathname, char *sbuild_id); +char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, + size_t size); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 65e2a4f7cb4e..a70f6b54eb92 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -94,6 +94,7 @@ struct callchain_param { enum perf_call_graph_mode record_mode; u32 dump_size; enum chain_mode mode; + u16 max_stack; u32 print_limit; double min_percent; sort_chain_func_t sort; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index bbf69d248ec5..9f53020c3269 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -204,6 +204,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si return (value_int & value_mask) | ~value_mask; } +static int string_set_value(struct bt_ctf_field *field, const char *string) +{ + char *buffer = NULL; + size_t len = strlen(string), i, p; + int err; + + for (i = p = 0; i < len; i++, p++) { + if (isprint(string[i])) { + if (!buffer) + continue; + buffer[p] = string[i]; + } else { + char numstr[5]; + + snprintf(numstr, sizeof(numstr), "\\x%02x", + (unsigned int)(string[i]) & 0xff); + + if (!buffer) { + buffer = zalloc(i + (len - i) * 4 + 2); + if (!buffer) { + pr_err("failed to set unprintable string '%s'\n", string); + return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING"); + } + if (i > 0) + strncpy(buffer, string, i); + } + strncat(buffer + p, numstr, 4); + p += 3; + } + } + + if (!buffer) + return bt_ctf_field_string_set_value(field, string); + err = bt_ctf_field_string_set_value(field, buffer); + free(buffer); + return err; +} + static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, @@ -270,8 +308,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } if (flags & FIELD_IS_STRING) - ret = bt_ctf_field_string_set_value(field, - data + offset + i * len); + ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index c9a6dc173e74..b0c2b5c5d337 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -233,17 +233,6 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym, return 0; } -static struct thread *get_main_thread(struct machine *machine, struct thread *thread) -{ - if (thread->pid_ == thread->tid) - return thread__get(thread); - - if (thread->pid_ == -1) - return NULL; - - return machine__find_thread(machine, thread->pid_, thread->pid_); -} - static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, u64 *dso_db_id, u64 *sym_db_id, u64 *offset) { @@ -382,7 +371,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, if (err) return err; - main_thread = get_main_thread(al->machine, thread); + main_thread = thread__main_thread(al->machine, thread); if (main_thread) comm = machine__thread_exec_comm(al->machine, main_thread); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 0953280629cf..76d79d070e21 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -349,6 +349,11 @@ static inline bool dso__is_kcore(struct dso *dso) dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE; } 
+static inline bool dso__is_kallsyms(struct dso *dso) +{ + return dso->kernel && dso->long_name[0] != '/'; +} + void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f6fcc6832949..9b141f12329e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -673,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; + if (symbol_conf.kptr_restrict) + return -1; if (map == NULL) return -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e82ba90cc969..e0f30946ed1a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -462,9 +462,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) return 0; } -static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx) +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent) { - int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); /* * Save the idx so that when we filter out fds POLLHUP'ed we can * close the associated evlist->mmap[] entry. @@ -480,10 +480,11 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { - return __perf_evlist__add_pollfd(evlist, fd, -1); + return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN); } -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) +static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, + void *arg __maybe_unused) { struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd); @@ -493,7 +494,7 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { return fdarray__filter(&evlist->pollfd, revents_and_mask, - perf_evlist__munmap_filtered); + perf_evlist__munmap_filtered, NULL); } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) @@ -777,7 +778,7 @@ broken_event: return event; } -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; u64 head; @@ -832,6 +833,13 @@ perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) return perf_mmap__read(md, false, start, end, &md->prev); } +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +{ + if (!evlist->backward) + return perf_evlist__mmap_read_forward(evlist, idx); + return perf_evlist__mmap_read_backward(evlist, idx); +} + void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; @@ -856,9 +864,11 @@ static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) { - BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0); + struct perf_mmap *md = &evlist->mmap[idx]; - if (atomic_dec_and_test(&evlist->mmap[idx].refcnt)) + BUG_ON(md->base && atomic_read(&md->refcnt) == 0); + + if (atomic_dec_and_test(&md->refcnt)) __perf_evlist__munmap(evlist, idx); } @@ -983,15 +993,28 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, return 0; } +static bool +perf_evlist__should_poll(struct perf_evlist *evlist 
__maybe_unused, + struct perf_evsel *evsel) +{ + if (evsel->overwrite) + return false; + return true; +} + static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu, int thread, int *output) { struct perf_evsel *evsel; + int revent; evlist__for_each(evlist, evsel) { int fd; + if (evsel->overwrite != (evlist->overwrite && evlist->backward)) + continue; + if (evsel->system_wide && thread) continue; @@ -1008,6 +1031,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, perf_evlist__mmap_get(evlist, idx); } + revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; + /* * The system_wide flag causes a selected event to be opened * always without a pid. Consequently it will never get a @@ -1016,7 +1041,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, * Therefore don't add it for polling. */ if (!evsel->system_wide && - __perf_evlist__add_pollfd(evlist, fd, idx) < 0) { + __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) { perf_evlist__mmap_put(evlist, idx); return -1; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index d740fb877ab6..68cb1361c97c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -131,6 +131,8 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, + int idx); union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx); void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5d7037ef7d3b..18e18f1d435e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -572,6 +572,8 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, perf_evsel__set_sample_bit(evsel, CALLCHAIN); + attr->sample_max_stack = param->max_stack; + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { @@ -635,7 +637,8 @@ static void apply_config_terms(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct callchain_param param; u32 dump_size = 0; - char *callgraph_buf = NULL; + int max_stack = 0; + const char *callgraph_buf = NULL; /* callgraph default */ param.record_mode = callchain_param.record_mode; @@ -662,6 +665,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; + case PERF_EVSEL__CONFIG_TERM_MAX_STACK: + max_stack = term->val.max_stack; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -677,7 +683,12 @@ static void apply_config_terms(struct perf_evsel *evsel, } /* User explicitly set per-event callgraph, clear the old setting and reset. 
*/ - if ((callgraph_buf != NULL) || (dump_size > 0)) { + if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + if (max_stack) { + param.max_stack = max_stack; + if (callgraph_buf == NULL) + callgraph_buf = "fp"; + } /* parse callgraph parameters */ if (callgraph_buf != NULL) { @@ -1329,6 +1340,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(clockid, p_signed); PRINT_ATTRf(sample_regs_intr, p_hex); PRINT_ATTRf(aux_watermark, p_unsigned); + PRINT_ATTRf(sample_max_stack, p_unsigned); return ret; } @@ -2372,6 +2384,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "No such device - did you specify an out-of-range profile CPU?"); break; case EOPNOTSUPP: + if (evsel->attr.sample_period != 0) + return scnprintf(msg, size, "%s", + "PMU Hardware doesn't support sampling/overflow-interrupts."); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c1f10159804c..028412b32d5a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -44,6 +44,7 @@ enum { PERF_EVSEL__CONFIG_TERM_CALLGRAPH, PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX_STACK, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -56,6 +57,7 @@ struct perf_evsel_config_term { bool time; char *callgraph; u64 stack_user; + int max_stack; bool inherit; } val; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c6fd0479f4cd..d15e335842b7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -900,6 +900,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", + [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", }; static bool config_term_shrinked; @@ -995,6 +996,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1040,6 +1044,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_STACKSIZE: case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: return config_term_common(attr, term, err); default: if (err) { @@ -1109,6 +1114,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOINHERIT: ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 
0 : 1); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d740c3ca9a1d..46c05ccd5dfe 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,6 +68,7 @@ enum { PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, + PARSE_EVENTS__TERM_TYPE_MAX_STACK, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 1477fbc78993..01af1ee90a27 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -199,6 +199,7 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } +max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5214974e841a..dfedf097b9b1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -593,6 +593,7 @@ do { \ if (bswap_safe(f, 0)) \ attr->f = bswap_##sz(attr->f); \ } while(0) +#define bswap_field_16(f) bswap_field(f, 16) #define bswap_field_32(f) bswap_field(f, 32) #define bswap_field_64(f) bswap_field(f, 64) @@ -608,6 +609,7 @@ do { \ bswap_field_64(sample_regs_user); bswap_field_32(sample_stack_user); bswap_field_32(aux_watermark); + bswap_field_16(sample_max_stack); /* * After read_format are bitfields. Check read_format because diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 20f9cb32b703..09c5c34ae38d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1641,6 +1641,20 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) return ret; } +/* + * Use open(O_RDONLY) to check readability directly instead of access(R_OK) + * since access(R_OK) only checks with real UID/GID but open() use effective + * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO). + */ +static bool filename__readable(const char *file) +{ + int fd = open(file, O_RDONLY); + if (fd < 0) + return false; + close(fd); + return true; +} + static char *dso__find_kallsyms(struct dso *dso, struct map *map) { u8 host_build_id[BUILD_ID_SIZE]; @@ -1660,58 +1674,43 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) sizeof(host_build_id)) == 0) is_host = dso__build_id_equal(dso, host_build_id); - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - - scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir, - DSO__NAME_KCORE, sbuild_id); - - /* Use /proc/kallsyms if possible */ + /* Try a fast path for /proc/kallsyms if possible */ if (is_host) { - DIR *d; - int fd; - - /* If no cached kcore go with /proc/kallsyms */ - d = opendir(path); - if (!d) - goto proc_kallsyms; - closedir(d); - /* - * Do not check the build-id cache, until we know we cannot use - * /proc/kcore. + * Do not check the build-id cache, unless we know we cannot use + * /proc/kcore or module maps don't match to /proc/kallsyms. 
+ * To check readability of /proc/kcore, do not use access(R_OK) + * since /proc/kcore requires CAP_SYS_RAWIO to read and access + * can't check it. */ - fd = open("/proc/kcore", O_RDONLY); - if (fd != -1) { - close(fd); - /* If module maps match go with /proc/kallsyms */ - if (!validate_kcore_addresses("/proc/kallsyms", map)) - goto proc_kallsyms; - } - - /* Find kallsyms in build-id cache with kcore */ - if (!find_matching_kcore(map, path, sizeof(path))) - return strdup(path); - - goto proc_kallsyms; + if (filename__readable("/proc/kcore") && + !validate_kcore_addresses("/proc/kallsyms", map)) + goto proc_kallsyms; } + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + /* Find kallsyms in build-id cache with kcore */ + scnprintf(path, sizeof(path), "%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuild_id); + if (!find_matching_kcore(map, path, sizeof(path))) return strdup(path); - scnprintf(path, sizeof(path), "%s/%s/%s", - buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); + /* Use current /proc/kallsyms if possible */ + if (is_host) { +proc_kallsyms: + return strdup("/proc/kallsyms"); + } - if (access(path, F_OK)) { + /* Finally, find a cache of kallsyms */ + if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) { pr_err("No kallsyms or vmlinux with build-id %s was found\n", sbuild_id); return NULL; } return strdup(path); - -proc_kallsyms: - return strdup("/proc/kallsyms"); } static int dso__load_kernel_sym(struct dso *dso, struct map *map, @@ -1933,17 +1932,17 @@ int setup_intlist(struct intlist **list, const char *list_str, static bool symbol__read_kptr_restrict(void) { bool value = false; + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (geteuid() != 0) { - FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (fp != NULL) { - char line[8]; + if (fp != NULL) { + char line[8]; - if (fgets(line, sizeof(line), fp) != NULL) - value = atoi(line) != 0; + if (fgets(line, sizeof(line), fp) != NULL) + value = (geteuid() != 0) ? + (atoi(line) != 0) : + (atoi(line) == 2); - fclose(fp); - } + fclose(fp); } return value; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 45fcb715a36b..ada58e6070bf 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -265,3 +265,14 @@ void thread__find_cpumode_addr_location(struct thread *thread, break; } } + +struct thread *thread__main_thread(struct machine *machine, struct thread *thread) +{ + if (thread->pid_ == thread->tid) + return thread__get(thread); + + if (thread->pid_ == -1) + return NULL; + + return machine__find_thread(machine, thread->pid_, thread->pid_); +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 45fba13c800b..08fcb14cf637 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -81,6 +81,8 @@ void thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); +struct thread *thread__main_thread(struct machine *machine, struct thread *thread); + void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al);
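
The topdown-* events added in intel/core.c export only raw slot counts plus a .scale attribute (4 slots per cycle on an isolated core, 2 when the any=1 variants sum both SMT threads, per the comment in that hunk). Turning those counts into the usual level-1 breakdown is plain arithmetic. The sketch below follows the standard Top-Down method; the formulas themselves are an assumption drawn from that method, not text in this diff, and the counts in main() are made up for illustration.

#include <stdio.h>

/*
 * Level-1 Top-Down arithmetic over the counts the new sysfs events
 * provide. Inputs are assumed to be already multiplied by their
 * .scale attribute where one exists (total-slots, recovery-bubbles).
 */
struct topdown_counts {
	double total_slots;      /* topdown-total-slots * scale */
	double slots_issued;     /* topdown-slots-issued */
	double slots_retired;    /* topdown-slots-retired */
	double fetch_bubbles;    /* topdown-fetch-bubbles */
	double recovery_bubbles; /* topdown-recovery-bubbles * scale */
};

static void topdown_level1(const struct topdown_counts *c)
{
	double fe  = c->fetch_bubbles / c->total_slots;
	double bad = (c->slots_issued - c->slots_retired +
		      c->recovery_bubbles) / c->total_slots;
	double ret = c->slots_retired / c->total_slots;
	double be  = 1.0 - fe - bad - ret;	/* remainder is backend bound */

	printf("frontend bound   %5.1f%%\n", 100.0 * fe);
	printf("bad speculation  %5.1f%%\n", 100.0 * bad);
	printf("retiring         %5.1f%%\n", 100.0 * ret);
	printf("backend bound    %5.1f%%\n", 100.0 * be);
}

int main(void)
{
	/* made-up counts, for illustration only */
	struct topdown_counts c = {
		.total_slots      = 4000000.0,
		.slots_issued     = 2100000.0,
		.slots_retired    = 2000000.0,
		.fetch_bubbles    =  800000.0,
		.recovery_bubbles =  200000.0,
	};

	topdown_level1(&c);
	return 0;
}

This is also why events_ht_sysfs_show() picks the event string at read time: the same sysfs name must resolve to the any=1 encoding and the halved scale whenever topology_max_smt_threads() reports more than one thread.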
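
The Knights Landing uncore table above works because uncore_pci_probe() now distinguishes the full driver_data form (upper bits set, i.e. driver_data & ~0xffff non-zero) from the legacy two-byte form. Below is a standalone restatement of that packing, with the macros copied verbatim from the uncore.h hunk; the type value 2 merely stands in for the KNL_PCI_UNCORE_MC_DCLK enum slot and is an assumption.

#include <assert.h>
#include <stdio.h>

/* Macros as added to arch/x86/events/intel/uncore.h in this diff. */
#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx)	\
	((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DEV(data)	((data >> 24) & 0xff)
#define UNCORE_PCI_DEV_FUNC(data)	((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)

int main(void)
{
	/* MC1 DClk CH 2 from the table: device 9, function 4, box idx 5 */
	unsigned long data = UNCORE_PCI_DEV_FULL_DATA(9, 4, 2, 5);

	/*
	 * uncore_pci_probe() only takes the devfn-matching slow path
	 * when the device/function bits are present:
	 */
	assert(data & ~0xffffUL);

	printf("dev=%lu func=%lu type=%lu idx=%lu\n",
	       UNCORE_PCI_DEV_DEV(data), UNCORE_PCI_DEV_FUNC(data),
	       UNCORE_PCI_DEV_TYPE(data), UNCORE_PCI_DEV_IDX(data));
	return 0;
}

Packing PCI device and function into driver_data is what lets several table entries share one PCI device ID (0x7843 and friends) while still mapping each probed device to its own box index.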
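
On the callchain side, the new perf_event_attr::sample_max_stack field defaults to the perf_event_max_stack sysctl when left at zero, and get_callchain_buffers() now fails with -EOVERFLOW when a per-event request exceeds that sysctl. A minimal sketch of opening a sampling event with a per-event callchain cap — assuming 4.8+ UAPI headers that carry the sample_max_stack field:

#include <errno.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;
	attr.sample_max_stack = 64;	/* cap frames for this event only */

	/* measure the current task on any CPU */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		if (errno == EOVERFLOW)
			fprintf(stderr,
				"sample_max_stack exceeds /proc/sys/kernel/perf_event_max_stack\n");
		else
			perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}

The tooling half of the series exposes the same knob as the per-event max-stack term parsed above (PARSE_EVENTS__TERM_TYPE_MAX_STACK), along the lines of perf record -e cycles/max-stack=64/, which apply_config_terms() routes into attr->sample_max_stack via callchain_param.max_stack.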