diff options
-rw-r--r-- | arch/powerpc/perf/core-book3s.c | 1 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 25 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 9 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 12 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 159 | ||||
-rw-r--r-- | include/linux/perf_event.h | 12 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 8 | ||||
-rw-r--r-- | kernel/events/core.c | 10 |
9 files changed, 234 insertions, 4 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3086055bf681..3dcfecf858f3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -518,6 +518,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * } } cpuhw->bhrb_stack.nr = u_index; + cpuhw->bhrb_stack.hw_idx = -1ULL; return; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index dff6623804c2..332954cccece 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1945,6 +1945,14 @@ static __initconst const u64 knl_hw_cache_extra_regs * intel_bts events don't coexist with intel PMU's BTS events because of * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them * disabled around intel PMU's event batching etc, only inside the PMI handler. + * + * Avoid PEBS_ENABLE MSR access in PMIs. + * The GLOBAL_CTRL has been disabled. All the counters do not count anymore. + * It doesn't matter if the PEBS is enabled or not. + * Usually, the PEBS status are not changed in PMIs. It's unnecessary to + * access PEBS_ENABLE MSR in disable_all()/enable_all(). + * However, there are some cases which may change PEBS status, e.g. PMI + * throttle. The PEBS_ENABLE should be updated where the status changes. */ static void __intel_pmu_disable_all(void) { @@ -1954,13 +1962,12 @@ static void __intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - - intel_pmu_pebs_disable_all(); } static void intel_pmu_disable_all(void) { __intel_pmu_disable_all(); + intel_pmu_pebs_disable_all(); intel_pmu_lbr_disable_all(); } @@ -1968,7 +1975,6 @@ static void __intel_pmu_enable_all(int added, bool pmi) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(pmi); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); @@ -1986,6 +1992,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) static void intel_pmu_enable_all(int added) { + intel_pmu_pebs_enable_all(); __intel_pmu_enable_all(added, false); } @@ -2374,9 +2381,21 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * PEBS overflow sets bit 62 in the global status register */ if (__test_and_clear_bit(62, (unsigned long *)&status)) { + u64 pebs_enabled = cpuc->pebs_enabled; + handled++; x86_pmu.drain_pebs(regs); status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + + /* + * PMI throttle may be triggered, which stops the PEBS event. + * Although cpuc->pebs_enabled is updated accordingly, the + * MSR_IA32_PEBS_ENABLE is not updated. Because the + * cpuc->enabled has been forced to 0 in PMI. + * Update the MSR if pebs_enabled is changed. + */ + if (pebs_enabled != cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); } /* diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 534c76606049..65113b16804a 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -585,6 +585,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; + cpuc->lbr_stack.hw_idx = tos; } /* @@ -680,6 +681,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) out++; } cpuc->lbr_stack.nr = out; + cpuc->lbr_stack.hw_idx = tos; } void intel_pmu_lbr_read(void) @@ -1120,6 +1122,13 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr) int i; cpuc->lbr_stack.nr = x86_pmu.lbr_nr; + + /* Cannot get TOS for large PEBS */ + if (cpuc->n_pebs == cpuc->n_large_pebs) + cpuc->lbr_stack.hw_idx = -1ULL; + else + cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); + for (i = 0; i < x86_pmu.lbr_nr; i++) { u64 info = lbr->lbr[i].info; struct perf_branch_entry *e = &cpuc->lbr_entries[i]; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86467f85c383..63922e3a34f5 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1470,6 +1470,16 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun tgl_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .mmio_init = tgl_uncore_mmio_init, +}; + +static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { + .cpu_init = icl_uncore_cpu_init, + .mmio_init = tgl_l_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun snr_uncore_init __initconst = { .cpu_init = snr_uncore_cpu_init, .pci_init = snr_uncore_pci_init, @@ -1505,6 +1515,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_TIGERLAKE_L, tgl_l_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_TIGERLAKE, tgl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index bbfdaa720b45..1204dcc9fe9b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -527,6 +527,8 @@ void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); +void tgl_uncore_mmio_init(void); +void tgl_l_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index c37cb12d0ef6..3de1065eefc4 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -44,6 +44,11 @@ #define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 +#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02 +#define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04 +#define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12 +#define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14 +#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36 /* SNB event control */ @@ -1002,3 +1007,157 @@ void nhm_uncore_cpu_init(void) } /* end of Nehalem uncore support */ + +/* Tiger Lake MMIO uncore support */ + +static const struct pci_device_id tgl_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ } +}; + +enum perf_tgl_uncore_imc_freerunning_types { + TGL_MMIO_UNCORE_IMC_DATA_TOTAL, + TGL_MMIO_UNCORE_IMC_DATA_READ, + TGL_MMIO_UNCORE_IMC_DATA_WRITE, + TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 }, +}; + +static struct freerunning_counters tgl_uncore_imc_freerunning[] = { + [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 }, + [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 }, +}; + +static struct uncore_event_desc tgl_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"), + INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"), + + { /* end: all zeroes */ } +}; + +static struct pci_dev *tgl_uncore_get_mc_dev(void) +{ + const struct pci_device_id *ids = tgl_uncore_pci_ids; + struct pci_dev *mc_dev = NULL; + + while (ids && ids->vendor) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL); + if (mc_dev) + return mc_dev; + ids++; + } + + return mc_dev; +} + +#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 + +static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = tgl_uncore_get_mc_dev(); + struct intel_uncore_pmu *pmu = box->pmu; + resource_size_t addr; + u32 mch_bar; + + if (!pdev) { + pr_warn("perf uncore: Cannot find matched IMC device.\n"); + return; + } + + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); + /* MCHBAR is disabled */ + if (!(mch_bar & BIT(0))) { + pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); + return; + } + mch_bar &= ~BIT(0); + addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); + addr |= ((resource_size_t)mch_bar << 32); +#endif + + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); +} + +static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { + .init_box = tgl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct attribute *tgl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL +}; + +static const struct attribute_group tgl_uncore_imc_format_group = { + .name = "format", + .attrs = tgl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type tgl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .freerunning = tgl_uncore_imc_freerunning, + .ops = &tgl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *tgl_mmio_uncores[] = { + &tgl_uncore_imc_free_running, + NULL +}; + +void tgl_l_uncore_mmio_init(void) +{ + tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning; + uncore_mmio_uncores = tgl_mmio_uncores; +} + +void tgl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = tgl_mmio_uncores; +} + +/* end of Tiger Lake MMIO uncore support */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 547773f5894e..68e21e828893 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -93,14 +93,26 @@ struct perf_raw_record { /* * branch stack layout: * nr: number of taken branches stored in entries[] + * hw_idx: The low level index of raw branch records + * for the most recent branch. + * -1ULL means invalid/unknown. * * Note that nr can vary from sample to sample * branches (to, from) are stored from most recent * to least recent, i.e., entries[0] contains the most * recent branch. + * The entries[] is an abstraction of raw branch records, + * which may not be stored in age order in HW, e.g. Intel LBR. + * The hw_idx is to expose the low level index of raw + * branch record for the most recent branch aka entries[0]. + * The hw_idx index is between -1 (unknown) and max depth, + * which can be retrieved in /sys/devices/cpu/caps/branches. + * For the architectures whose raw branch records are + * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { __u64 nr; + __u64 hw_idx; struct perf_branch_entry entries[0]; }; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 377d794d3105..397cfd65b3fe 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -208,6 +210,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -853,7 +857,9 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 nr; - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX + * { u64 from, to, flags } lbr[nr]; + * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER diff --git a/kernel/events/core.c b/kernel/events/core.c index e453589da97c..3f1f77de7247 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6555,6 +6555,11 @@ static void perf_output_read(struct perf_output_handle *handle, perf_output_read_one(handle, event, enabled, running); } +static inline bool perf_sample_save_hw_index(struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, @@ -6643,6 +6648,8 @@ void perf_output_sample(struct perf_output_handle *handle, * sizeof(struct perf_branch_entry); perf_output_put(handle, data->br_stack->nr); + if (perf_sample_save_hw_index(event)) + perf_output_put(handle, data->br_stack->hw_idx); perf_output_copy(handle, data->br_stack->entries, size); } else { /* @@ -6836,6 +6843,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_BRANCH_STACK) { int size = sizeof(u64); /* nr */ if (data->br_stack) { + if (perf_sample_save_hw_index(event)) + size += sizeof(u64); + size += data->br_stack->nr * sizeof(struct perf_branch_entry); } |