about summary refs log tree commit diff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/events/core.c 23
-rw-r--r-- arch/x86/events/intel/core.c 94
-rw-r--r-- arch/x86/events/intel/rapl.c 2
-rw-r--r-- arch/x86/events/intel/uncore.c 43
-rw-r--r-- arch/x86/events/intel/uncore.h 4
-rw-r--r-- arch/x86/events/intel/uncore_snbep.c 96
-rw-r--r-- arch/x86/events/perf_event.h 10
-rw-r--r-- arch/x86/include/asm/topology.h 9
-rw-r--r-- arch/x86/kernel/smpboot.c 25
9 files changed, 271 insertions, 35 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 33787ee817f0..929655db5084 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7c666958a625..5081b4cdad0d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -177,7 +177,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -186,10 +186,8 @@ struct event_constraint intel_skl_event_constraints[] = {
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
@@ -225,14 +223,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all in slots, which is a free slot in a 4 wide
+ * pipeline. Some events are already reported in slots, for cycle
+ * events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+ "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
+ "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+ "event=0xe,umask=0x1"); /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+ "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+ "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+ "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
+ "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+ "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
@@ -258,7 +293,7 @@ static struct event_constraint intel_hsw_event_constraints[] = {
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
@@ -1332,6 +1367,29 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+ "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+ "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+ "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+ EVENT_PTR(td_total_slots_slm),
+ EVENT_PTR(td_total_slots_scale_slm),
+ EVENT_PTR(td_fetch_bubbles_slm),
+ EVENT_PTR(td_fetch_bubbles_scale_slm),
+ EVENT_PTR(td_slots_issued_slm),
+ EVENT_PTR(td_slots_retired_slm),
+ NULL
+};
+
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@ -3437,6 +3495,13 @@ static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
@@ -3587,6 +3652,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.cpu_events = slm_events_attrs;
pr_cont("Silvermont events, ");
break;
@@ -3805,6 +3871,12 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
+ /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+ event_attr_td_recovery_bubbles.event_str_noht =
+ "event=0xd,umask=0x1,cmask=1";
+ event_attr_td_recovery_bubbles.event_str_ht =
+ "event=0xd,umask=0x1,cmask=1,any=1";
+
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
@@ -3917,16 +3989,14 @@ __init int intel_pmu_init(void)
*/
static __init int fixup_ht_bug(void)
{
- int cpu = smp_processor_id();
- int w, c;
+ int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
+ if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 99c4bab123cd..e30eef4f29a6 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void)
int i;
for (i = 0; i < rapl_pmus->maxpkg; i++)
- kfree(rapl_pmus->pmus + i);
+ kfree(rapl_pmus->pmus[i]);
kfree(rapl_pmus);
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index fce74062d981..65490589e52e 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -882,7 +882,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
@@ -903,20 +903,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+ * for multiple instances of an uncore PMU device type. We should check
+ * PCI slot and func to indicate the uncore box.
*/
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
- */
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9a3580..66c3a3657a10 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -15,7 +15,11 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
+ ((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff)
+#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b2625867ebd1..7336e55c248c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = {
*/
static const struct pci_device_id knl_uncore_pci_ids[] = {
- { /* MC UClk */
+ { /* MC0 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0),
},
- { /* MC DClk Channel */
+ { /* MC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1),
+ },
+ { /* MC0 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0),
+ },
+ { /* MC0 DClk CH 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1),
+ },
+ { /* MC0 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2),
+ },
+ { /* MC1 DClk CH 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3),
+ },
+ { /* MC1 DClk CH 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4),
+ },
+ { /* MC1 DClk CH 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5),
+ },
+ { /* EDC0 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0),
+ },
+ { /* EDC1 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1),
+ },
+ { /* EDC2 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2),
+ },
+ { /* EDC3 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3),
},
- { /* EDC UClk */
+ { /* EDC4 UClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4),
+ },
+ { /* EDC5 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5),
+ },
+ { /* EDC6 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6),
+ },
+ { /* EDC7 UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7),
+ },
+ { /* EDC0 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* EDC1 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1),
+ },
+ { /* EDC2 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2),
+ },
+ { /* EDC3 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3),
+ },
+ { /* EDC4 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4),
+ },
+ { /* EDC5 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5),
+ },
+ { /* EDC6 EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6),
},
- { /* EDC EClk */
+ { /* EDC7 EClk */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
- .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7),
},
{ /* M2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764df815d..e2d7285a2dac 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \
.event_str = str, \
};
+#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \
+static struct perf_pmu_events_ht_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
+ .id = 0, \
+ .event_str_noht = noht, \
+ .event_str_ht = ht, \
+}
+
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
@@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b);
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page);
#ifdef CONFIG_CPU_SUP_AMD
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 7f991bd5031b..e346572841a0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+
+extern int __max_smt_threads;
+
+static inline int topology_max_smt_threads(void)
+{
+ return __max_smt_threads;
+}
+
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
extern int topology_phys_to_logical_pkg(unsigned int pkg);
#else
@@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg);
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_max_smt_threads(void) { return 1; }
#endif
static inline void arch_fix_phys_package_id(int num, u32 slot)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b923cac..2ed0ec1353f8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+/* Maximum number of SMT threads on any online core */
+int __max_smt_threads __read_mostly;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu)
bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
- int i;
+ int i, threads;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
@@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu)
if (match_die(c, o) && !topology_same_node(c, o))
primarily_use_numa_for_topology();
}
+
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
@@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void)
#ifdef CONFIG_HOTPLUG_CPU
+/* Recompute SMT state for all CPUs on offline */
+static void recompute_smt_state(void)
+{
+ int max_threads, cpu;
+
+ max_threads = 0;
+ for_each_online_cpu (cpu) {
+ int threads = cpumask_weight(topology_sibling_cpumask(cpu));
+
+ if (threads > max_threads)
+ max_threads = threads;
+ }
+ __max_smt_threads = max_threads;
+}
+
static void remove_siblinginfo(int cpu)
{
int sibling;
@@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu)
c->phys_proc_id = 0;
c->cpu_core_id = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
+ recompute_smt_state();
}
static void remove_cpu_from_maps(int cpu)