From cbbc6fdd85be6cd748d6c6db23c1d5be6b04161e Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 20 Jun 2023 15:12:33 +0800 Subject: driver/perf: Add identifier sysfs file for Yitian 710 DDR To allow userspace to identify the specific implementation of the device, add an "identifier" sysfs file. The perf tool can match the Yitian 710 DDR metric through the identifier. Signed-off-by: Jing Zhang Acked-by: Ian Rogers Reviewed-by: Shuai Xue Link: https://lore.kernel.org/r/1687245156-61215-2-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Will Deacon --- drivers/perf/alibaba_uncore_drw_pmu.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index 5c5be9fc1b15..19d459a36be5 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -236,10 +236,37 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = { .attrs = ali_drw_pmu_cpumask_attrs, }; +static ssize_t ali_drw_pmu_identifier_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%s\n", "ali_drw_pmu"); +} + +static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + return attr->mode; +} + +static struct device_attribute ali_drw_pmu_identifier_attr = + __ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL); + +static struct attribute *ali_drw_pmu_identifier_attrs[] = { + &ali_drw_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group ali_drw_pmu_identifier_attr_group = { + .attrs = ali_drw_pmu_identifier_attrs, + .is_visible = ali_drw_pmu_identifier_attr_visible +}; + static const struct attribute_group *ali_drw_pmu_attr_groups[] = { &ali_drw_pmu_events_attr_group, &ali_drw_pmu_cpumask_attr_group, &ali_drw_pmu_format_group, + &ali_drw_pmu_identifier_attr_group, NULL, }; -- cgit From c47ea342d85db6fde628dc55996d852d5244bda3 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 4 Jul 2023 17:35:55 +0800 Subject: perf: xgene_pmu: Convert to devm_platform_ioremap_resource() Use devm_platform_ioremap_resource() to simplify code. Signed-off-by: Yangtao Li Link: https://lore.kernel.org/r/20230704093556.17926-1-frank.li@vivo.com Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 0c32dffc7ede..9972bfc11a5c 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1833,7 +1833,6 @@ static int xgene_pmu_probe(struct platform_device *pdev) const struct xgene_pmu_data *dev_data; const struct of_device_id *of_id; struct xgene_pmu *xgene_pmu; - struct resource *res; int irq, rc; int version; @@ -1883,8 +1882,7 @@ static int xgene_pmu_probe(struct platform_device *pdev) xgene_pmu->version = version; dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res); + xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(xgene_pmu->pcppmu_csr)) { dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n"); return PTR_ERR(xgene_pmu->pcppmu_csr); -- cgit From 7c3f204e544dfa376bf1b34ebaa5552304a2b7d9 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 6 Jul 2023 11:23:05 +0200 Subject: perf/smmuv3: Remove build dependency on ACPI This driver supports working without ACPI since commit 3f7be43561766 ("perf/smmuv3: Add devicetree support"), so remove the build dependency. Signed-off-by: Vincent Whitchurch Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230706-smmuv3-pmu-noacpi-v1-1-7083ef189158@axis.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index f4572a5cca72..273d67ecf6d2 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -92,7 +92,7 @@ config ARM_PMU_ACPI config ARM_SMMU_V3_PMU tristate "ARM SMMUv3 Performance Monitors Extension" - depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT) + depends on ARM64 || (COMPILE_TEST && 64BIT) depends on GENERIC_MSI_IRQ help Provides support for the ARM SMMUv3 Performance Monitor Counter -- cgit From 039768b558537af23734c4cc6fd688ed575158c8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Jul 2023 14:55:03 -0600 Subject: dt-bindings: arm: pmu: Add Cortex A520, A715, A720, X3, and X4 Add compatible strings for the Arm Cortex-A520, Cortex-A715, Cortex-A720, Cortex-X3, and Cortex-X4 CPU PMUs. Acked-by: Conor Dooley Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230706205505.308523-1-robh@kernel.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index e14358bf0b9c..99b5e9530707 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -49,9 +49,14 @@ properties: - arm,cortex-a77-pmu - arm,cortex-a78-pmu - arm,cortex-a510-pmu + - arm,cortex-a520-pmu - arm,cortex-a710-pmu + - arm,cortex-a715-pmu + - arm,cortex-a720-pmu - arm,cortex-x1-pmu - arm,cortex-x2-pmu + - arm,cortex-x3-pmu + - arm,cortex-x4-pmu - arm,neoverse-e1-pmu - arm,neoverse-n1-pmu - arm,neoverse-n2-pmu -- cgit From 989567fc0f3d5ba1c2a48cdb857a3965b7e1a276 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 6 Jul 2023 14:55:04 -0600 Subject: perf: pmuv3: Add Cortex A520, A715, A720, X3 and X4 PMUs Add support for the Arm Cortex-A520, Cortex-A715, Cortex-A720, Cortex-X3, and Cortex-X4 CPU PMUs. They are straight-forward additions with just new compatible strings. Acked-by: Mark Rutland Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230706205505.308523-2-robh@kernel.org Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 08b3a1bf0ef6..d2dffb4e9d07 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1266,9 +1266,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a76) PMUV3_INIT_SIMPLE(armv8_cortex_a77) PMUV3_INIT_SIMPLE(armv8_cortex_a78) PMUV3_INIT_SIMPLE(armv9_cortex_a510) +PMUV3_INIT_SIMPLE(armv9_cortex_a520) PMUV3_INIT_SIMPLE(armv9_cortex_a710) +PMUV3_INIT_SIMPLE(armv9_cortex_a715) +PMUV3_INIT_SIMPLE(armv9_cortex_a720) PMUV3_INIT_SIMPLE(armv8_cortex_x1) PMUV3_INIT_SIMPLE(armv9_cortex_x2) +PMUV3_INIT_SIMPLE(armv9_cortex_x3) +PMUV3_INIT_SIMPLE(armv9_cortex_x4) PMUV3_INIT_SIMPLE(armv8_neoverse_e1) PMUV3_INIT_SIMPLE(armv8_neoverse_n1) PMUV3_INIT_SIMPLE(armv9_neoverse_n2) @@ -1334,9 +1339,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init}, {.compatible = "arm,cortex-a78-pmu", .data = armv8_cortex_a78_pmu_init}, {.compatible = "arm,cortex-a510-pmu", .data = armv9_cortex_a510_pmu_init}, + {.compatible = "arm,cortex-a520-pmu", .data = armv9_cortex_a520_pmu_init}, {.compatible = "arm,cortex-a710-pmu", .data = armv9_cortex_a710_pmu_init}, + {.compatible = "arm,cortex-a715-pmu", .data = armv9_cortex_a715_pmu_init}, + {.compatible = "arm,cortex-a720-pmu", .data = armv9_cortex_a720_pmu_init}, {.compatible = "arm,cortex-x1-pmu", .data = armv8_cortex_x1_pmu_init}, {.compatible = "arm,cortex-x2-pmu", .data = armv9_cortex_x2_pmu_init}, + {.compatible = "arm,cortex-x3-pmu", .data = armv9_cortex_x3_pmu_init}, + {.compatible = "arm,cortex-x4-pmu", .data = armv9_cortex_x4_pmu_init}, {.compatible = "arm,neoverse-e1-pmu", .data = armv8_neoverse_e1_pmu_init}, {.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init}, {.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init}, -- cgit From 918dc87b746e0114eb64d6e478da7f370e266d5a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:48:31 -0600 Subject: drivers/perf: Explicitly include correct DT includes The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it as merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230714174832.4061752-1-robh@kernel.org Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 2 -- drivers/perf/arm-cci.c | 5 +---- drivers/perf/arm_dsu_pmu.c | 2 +- drivers/perf/arm_pmu_platform.c | 1 - drivers/perf/arm_spe_pmu.c | 3 +-- drivers/perf/fsl_imx8_ddr_perf.c | 3 +-- drivers/perf/fsl_imx9_ddr_perf.c | 4 +--- drivers/perf/marvell_cn10k_ddr_pmu.c | 3 +-- drivers/perf/marvell_cn10k_tad_pmu.c | 3 +-- 9 files changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index 0b24dee1ed3c..bbc7285fd934 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 998259f1d973..61de861eaf91 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -7,10 +7,7 @@ #include #include #include -#include -#include -#include -#include +#include #include #include #include diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index fe2abb412c00..8223c49bd082 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 933b96e243b8..3596db36cbff 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index b9ba4c4fe5a2..d2b0cbf0e0c4 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -25,8 +25,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 5222ba1e79d0..1cb3861ab0e0 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -10,10 +10,9 @@ #include #include #include -#include -#include #include #include +#include #include #define COUNTER_CNTL 0x0 diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 71d5b07e3aff..5cf770a1bc31 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -7,9 +7,7 @@ #include #include #include -#include -#include -#include +#include #include /* Performance monitor configuration */ diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c index b94a5f6cc22b..524ba82bfce2 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -8,11 +8,10 @@ #include #include #include -#include -#include #include #include #include +#include /* Performance Counters Operating Mode Control Registers */ #define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 3972197e2210..fec8e82edb95 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -6,10 +6,9 @@ #define pr_fmt(fmt) "tad_pmu: " fmt +#include #include #include -#include -#include #include #include #include -- cgit From 00df90934c9effc52b60749788b0eeb131d1bfd7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 7 Jul 2023 17:38:11 +0100 Subject: perf/arm-cmn: Remove spurious event aliases As the name suggests, the "partial DAT flit" event is only counted for the DAT channel, and furthermore is only applicable to device ports, not mesh links (strictly it's only device ports with CHI-A requesters connected, but detecting that degree of detail is more bother than it's worth). Stop generating spurious event aliases for other combinations which aren't meaningful. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b01a58e3ff05c322547fbfd015f6dbfedf555ed3.1688746690.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index b8c15878bc86..a007648dbf1c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -791,16 +791,22 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \ _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL) -#define _CMN_EVENT_XP(_name, _event) \ +#define _CMN_EVENT_XP_MESH(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \ __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \ - __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \ + __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)) + +#define _CMN_EVENT_XP_PORT(_name, _event) \ __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \ __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)), \ __CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)), \ __CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2)) +#define _CMN_EVENT_XP(_name, _event) \ + _CMN_EVENT_XP_MESH(_name, _event), \ + _CMN_EVENT_XP_PORT(_name, _event) + /* Good thing there are only 3 fundamental XP events... */ #define CMN_EVENT_XP(_name, _event) \ _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \ @@ -813,6 +819,10 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \ _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5)) +#define CMN_EVENT_XP_DAT(_name, _event) \ + _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \ + _CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5)) + static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_DTC(cycles), @@ -943,7 +953,7 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_XP(txflit_valid, 0x01), CMN_EVENT_XP(txflit_stall, 0x02), - CMN_EVENT_XP(partial_dat_flit, 0x03), + CMN_EVENT_XP_DAT(partial_dat_flit, 0x03), /* We treat watchpoints as a special made-up class of XP events */ CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP), CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN), -- cgit From b1b7dc38e4827bf842ed763a4dfb4c0d72259ad5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 7 Jul 2023 17:38:12 +0100 Subject: perf/arm-cmn: Refactor HN-F event selector macros Refactor the macros for defining HN-F events with additional selectors, so they can be shared with another upcoming similar-but-distinct HN type. No functional change intended. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0f05327941e06c665dbfd47e03fad29276b9e63c.1688746690.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index a007648dbf1c..1efe9b72c0e6 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -742,8 +742,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel) #define CMN_EVENT_DTC(_name) \ CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0) -#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel) \ - _CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel) +#define CMN_EVENT_HNF(_model, _name, _event) \ + CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event) #define CMN_EVENT_HNI(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event) #define CMN_EVENT_HNP(_name, _event) \ @@ -775,21 +775,34 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID), \ _CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID), \ _CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID) -#define CMN_EVENT_HNF(_model, _name, _event) \ - _CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE) + +#define CMN_EVENT_HN_OCC(_model, _name, _type, _event) \ + _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID) +#define CMN_EVENT_HN_CLS(_model, _name, _type, _event) \ + _CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \ + _CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \ + _CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \ + _CMN_EVENT_ATTR(_model, _name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID) +#define CMN_EVENT_HN_SNT(_model, _name, _type, _event) \ + _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \ + _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL) + +#define CMN_EVENT_HNF_OCC(_model, _name, _event) \ + CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event) #define CMN_EVENT_HNF_CLS(_model, _name, _event) \ - _CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \ - _CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \ - _CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \ - _CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID) + CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNS, _event) #define CMN_EVENT_HNF_SNT(_model, _name, _event) \ - _CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \ - _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL) + CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event) + #define _CMN_EVENT_XP_MESH(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ @@ -872,11 +885,7 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_HNF(CMN_ANY, mc_retries, 0x0c), CMN_EVENT_HNF(CMN_ANY, mc_reqs, 0x0d), CMN_EVENT_HNF(CMN_ANY, qos_hh_retry, 0x0e), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID), - _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID), + CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy, 0x0f), CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz, 0x10), CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz, 0x11), CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full, 0x12), -- cgit From ac18ea1a8935920a6b3284f24e58f903b95dac20 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 7 Jul 2023 17:38:13 +0100 Subject: perf/arm-cmn: Add CMN-700 r3 support CMN-700 r3 has a special configuration option for a so-called "Super Home Node", which is a superset of the standard HN-F that also manages remote-chip coherency for multi-chip setups. As such it has a similar but expanded set of PMU events compared to HN-F, with some additional filtering options to boot. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/49153b72253f6af0e625cb55b9e1b825b110c49c.1688746690.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 1efe9b72c0e6..913dc04b3a40 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -72,6 +72,8 @@ /* For most nodes, this is all there is */ #define CMN_PMU_EVENT_SEL 0x000 #define CMN__PMU_CBUSY_SNTHROTTLE_SEL GENMASK_ULL(44, 42) +#define CMN__PMU_SN_HOME_SEL GENMASK_ULL(40, 39) +#define CMN__PMU_HBT_LBT_SEL GENMASK_ULL(38, 37) #define CMN__PMU_CLASS_OCCUP_ID GENMASK_ULL(36, 35) /* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */ #define CMN__PMU_OCCUP1_ID GENMASK_ULL(34, 32) @@ -226,6 +228,7 @@ enum cmn_revision { REV_CMN700_R0P0 = 0, REV_CMN700_R1P0, REV_CMN700_R2P0, + REV_CMN700_R3P0, REV_CI700_R0P0 = 0, REV_CI700_R1P0, REV_CI700_R2P0, @@ -254,6 +257,9 @@ enum cmn_node_type { CMN_TYPE_CCHA, CMN_TYPE_CCLA, CMN_TYPE_CCLA_RNI, + CMN_TYPE_HNS = 0x200, + CMN_TYPE_HNS_MPAM_S, + CMN_TYPE_HNS_MPAM_NS, /* Not a real node type */ CMN_TYPE_WP = 0x7770 }; @@ -263,6 +269,8 @@ enum cmn_filter_select { SEL_OCCUP1ID, SEL_CLASS_OCCUP_ID, SEL_CBUSY_SNTHROTTLE_SEL, + SEL_HBT_LBT_SEL, + SEL_SN_HOME_SEL, SEL_MAX }; @@ -768,6 +776,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event) #define CMN_EVENT_CCLA_RNI(_name, _event) \ CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event) +#define CMN_EVENT_HNS(_name, _event) \ + CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) #define CMN_EVENT_DVM(_model, _name, _event) \ _CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE) @@ -803,6 +813,23 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, #define CMN_EVENT_HNF_SNT(_model, _name, _event) \ CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event) +#define CMN_EVENT_HNS_OCC(_name, _event) \ + CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID) +#define CMN_EVENT_HNS_CLS( _name, _event) \ + CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) +#define CMN_EVENT_HNS_SNT(_name, _event) \ + CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event) +#define CMN_EVENT_HNS_HBT(_name, _event) \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL) +#define CMN_EVENT_HNS_SNH(_name, _event) \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \ + _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL) #define _CMN_EVENT_XP_MESH(_name, _event) \ __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ @@ -1151,6 +1178,66 @@ static struct attribute *arm_cmn_event_attrs[] = { CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd, 0x2a), CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd, 0x2b), + CMN_EVENT_HNS_HBT(cache_miss, 0x01), + CMN_EVENT_HNS_HBT(slc_sf_cache_access, 0x02), + CMN_EVENT_HNS_HBT(cache_fill, 0x03), + CMN_EVENT_HNS_HBT(pocq_retry, 0x04), + CMN_EVENT_HNS_HBT(pocq_reqs_recvd, 0x05), + CMN_EVENT_HNS_HBT(sf_hit, 0x06), + CMN_EVENT_HNS_HBT(sf_evictions, 0x07), + CMN_EVENT_HNS(dir_snoops_sent, 0x08), + CMN_EVENT_HNS(brd_snoops_sent, 0x09), + CMN_EVENT_HNS_HBT(slc_eviction, 0x0a), + CMN_EVENT_HNS_HBT(slc_fill_invalid_way, 0x0b), + CMN_EVENT_HNS(mc_retries_local, 0x0c), + CMN_EVENT_HNS_SNH(mc_reqs_local, 0x0d), + CMN_EVENT_HNS(qos_hh_retry, 0x0e), + CMN_EVENT_HNS_OCC(qos_pocq_occupancy, 0x0f), + CMN_EVENT_HNS(pocq_addrhaz, 0x10), + CMN_EVENT_HNS(pocq_atomic_addrhaz, 0x11), + CMN_EVENT_HNS(ld_st_swp_adq_full, 0x12), + CMN_EVENT_HNS(cmp_adq_full, 0x13), + CMN_EVENT_HNS(txdat_stall, 0x14), + CMN_EVENT_HNS(txrsp_stall, 0x15), + CMN_EVENT_HNS(seq_full, 0x16), + CMN_EVENT_HNS(seq_hit, 0x17), + CMN_EVENT_HNS(snp_sent, 0x18), + CMN_EVENT_HNS(sfbi_dir_snp_sent, 0x19), + CMN_EVENT_HNS(sfbi_brd_snp_sent, 0x1a), + CMN_EVENT_HNS(intv_dirty, 0x1c), + CMN_EVENT_HNS(stash_snp_sent, 0x1d), + CMN_EVENT_HNS(stash_data_pull, 0x1e), + CMN_EVENT_HNS(snp_fwded, 0x1f), + CMN_EVENT_HNS(atomic_fwd, 0x20), + CMN_EVENT_HNS(mpam_hardlim, 0x21), + CMN_EVENT_HNS(mpam_softlim, 0x22), + CMN_EVENT_HNS(snp_sent_cluster, 0x23), + CMN_EVENT_HNS(sf_imprecise_evict, 0x24), + CMN_EVENT_HNS(sf_evict_shared_line, 0x25), + CMN_EVENT_HNS_CLS(pocq_class_occup, 0x26), + CMN_EVENT_HNS_CLS(pocq_class_retry, 0x27), + CMN_EVENT_HNS_CLS(class_mc_reqs_local, 0x28), + CMN_EVENT_HNS_CLS(class_cgnt_cmin, 0x29), + CMN_EVENT_HNS_SNT(sn_throttle, 0x2a), + CMN_EVENT_HNS_SNT(sn_throttle_min, 0x2b), + CMN_EVENT_HNS(sf_precise_to_imprecise, 0x2c), + CMN_EVENT_HNS(snp_intv_cln, 0x2d), + CMN_EVENT_HNS(nc_excl, 0x2e), + CMN_EVENT_HNS(excl_mon_ovfl, 0x2f), + CMN_EVENT_HNS(snp_req_recvd, 0x30), + CMN_EVENT_HNS(snp_req_byp_pocq, 0x31), + CMN_EVENT_HNS(dir_ccgha_snp_sent, 0x32), + CMN_EVENT_HNS(brd_ccgha_snp_sent, 0x33), + CMN_EVENT_HNS(ccgha_snp_stall, 0x34), + CMN_EVENT_HNS(lbt_req_hardlim, 0x35), + CMN_EVENT_HNS(hbt_req_hardlim, 0x36), + CMN_EVENT_HNS(sf_reupdate, 0x37), + CMN_EVENT_HNS(excl_sf_imprecise, 0x38), + CMN_EVENT_HNS(snp_pocq_addrhaz, 0x39), + CMN_EVENT_HNS(mc_retries_remote, 0x3a), + CMN_EVENT_HNS_SNH(mc_reqs_remote, 0x3b), + CMN_EVENT_HNS_CLS(class_mc_reqs_remote, 0x3c), + NULL }; @@ -1392,6 +1479,10 @@ static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn, dn->occupid[fsel].val = occupid; reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL, dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) | + FIELD_PREP(CMN__PMU_SN_HOME_SEL, + dn->occupid[SEL_SN_HOME_SEL].val) | + FIELD_PREP(CMN__PMU_HBT_LBT_SEL, + dn->occupid[SEL_HBT_LBT_SEL].val) | FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID, dn->occupid[SEL_CLASS_OCCUP_ID].val) | FIELD_PREP(CMN__PMU_OCCUP1_ID, @@ -2219,6 +2310,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_CCRA: case CMN_TYPE_CCHA: case CMN_TYPE_CCLA: + case CMN_TYPE_HNS: dn++; break; /* Nothing to see here */ @@ -2226,6 +2318,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) case CMN_TYPE_MPAM_NS: case CMN_TYPE_RNSAM: case CMN_TYPE_CXLA: + case CMN_TYPE_HNS_MPAM_S: + case CMN_TYPE_HNS_MPAM_NS: break; /* * Split "optimised" combination nodes into separate -- cgit From 90d68677226ac7cf344648919df2016686b3e2ab Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 2 Aug 2023 14:38:53 +0530 Subject: perf: pmuv3: Remove comments from armv8pmu_[enable|disable]_event() The comments in armv8pmu_[enable|disable]_event() are blindingly obvious, and does not contribute in making things any better. Let's drop them off. Functional change is not intended. Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230802090853.1190391-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index d2dffb4e9d07..e5a2ac4155f6 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -721,38 +721,15 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - - /* - * Disable counter - */ armv8pmu_disable_event_counter(event); - - /* - * Set event. - */ armv8pmu_write_event_type(event); - - /* - * Enable interrupt for this counter - */ armv8pmu_enable_event_irq(event); - - /* - * Enable counter - */ armv8pmu_enable_event_counter(event); } static void armv8pmu_disable_event(struct perf_event *event) { - /* - * Disable counter - */ armv8pmu_disable_event_counter(event); - - /* - * Disable interrupt for this counter - */ armv8pmu_disable_event_irq(event); } -- cgit From 0242737dc4eb9f6e9a5ea594b3f93efa0b12f28d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 14 Aug 2023 20:40:12 +0800 Subject: perf/smmuv3: Enable HiSilicon Erratum 162001900 quirk for HIP08/09 Some HiSilicon SMMU PMCG suffers the erratum 162001900 that the PMU disable control sometimes fail to disable the counters. This will lead to error or inaccurate data since before we enable the counters the counter's still counting for the event used in last perf session. This patch tries to fix this by hardening the global disable process. Before disable the PMU, writing an invalid event type (0xffff) to focibly stop the counters. Correspondingly restore each events on pmu::pmu_enable(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230814124012.58013-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 3 ++ drivers/acpi/arm64/iort.c | 5 +++- drivers/perf/arm_smmuv3_pmu.c | 46 ++++++++++++++++++++++++++++- include/linux/acpi_iort.h | 1 + 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 496cdca5cb99..d54626cfcbda 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -195,6 +195,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A | +| | Hip09 SMMU PMCG | | | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3631230a61c8..2c1640fd2b16 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1711,7 +1711,10 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip08 Platform */ {"HISI ", "HIP08 ", 0, ACPI_SIG_IORT, greater_than_or_equal, - "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08}, + "Erratum #162001800, Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP08}, + /* HiSilicon Hip09 Platform */ + {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, { } }; diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 25a269d431e4..0e17c57ddb87 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -115,6 +115,7 @@ #define SMMU_PMCG_PA_SHIFT 12 #define SMMU_PMCG_EVCNTR_RDONLY BIT(0) +#define SMMU_PMCG_HARDEN_DISABLE BIT(1) static int cpuhp_state_num; @@ -159,6 +160,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu) writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR); } +static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, + struct perf_event *event, int idx); + +static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + unsigned int idx; + + for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters) + smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx); + + smmu_pmu_enable(pmu); +} + static inline void smmu_pmu_disable(struct pmu *pmu) { struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); @@ -167,6 +182,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu) writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); } +static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + unsigned int idx; + + /* + * The global disable of PMU sometimes fail to stop the counting. + * Harden this by writing an invalid event type to each used counter + * to forcibly stop counting. + */ + for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters) + writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx)); + + smmu_pmu_disable(pmu); +} + static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu, u32 idx, u64 value) { @@ -765,7 +796,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) switch (model) { case IORT_SMMU_V3_PMCG_HISI_HIP08: /* HiSilicon Erratum 162001800 */ - smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY; + smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE; + break; + case IORT_SMMU_V3_PMCG_HISI_HIP09: + smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE; break; } @@ -890,6 +924,16 @@ static int smmu_pmu_probe(struct platform_device *pdev) if (!dev->of_node) smmu_pmu_get_acpi_options(smmu_pmu); + /* + * For platforms suffer this quirk, the PMU disable sometimes fails to + * stop the counters. This will leads to inaccurate or error counting. + * Forcibly disable the counters with these quirk handler. + */ + if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) { + smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09; + smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09; + } + /* Pick one CPU to be the preferred one to use */ smmu_pmu->on_cpu = raw_smp_processor_id(); WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu))); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index ee7cb6aaff71..1cb65592c95d 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -21,6 +21,7 @@ */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ #define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP09 0x00000002 /* HiSilicon HIP09 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); -- cgit From 1b0e3ea9301a422003d385cda8f8dee6c878ad05 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 14 Aug 2023 21:16:42 +0800 Subject: perf/smmuv3: Add MODULE_ALIAS for module auto loading On my ACPI based arm64 server, if the SMMUv3 PMU is configured as module it won't be loaded automatically after booting even if the device has already been scanned and added. It's because the module lacks a platform alias, the uevent mechanism and userspace tools like udevd make use of this to find the target driver module of the device. This patch adds the missing platform alias of the module, then module will be loaded automatically if device exists. Before this patch: [root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias alias: of:N*T*Carm,smmu-v3-pmcgC* alias: of:N*T*Carm,smmu-v3-pmcg After this patch: [root@localhost tmp]# modinfo arm_smmuv3_pmu | grep alias alias: platform:arm-smmu-v3-pmcg alias: of:N*T*Carm,smmu-v3-pmcgC* alias: of:N*T*Carm,smmu-v3-pmcg Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230814131642.65263-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 0e17c57ddb87..6303b82566f9 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -1028,6 +1028,7 @@ static void __exit arm_smmu_pmu_exit(void) module_exit(arm_smmu_pmu_exit); +MODULE_ALIAS("platform:arm-smmu-v3-pmcg"); MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension"); MODULE_AUTHOR("Neil Leeder "); MODULE_AUTHOR("Shameer Kolothum "); -- cgit From 4c1d2f56d685406fc6b452ca5f797bda62a06609 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 12 Aug 2023 19:55:49 -0400 Subject: perf/arm-dmc620: Fix dmc620_pmu_irqs_lock/cpu_hotplug_lock circular lock dependency The following circular locking dependency was reported when running cpus online/offline test on an arm64 system. [ 84.195923] Chain exists of: dmc620_pmu_irqs_lock --> cpu_hotplug_lock --> cpuhp_state-down [ 84.207305] Possible unsafe locking scenario: [ 84.213212] CPU0 CPU1 [ 84.217729] ---- ---- [ 84.222247] lock(cpuhp_state-down); [ 84.225899] lock(cpu_hotplug_lock); [ 84.232068] lock(cpuhp_state-down); [ 84.238237] lock(dmc620_pmu_irqs_lock); [ 84.242236] *** DEADLOCK *** The following locking order happens when dmc620_pmu_get_irq() calls cpuhp_state_add_instance_nocalls(). lock(dmc620_pmu_irqs_lock) --> lock(cpu_hotplug_lock) On the other hand, the calling sequence cpuhp_thread_fun() => cpuhp_invoke_callback() => dmc620_pmu_cpu_teardown() leads to the locking sequence lock(cpuhp_state-down) => lock(dmc620_pmu_irqs_lock) Here dmc620_pmu_irqs_lock protects both the dmc620_pmu_irqs and the pmus_node lists in various dmc620_pmu instances. dmc620_pmu_get_irq() requires protected access to dmc620_pmu_irqs whereas dmc620_pmu_cpu_teardown() needs protection to the pmus_node lists. Break this circular locking dependency by using two separate locks to protect dmc620_pmu_irqs list and the pmus_node lists respectively. Suggested-by: Robin Murphy Signed-off-by: Waiman Long Link: https://lore.kernel.org/r/20230812235549.494174-1-longman@redhat.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 9d0f01c4455a..30cea6859574 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -66,8 +66,13 @@ #define DMC620_PMU_COUNTERn_OFFSET(n) \ (DMC620_PMU_COUNTERS_BASE + 0x28 * (n)) -static LIST_HEAD(dmc620_pmu_irqs); +/* + * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list + * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances + */ static DEFINE_MUTEX(dmc620_pmu_irqs_lock); +static DEFINE_MUTEX(dmc620_pmu_node_lock); +static LIST_HEAD(dmc620_pmu_irqs); struct dmc620_pmu_irq { struct hlist_node node; @@ -475,9 +480,9 @@ static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num) return PTR_ERR(irq); dmc620_pmu->irq = irq; - mutex_lock(&dmc620_pmu_irqs_lock); + mutex_lock(&dmc620_pmu_node_lock); list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node); - mutex_unlock(&dmc620_pmu_irqs_lock); + mutex_unlock(&dmc620_pmu_node_lock); return 0; } @@ -486,9 +491,11 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu) { struct dmc620_pmu_irq *irq = dmc620_pmu->irq; - mutex_lock(&dmc620_pmu_irqs_lock); + mutex_lock(&dmc620_pmu_node_lock); list_del_rcu(&dmc620_pmu->pmus_node); + mutex_unlock(&dmc620_pmu_node_lock); + mutex_lock(&dmc620_pmu_irqs_lock); if (!refcount_dec_and_test(&irq->refcount)) { mutex_unlock(&dmc620_pmu_irqs_lock); return; @@ -638,10 +645,10 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu, return 0; /* We're only reading, but this isn't the place to be involving RCU */ - mutex_lock(&dmc620_pmu_irqs_lock); + mutex_lock(&dmc620_pmu_node_lock); list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node) perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target); - mutex_unlock(&dmc620_pmu_irqs_lock); + mutex_unlock(&dmc620_pmu_node_lock); WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target))); irq->cpu = target; -- cgit From 83a6d80c2bfd1d348e5e7079af21a924fdc5c972 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 15 Aug 2023 21:10:10 +0800 Subject: drivers/perf: hisi: Schedule perf session according to locality The PCIe PMUs locate on different NUMA node but currently we don't consider it and likely stack all the sessions on the same CPU: [root@localhost tmp]# cat /sys/devices/hisi_pcie*/cpumask 0 0 0 0 0 0 This can be optimize a bit to use a local CPU for the PMU. Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20230815131010.2147-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index e10fc7cb9493..5a00adb2de8c 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -665,8 +665,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node); if (pcie_pmu->on_cpu == -1) { - pcie_pmu->on_cpu = cpu; - WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu))); + pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev)); + WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu))); } return 0; @@ -676,14 +676,23 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node); unsigned int target; + cpumask_t mask; + int numa_node; /* Nothing to do if this CPU doesn't own the PMU */ if (pcie_pmu->on_cpu != cpu) return 0; pcie_pmu->on_cpu = -1; - /* Choose a new CPU from all online cpus. */ - target = cpumask_any_but(cpu_online_mask, cpu); + + /* Choose a local CPU from all online cpus. */ + numa_node = dev_to_node(&pcie_pmu->pdev->dev); + if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) && + cpumask_andnot(&mask, &mask, cpumask_of(cpu))) + target = cpumask_any(&mask); + else + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) { pci_err(pcie_pmu->pdev, "There is no CPU to set\n"); return 0; -- cgit From e89ecd8368860bf05437eabd07d292c316221cfc Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 11 Aug 2023 09:54:37 +0800 Subject: perf/imx_ddr: speed up overflow frequency of cycle For i.MX8MP, we cannot ensure that cycle counter overflow occurs at least 4 times as often as other events. Due to byte counters will count for any event configured, it will overflow more often. And if byte counters overflow that related counters would stop since they share the COUNTER_CNTL. We can speed up cycle counter overflow frequency by setting counter parameter (CP) field of cycle counter. In this way, we can avoid stop counting byte counters when interrupt didn't come and the byte counters can be fetched or updated from each cycle counter overflow interrupt. Because we initialize CP filed to shorten counter0 overflow time, the cycle counter will start couting from a fixed/base value each time. We need to remove the base from the result too. Therefore, we could get precise result from cycle counter. Signed-off-by: Xu Yang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20230811015438.1999307-1-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 1cb3861ab0e0..7d4e49565738 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -27,6 +27,8 @@ #define CNTL_CLEAR_MASK 0xFFFFFFFD #define CNTL_OVER_MASK 0xFFFFFFFE +#define CNTL_CP_SHIFT 16 +#define CNTL_CP_MASK (0xFF << CNTL_CP_SHIFT) #define CNTL_CSV_SHIFT 24 #define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT) @@ -34,6 +36,8 @@ #define EVENT_CYCLES_COUNTER 0 #define NUM_COUNTERS 4 +/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */ +#define CYCLES_COUNTER_MASK 0x0FFFFFFF #define AXI_MASKING_REVERT 0xffff0000 /* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */ #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) @@ -426,6 +430,17 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config, writel(0, pmu->base + reg); val = CNTL_EN | CNTL_CLEAR; val |= FIELD_PREP(CNTL_CSV_MASK, config); + + /* + * On i.MX8MP we need to bias the cycle counter to overflow more often. + * We do this by initializing bits [23:16] of the counter value via the + * COUNTER_CTRL Counter Parameter (CP) field. + */ + if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) { + if (counter == EVENT_CYCLES_COUNTER) + val |= FIELD_PREP(CNTL_CP_MASK, 0xf0); + } + writel(val, pmu->base + reg); } else { /* Disable counter */ @@ -465,6 +480,12 @@ static void ddr_perf_event_update(struct perf_event *event) int ret; new_raw_count = ddr_perf_read_counter(pmu, counter); + /* Remove the bias applied in ddr_perf_counter_enable(). */ + if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) { + if (counter == EVENT_CYCLES_COUNTER) + new_raw_count &= CYCLES_COUNTER_MASK; + } + local64_add(new_raw_count, &event->count); /* -- cgit From f4e2bd91ddf5e8543cbe7ad80b3fba3d2dc63fa3 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 11 Aug 2023 09:54:38 +0800 Subject: perf/imx_ddr: don't enable counter0 if none of 4 counters are used In current driver, counter0 will be enabled after ddr_perf_pmu_enable() is called even though none of the 4 counters are used. This will cause counter0 continue to count until ddr_perf_pmu_disabled() is called. If pmu is not disabled all the time, the pmu interrupt will be asserted from time to time due to counter0 will overflow and irq handler will clear it. It's not an expected behavior. This patch will not enable counter0 if none of 4 counters are used. Fixes: 9a66d36cc7ac ("drivers/perf: imx_ddr: Add DDR performance counter support to perf") Signed-off-by: Xu Yang Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20230811015438.1999307-2-xu.yang_2@nxp.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 7d4e49565738..92611c98120f 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -104,6 +104,7 @@ struct ddr_pmu { const struct fsl_ddr_devtype_data *devtype_data; int irq; int id; + int active_counter; }; static ssize_t ddr_perf_identifier_show(struct device *dev, @@ -515,6 +516,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags) ddr_perf_counter_enable(pmu, event->attr.config, counter, true); + if (!pmu->active_counter++) + ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, true); + hwc->state = 0; } @@ -568,6 +573,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags) ddr_perf_counter_enable(pmu, event->attr.config, counter, false); ddr_perf_event_update(event); + if (!--pmu->active_counter) + ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID, + EVENT_CYCLES_COUNTER, false); + hwc->state |= PERF_HES_STOPPED; } @@ -585,25 +594,10 @@ static void ddr_perf_event_del(struct perf_event *event, int flags) static void ddr_perf_pmu_enable(struct pmu *pmu) { - struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); - - /* enable cycle counter if cycle is not active event list */ - if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL) - ddr_perf_counter_enable(ddr_pmu, - EVENT_CYCLES_ID, - EVENT_CYCLES_COUNTER, - true); } static void ddr_perf_pmu_disable(struct pmu *pmu) { - struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu); - - if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL) - ddr_perf_counter_enable(ddr_pmu, - EVENT_CYCLES_ID, - EVENT_CYCLES_COUNTER, - false); } static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, -- cgit From d11a69873d9a7435fe6a48531e165ab80a8b1221 Mon Sep 17 00:00:00 2001 From: Tomislav Novak Date: Mon, 5 Jun 2023 12:19:23 -0700 Subject: hw_breakpoint: fix single-stepping when using bpf_overflow_handler Arm platforms use is_default_overflow_handler() to determine if the hw_breakpoint code should single-step over the breakpoint trigger or let the custom handler deal with it. Since bpf_overflow_handler() currently isn't recognized as a default handler, attaching a BPF program to a PERF_TYPE_BREAKPOINT event causes it to keep firing (the instruction triggering the data abort exception is never skipped). For example: # bpftrace -e 'watchpoint:0x10000:4:w { print("hit") }' -c ./test Attaching 1 probe... hit hit [...] ^C (./test performs a single 4-byte store to 0x10000) This patch replaces the check with uses_default_overflow_handler(), which accounts for the bpf_overflow_handler() case by also testing if one of the perf_event_output functions gets invoked indirectly, via orig_default_handler. Signed-off-by: Tomislav Novak Tested-by: Samuel Gosselin # arm64 Reviewed-by: Catalin Marinas Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/linux-arm-kernel/20220923203644.2731604-1-tnovak@fb.com/ Link: https://lore.kernel.org/r/20230605191923.1219974-1-tnovak@meta.com Signed-off-by: Will Deacon --- arch/arm/kernel/hw_breakpoint.c | 8 ++++---- arch/arm64/kernel/hw_breakpoint.c | 4 ++-- include/linux/perf_event.h | 22 +++++++++++++++++++--- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 054e9199f30d..dc0fb7a81371 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address &= ~alignment_mask; hw->ctrl.len <<= offset; - if (is_default_overflow_handler(bp)) { + if (uses_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * Otherwise, insert a temporary mismatch breakpoint so that * we can single-step over the watchpoint trigger. */ - if (!is_default_overflow_handler(wp)) + if (!uses_default_overflow_handler(wp)) continue; step: enable_single_step(wp, instruction_pointer(regs)); @@ -811,7 +811,7 @@ step: info->trigger = addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); perf_bp_event(wp, regs); - if (is_default_overflow_handler(wp)) + if (uses_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); } @@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) info->trigger = addr; pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (is_default_overflow_handler(bp)) + if (uses_default_overflow_handler(bp)) enable_single_step(bp, addr); goto unlock; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index db2a1861bb97..35225632d70a 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(bp)) + if (uses_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -733,7 +733,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, static int watchpoint_report(struct perf_event *wp, unsigned long addr, struct pt_regs *regs) { - int step = is_default_overflow_handler(wp); + int step = uses_default_overflow_handler(wp); struct arch_hw_breakpoint *info = counter_arch_bp(wp); info->trigger = addr; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2166a69e3bf2..e657916c9509 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1316,15 +1316,31 @@ extern int perf_event_output(struct perf_event *event, struct pt_regs *regs); static inline bool -is_default_overflow_handler(struct perf_event *event) +__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) { - if (likely(event->overflow_handler == perf_event_output_forward)) + if (likely(overflow_handler == perf_event_output_forward)) return true; - if (unlikely(event->overflow_handler == perf_event_output_backward)) + if (unlikely(overflow_handler == perf_event_output_backward)) return true; return false; } +#define is_default_overflow_handler(event) \ + __is_default_overflow_handler((event)->overflow_handler) + +#ifdef CONFIG_BPF_SYSCALL +static inline bool uses_default_overflow_handler(struct perf_event *event) +{ + if (likely(is_default_overflow_handler(event))) + return true; + + return __is_default_overflow_handler(event->orig_overflow_handler); +} +#else +#define uses_default_overflow_handler(event) \ + is_default_overflow_handler(event) +#endif + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, -- cgit From 81e5ee471609848ee1ebf3beb2a46788113fe0eb Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 17 Aug 2023 11:24:02 +0530 Subject: arm_pmu: acpi: Refactor arm_spe_acpi_register_device() Sanity checking all the GICC tables for same interrupt number, and ensuring a homogeneous ACPI based machine, could be used for other platform devices as well. Hence this refactors arm_spe_acpi_register_device() into a common helper arm_acpi_register_pmu_device(). Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Co-developed-by: Will Deacon Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230817055405.249630-2-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_acpi.c | 104 +++++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 40 deletions(-) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 90815ad762eb..48bd62d3993e 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -69,6 +69,62 @@ static void arm_pmu_acpi_unregister_irq(int cpu) acpi_unregister_gsi(gsi); } +static int __maybe_unused +arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len, + u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *)) +{ + int cpu, this_hetid, hetid, irq, ret; + u16 this_gsi = 0, gsi = 0; + + /* + * Ensure that platform device must have IORESOURCE_IRQ + * resource to hold gsi interrupt. + */ + if (pdev->num_resources != 1) + return -ENXIO; + + if (pdev->resource[0].flags != IORESOURCE_IRQ) + return -ENXIO; + + /* + * Sanity check all the GICC tables for the same interrupt + * number. For now, only support homogeneous ACPI machines. + */ + for_each_possible_cpu(cpu) { + struct acpi_madt_generic_interrupt *gicc; + + gicc = acpi_cpu_get_madt_gicc(cpu); + if (gicc->header.length < len) + return gsi ? -ENXIO : 0; + + this_gsi = parse_gsi(gicc); + this_hetid = find_acpi_cpu_topology_hetero_id(cpu); + if (!gsi) { + hetid = this_hetid; + gsi = this_gsi; + } else if (hetid != this_hetid || gsi != this_gsi) { + pr_warn("ACPI: %s: must be homogeneous\n", pdev->name); + return -ENXIO; + } + } + + if (!this_gsi) + return 0; + + irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + if (irq < 0) { + pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi); + return -ENXIO; + } + + pdev->resource[0].start = irq; + ret = platform_device_register(pdev); + if (ret) + acpi_unregister_gsi(gsi); + + return ret; +} + #if IS_ENABLED(CONFIG_ARM_SPE_PMU) static struct resource spe_resources[] = { { @@ -84,6 +140,11 @@ static struct platform_device spe_dev = { .num_resources = ARRAY_SIZE(spe_resources) }; +static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc) +{ + return gicc->spe_interrupt; +} + /* * For lack of a better place, hook the normal PMU MADT walk * and create a SPE device if we detect a recent MADT with @@ -91,47 +152,10 @@ static struct platform_device spe_dev = { */ static void arm_spe_acpi_register_device(void) { - int cpu, hetid, irq, ret; - bool first = true; - u16 gsi = 0; - - /* - * Sanity check all the GICC tables for the same interrupt number. - * For now, we only support homogeneous ACPI/SPE machines. - */ - for_each_possible_cpu(cpu) { - struct acpi_madt_generic_interrupt *gicc; - - gicc = acpi_cpu_get_madt_gicc(cpu); - if (gicc->header.length < ACPI_MADT_GICC_SPE) - return; - - if (first) { - gsi = gicc->spe_interrupt; - if (!gsi) - return; - hetid = find_acpi_cpu_topology_hetero_id(cpu); - first = false; - } else if ((gsi != gicc->spe_interrupt) || - (hetid != find_acpi_cpu_topology_hetero_id(cpu))) { - pr_warn("ACPI: SPE must be homogeneous\n"); - return; - } - } - - irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, - ACPI_ACTIVE_HIGH); - if (irq < 0) { - pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi); - return; - } - - spe_resources[0].start = irq; - ret = platform_device_register(&spe_dev); - if (ret < 0) { + int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE, + arm_spe_parse_gsi); + if (ret) pr_warn("ACPI: SPE: Unable to register device\n"); - acpi_unregister_gsi(gsi); - } } #else static inline void arm_spe_acpi_register_device(void) -- cgit From 1aa3d0274a4aac338ee45a3dfc3b17c944bcc2bc Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 17 Aug 2023 11:24:03 +0530 Subject: arm_pmu: acpi: Add a representative platform device for TRBE ACPI TRBE does not have a HID for identification which could create and add a platform device into the platform bus. Also without a platform device, it cannot be probed and bound to a platform driver. This creates a dummy platform device for TRBE after ascertaining that ACPI provides required interrupts uniformly across all cpus on the system. This device gets created inside drivers/perf/arm_pmu_acpi.c to accommodate TRBE being built as a module. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20230817055405.249630-3-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 3 +++ drivers/perf/arm_pmu_acpi.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/perf/arm_pmu.h | 1 + 3 files changed, 39 insertions(+) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index bd68e1b7f29f..4d537d56eb84 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -42,6 +42,9 @@ #define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) +#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ + trbe_interrupt) + sizeof(u16)) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 48bd62d3993e..05dda19c5359 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -163,6 +163,40 @@ static inline void arm_spe_acpi_register_device(void) } #endif /* CONFIG_ARM_SPE_PMU */ +#if IS_ENABLED(CONFIG_CORESIGHT_TRBE) +static struct resource trbe_resources[] = { + { + /* irq */ + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device trbe_dev = { + .name = ARMV8_TRBE_PDEV_NAME, + .id = -1, + .resource = trbe_resources, + .num_resources = ARRAY_SIZE(trbe_resources) +}; + +static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc) +{ + return gicc->trbe_interrupt; +} + +static void arm_trbe_acpi_register_device(void) +{ + int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE, + arm_trbe_parse_gsi); + if (ret) + pr_warn("ACPI: TRBE: Unable to register device\n"); +} +#else +static inline void arm_trbe_acpi_register_device(void) +{ + +} +#endif /* CONFIG_CORESIGHT_TRBE */ + static int arm_pmu_acpi_parse_irqs(void) { int irq, cpu, irq_cpu, err; @@ -398,6 +432,7 @@ static int arm_pmu_acpi_init(void) return 0; arm_spe_acpi_register_device(); + arm_trbe_acpi_register_device(); return 0; } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index a0801f68762b..143fbc10ecfe 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -187,5 +187,6 @@ void armpmu_free_irq(int irq, int cpu); #endif /* CONFIG_ARM_PMU */ #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" +#define ARMV8_TRBE_PDEV_NAME "arm,trbe" #endif /* __ARM_PMU_H__ */ -- cgit From 21b61fe48c2fc43d98ebb67a1f3832e0478fa523 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 24 Aug 2023 10:41:35 +0800 Subject: drivers/perf: hisi: Update HiSilicon PMU maintainers Since Guangbin and Shaokun have left HiSilicon and will no longer maintain the drivers, update the maintainer information and thanks for their work. Signed-off-by: Jijie Shao Acked-by: Jonathan Cameron Acked-by: Yicong Yang Link: https://lore.kernel.org/r/20230824024135.1291459-1-shaojijie@huawei.com [will: left the HNS3 title as-is to avoid the churn of resorting the entries] Signed-off-by: Will Deacon --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d516295978a4..ce4d209a5bf5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9306,7 +9306,7 @@ F: drivers/crypto/hisilicon/hpre/hpre_crypto.c F: drivers/crypto/hisilicon/hpre/hpre_main.c HISILICON HNS3 PMU DRIVER -M: Guangbin Huang +M: Jijie Shao S: Supported F: Documentation/admin-guide/perf/hns3-pmu.rst F: drivers/perf/hisilicon/hns3_pmu.c @@ -9344,7 +9344,7 @@ F: Documentation/devicetree/bindings/net/hisilicon*.txt F: drivers/net/ethernet/hisilicon/ HISILICON PMU DRIVER -M: Shaokun Zhang +M: Yicong Yang M: Jonathan Cameron S: Supported W: http://www.hisilicon.com -- cgit