-rw-r--r--   Documentation/arm64/acpi_object_usage.rst    |   2
-rw-r--r--   arch/arm64/Kconfig                            |   1
-rw-r--r--   arch/arm64/include/asm/alternative-macros.h   |   4
-rw-r--r--   arch/arm64/include/asm/jump_label.h           |   8
-rw-r--r--   drivers/acpi/Kconfig                          |   2
-rw-r--r--   drivers/acpi/arm64/Kconfig                    |   3
-rw-r--r--   drivers/acpi/arm64/Makefile                   |   1
-rw-r--r--   drivers/acpi/arm64/apmt.c                     | 178
-rw-r--r--   drivers/acpi/arm64/iort.c                     |  16
-rw-r--r--   drivers/acpi/bus.c                            |   2
-rw-r--r--   drivers/perf/arm_pmu.c                        |  17
-rw-r--r--   drivers/perf/arm_pmu_acpi.c                   | 114
-rw-r--r--   include/linux/acpi_apmt.h                     |  19
-rw-r--r--   include/linux/perf/arm_pmu.h                  |   1
14 files changed, 294 insertions, 74 deletions
diff --git a/Documentation/arm64/acpi_object_usage.rst b/Documentation/arm64/acpi_object_usage.rst
index 0609da73970b..484ef9676653 100644
--- a/Documentation/arm64/acpi_object_usage.rst
+++ b/Documentation/arm64/acpi_object_usage.rst
@@ -163,7 +163,7 @@ FPDT   Section 5.2.23 (signature == "FPDT")
 
        **Firmware Performance Data Table**
 
-       Optional, not currently supported.
+       Optional, useful for boot performance profiling.
 
 GTDT   Section 5.2.24 (signature == "GTDT")
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 505c8a1ccbe0..16c2a7d68ca4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config ARM64
         def_bool y
+        select ACPI_APMT if ACPI
         select ACPI_CCA_REQUIRED if ACPI
         select ACPI_GENERIC_GSI if ACPI
         select ACPI_GTDT if ACPI
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 3622e9f4fb44..bdf1f6bcd010 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -224,7 +224,7 @@ alternative_endif
 #include <linux/types.h>
 
 static __always_inline bool
-alternative_has_feature_likely(unsigned long feature)
+alternative_has_feature_likely(const unsigned long feature)
 {
         compiletime_assert(feature < ARM64_NCAPS,
                            "feature must be < ARM64_NCAPS");
@@ -242,7 +242,7 @@ l_no:
 }
 
 static __always_inline bool
-alternative_has_feature_unlikely(unsigned long feature)
+alternative_has_feature_unlikely(const unsigned long feature)
 {
         compiletime_assert(feature < ARM64_NCAPS,
                            "feature must be < ARM64_NCAPS");
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index cea441b6aa5d..48ddc0f45d22 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -15,8 +15,8 @@
 
 #define JUMP_LABEL_NOP_SIZE        AARCH64_INSN_SIZE
 
-static __always_inline bool arch_static_branch(struct static_key *key,
-                                               bool branch)
+static __always_inline bool arch_static_branch(struct static_key * const key,
+                                               const bool branch)
 {
         asm_volatile_goto(
                 "1:     nop                                     \n\t"
@@ -32,8 +32,8 @@ l_yes:
         return true;
 }
 
-static __always_inline bool arch_static_branch_jump(struct static_key *key,
-                                                    bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key,
+                                                    const bool branch)
 {
         asm_volatile_goto(
                 "1:     b               %l[l_yes]               \n\t"
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 473241b5193f..1cc11d2a2a88 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -90,7 +90,7 @@ config ACPI_SPCR_TABLE
 
 config ACPI_FPDT
         bool "ACPI Firmware Performance Data Table (FPDT) support"
-        depends on X86_64
+        depends on X86_64 || ARM64
         help
           Enable support for the Firmware Performance Data Table (FPDT).
           This table provides information on the timing of the system
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index d4a72835f328..b3ed6212244c 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -18,3 +18,6 @@ config ACPI_AGDI
           reset command.
 
           If set, the kernel parses AGDI table and listens for the command.
+
+config ACPI_APMT
+        bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 7b9e4045659d..e21a9e84e394 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -2,4 +2,5 @@
 obj-$(CONFIG_ACPI_AGDI)        += agdi.o
 obj-$(CONFIG_ACPI_IORT)        += iort.o
 obj-$(CONFIG_ACPI_GTDT)        += gtdt.o
+obj-$(CONFIG_ACPI_APMT)        += apmt.o
 obj-y                          += dma.o
diff --git a/drivers/acpi/arm64/apmt.c b/drivers/acpi/arm64/apmt.c
new file mode 100644
index 000000000000..8cab69fa5d59
--- /dev/null
+++ b/drivers/acpi/arm64/apmt.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM APMT table support.
+ * Design document number: ARM DEN0117.
+ *
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ */
+
+#define pr_fmt(fmt)     "ACPI: APMT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/acpi_apmt.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#define DEV_NAME "arm-cs-arch-pmu"
+
+/* There can be up to 3 resources: page 0 and 1 address, and interrupt. */
+#define DEV_MAX_RESOURCE_COUNT 3
+
+/* Root pointer to the mapped APMT table */
+static struct acpi_table_header *apmt_table;
+
+static int __init apmt_init_resources(struct resource *res,
+                                      struct acpi_apmt_node *node)
+{
+        int irq, trigger;
+        int num_res = 0;
+
+        res[num_res].start = node->base_address0;
+        res[num_res].end = node->base_address0 + SZ_4K - 1;
+        res[num_res].flags = IORESOURCE_MEM;
+
+        num_res++;
+
+        res[num_res].start = node->base_address1;
+        res[num_res].end = node->base_address1 + SZ_4K - 1;
+        res[num_res].flags = IORESOURCE_MEM;
+
+        num_res++;
+
+        if (node->ovflw_irq != 0) {
+                trigger = (node->ovflw_irq_flags & ACPI_APMT_OVFLW_IRQ_FLAGS_MODE);
+                trigger = (trigger == ACPI_APMT_OVFLW_IRQ_FLAGS_MODE_LEVEL) ?
+                        ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
+                irq = acpi_register_gsi(NULL, node->ovflw_irq, trigger,
+                                                ACPI_ACTIVE_HIGH);
+
+                if (irq <= 0) {
+                        pr_warn("APMT could not register gsi hwirq %d\n", irq);
+                        return num_res;
+                }
+
+                res[num_res].start = irq;
+                res[num_res].end = irq;
+                res[num_res].flags = IORESOURCE_IRQ;
+
+                num_res++;
+        }
+
+        return num_res;
+}
+
+/**
+ * apmt_add_platform_device() - Allocate a platform device for APMT node
+ * @node: Pointer to device ACPI APMT node
+ * @fwnode: fwnode associated with the APMT node
+ *
+ * Returns: 0 on success, <0 failure
+ */
+static int __init apmt_add_platform_device(struct acpi_apmt_node *node,
+                                           struct fwnode_handle *fwnode)
+{
+        struct platform_device *pdev;
+        int ret, count;
+        struct resource res[DEV_MAX_RESOURCE_COUNT];
+
+        pdev = platform_device_alloc(DEV_NAME, PLATFORM_DEVID_AUTO);
+        if (!pdev)
+                return -ENOMEM;
+
+        memset(res, 0, sizeof(res));
+
+        count = apmt_init_resources(res, node);
+
+        ret = platform_device_add_resources(pdev, res, count);
+        if (ret)
+                goto dev_put;
+
+        /*
+         * Add a copy of APMT node pointer to platform_data to be used to
+         * retrieve APMT data information.
+         */
+        ret = platform_device_add_data(pdev, &node, sizeof(node));
+        if (ret)
+                goto dev_put;
+
+        pdev->dev.fwnode = fwnode;
+
+        ret = platform_device_add(pdev);
+
+        if (ret)
+                goto dev_put;
+
+        return 0;
+
+dev_put:
+        platform_device_put(pdev);
+
+        return ret;
+}
+
+static int __init apmt_init_platform_devices(void)
+{
+        struct acpi_apmt_node *apmt_node;
+        struct acpi_table_apmt *apmt;
+        struct fwnode_handle *fwnode;
+        u64 offset, end;
+        int ret;
+
+        /*
+         * apmt_table and apmt both point to the start of APMT table, but
+         * have different struct types
+         */
+        apmt = (struct acpi_table_apmt *)apmt_table;
+        offset = sizeof(*apmt);
+        end = apmt->header.length;
+
+        while (offset < end) {
+                apmt_node = ACPI_ADD_PTR(struct acpi_apmt_node, apmt,
+                                         offset);
+
+                fwnode = acpi_alloc_fwnode_static();
+                if (!fwnode)
+                        return -ENOMEM;
+
+                ret = apmt_add_platform_device(apmt_node, fwnode);
+                if (ret) {
+                        acpi_free_fwnode_static(fwnode);
+                        return ret;
+                }
+
+                offset += apmt_node->length;
+        }
+
+        return 0;
+}
+
+void __init acpi_apmt_init(void)
+{
+        acpi_status status;
+        int ret;
+
+        /**
+         * APMT table nodes will be used at runtime after the apmt init,
+         * so we don't need to call acpi_put_table() to release
+         * the APMT table mapping.
+         */
+        status = acpi_get_table(ACPI_SIG_APMT, 0, &apmt_table);
+
+        if (ACPI_FAILURE(status)) {
+                if (status != AE_NOT_FOUND) {
+                        const char *msg = acpi_format_exception(status);
+
+                        pr_err("Failed to get APMT table, %s\n", msg);
+                }
+
+                return;
+        }
+
+        ret = apmt_init_platform_devices();
+        if (ret) {
+                pr_err("Failed to initialize APMT platform devices, ret: %d\n", ret);
+                acpi_put_table(apmt_table);
+        }
+}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 8059baf4ef27..38fb84974f35 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -402,6 +402,10 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
         return NULL;
 }
 
+#ifndef ACPI_IORT_SMMU_V3_DEVICEID_VALID
+#define ACPI_IORT_SMMU_V3_DEVICEID_VALID (1 << 4)
+#endif
+
 static int iort_get_id_mapping_index(struct acpi_iort_node *node)
 {
         struct acpi_iort_smmu_v3 *smmu;
@@ -418,12 +422,16 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node)
                 smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
                 /*
-                 * ID mapping index is only ignored if all interrupts are
-                 * GSIV based
+                 * Until IORT E.e (node rev. 5), the ID mapping index was
+                 * defined to be valid unless all interrupts are GSIV-based.
                  */
-                if (smmu->event_gsiv && smmu->pri_gsiv && smmu->gerr_gsiv
-                    && smmu->sync_gsiv)
+                if (node->revision < 5) {
+                        if (smmu->event_gsiv && smmu->pri_gsiv &&
+                            smmu->gerr_gsiv && smmu->sync_gsiv)
+                                return -EINVAL;
+                } else if (!(smmu->flags & ACPI_IORT_SMMU_V3_DEVICEID_VALID)) {
                         return -EINVAL;
+                }
 
                 if (smmu->id_mapping_index >= node->mapping_count) {
                         pr_err(FW_BUG "[node %p type %d] ID mapping index overflows valid mappings\n",
                                node, node->type);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d466c8195314..351208eda9be 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -27,6 +27,7 @@
 #include <linux/dmi.h>
 #endif
 #include <linux/acpi_agdi.h>
+#include <linux/acpi_apmt.h>
 #include <linux/acpi_iort.h>
 #include <linux/acpi_viot.h>
 #include <linux/pci.h>
@@ -1423,6 +1424,7 @@ static int __init acpi_init(void)
         acpi_setup_sb_notify_handler();
         acpi_viot_init();
         acpi_agdi_init();
+        acpi_apmt_init();
         return 0;
 }
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3f07df5a7e95..82a6d22e8ee2 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -861,16 +861,16 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
                                             &cpu_pmu->node);
 }
 
-static struct arm_pmu *__armpmu_alloc(gfp_t flags)
+struct arm_pmu *armpmu_alloc(void)
 {
         struct arm_pmu *pmu;
         int cpu;
 
-        pmu = kzalloc(sizeof(*pmu), flags);
+        pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
         if (!pmu)
                 goto out;
 
-        pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
+        pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
         if (!pmu->hw_events) {
                 pr_info("failed to allocate per-cpu PMU data.\n");
                 goto out_free_pmu;
@@ -916,17 +916,6 @@ out:
         return NULL;
 }
 
-struct arm_pmu *armpmu_alloc(void)
-{
-        return __armpmu_alloc(GFP_KERNEL);
-}
-
-struct arm_pmu *armpmu_alloc_atomic(void)
-{
-        return __armpmu_alloc(GFP_ATOMIC);
-}
-
-
 void armpmu_free(struct arm_pmu *pmu)
 {
         free_percpu(pmu->hw_events);
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 96ffadd654ff..90815ad762eb 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -13,6 +13,7 @@
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 
+#include <asm/cpu.h>
 #include <asm/cputype.h>
 
 static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
@@ -187,7 +188,7 @@ out_err:
         return err;
 }
 
-static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
+static struct arm_pmu *arm_pmu_acpi_find_pmu(void)
 {
         unsigned long cpuid = read_cpuid_id();
         struct arm_pmu *pmu;
@@ -201,16 +202,7 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
                 return pmu;
         }
 
-        pmu = armpmu_alloc_atomic();
-        if (!pmu) {
-                pr_warn("Unable to allocate PMU for CPU%d\n",
-                        smp_processor_id());
-                return NULL;
-        }
-
-        pmu->acpi_cpuid = cpuid;
-
-        return pmu;
+        return NULL;
 }
 
 /*
@@ -242,6 +234,22 @@ static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
         return true;
 }
 
+static void arm_pmu_acpi_associate_pmu_cpu(struct arm_pmu *pmu,
+                                           unsigned int cpu)
+{
+        int irq = per_cpu(pmu_irqs, cpu);
+
+        per_cpu(probed_pmus, cpu) = pmu;
+
+        if (pmu_irq_matches(pmu, irq)) {
+                struct pmu_hw_events __percpu *hw_events;
+                hw_events = pmu->hw_events;
+                per_cpu(hw_events->irq, cpu) = irq;
+        }
+
+        cpumask_set_cpu(cpu, &pmu->supported_cpus);
+}
+
 /*
  * This must run before the common arm_pmu hotplug logic, so that we can
  * associate a CPU and its interrupt before the common code tries to manage the
@@ -254,42 +262,50 @@ static bool pmu_irq_matches(struct arm_pmu *pmu, int irq)
 static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
 {
         struct arm_pmu *pmu;
-        struct pmu_hw_events __percpu *hw_events;
-        int irq;
 
         /* If we've already probed this CPU, we have nothing to do */
         if (per_cpu(probed_pmus, cpu))
                 return 0;
 
-        irq = per_cpu(pmu_irqs, cpu);
-
-        pmu = arm_pmu_acpi_find_alloc_pmu();
-        if (!pmu)
-                return -ENOMEM;
+        pmu = arm_pmu_acpi_find_pmu();
+        if (!pmu) {
+                pr_warn_ratelimited("Unable to associate CPU%d with a PMU\n",
+                                    cpu);
+                return 0;
+        }
 
-        per_cpu(probed_pmus, cpu) = pmu;
+        arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+        return 0;
+}
 
-        if (pmu_irq_matches(pmu, irq)) {
-                hw_events = pmu->hw_events;
-                per_cpu(hw_events->irq, cpu) = irq;
-        }
+static void arm_pmu_acpi_probe_matching_cpus(struct arm_pmu *pmu,
+                                             unsigned long cpuid)
+{
+        int cpu;
 
-        cpumask_set_cpu(cpu, &pmu->supported_cpus);
+        for_each_online_cpu(cpu) {
+                unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;
 
-        /*
-         * Ideally, we'd probe the PMU here when we find the first matching
-         * CPU. We can't do that for several reasons; see the comment in
-         * arm_pmu_acpi_init().
-         *
-         * So for the time being, we're done.
-         */
-        return 0;
+                if (cpu_cpuid == cpuid)
+                        arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+        }
 }
 
 int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 {
         int pmu_idx = 0;
-        int cpu, ret;
+        unsigned int cpu;
+        int ret;
+
+        ret = arm_pmu_acpi_parse_irqs();
+        if (ret)
+                return ret;
+
+        ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+                                        "perf/arm/pmu_acpi:starting",
+                                        arm_pmu_acpi_cpu_starting, NULL);
+        if (ret)
+                return ret;
 
         /*
          * Initialise and register the set of PMUs which we know about right
@@ -304,13 +320,27 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
          * For the moment, as with the platform/DT case, we need at least one
          * of a PMU's CPUs to be online at probe time.
          */
-        for_each_possible_cpu(cpu) {
+        for_each_online_cpu(cpu) {
                 struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+                unsigned long cpuid;
                 char *base_name;
 
-                if (!pmu || pmu->name)
+                /* If we've already probed this CPU, we have nothing to do */
+                if (pmu)
                         continue;
 
+                pmu = armpmu_alloc();
+                if (!pmu) {
+                        pr_warn("Unable to allocate PMU for CPU%d\n",
+                                cpu);
+                        return -ENOMEM;
+                }
+
+                cpuid = per_cpu(cpu_data, cpu).reg_midr;
+                pmu->acpi_cpuid = cpuid;
+
+                arm_pmu_acpi_probe_matching_cpus(pmu, cpuid);
+
                 ret = init_fn(pmu);
                 if (ret == -ENODEV) {
                         /* PMU not handled by this driver, or not present */
@@ -335,26 +365,16 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
                 }
         }
 
-        return 0;
+        return ret;
 }
 
 static int arm_pmu_acpi_init(void)
 {
-        int ret;
-
         if (acpi_disabled)
                 return 0;
 
         arm_spe_acpi_register_device();
 
-        ret = arm_pmu_acpi_parse_irqs();
-        if (ret)
-                return ret;
-
-        ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
-                                "perf/arm/pmu_acpi:starting",
-                                arm_pmu_acpi_cpu_starting, NULL);
-
-        return ret;
+        return 0;
 }
 subsys_initcall(arm_pmu_acpi_init)
diff --git a/include/linux/acpi_apmt.h b/include/linux/acpi_apmt.h
new file mode 100644
index 000000000000..40bd634d082f
--- /dev/null
+++ b/include/linux/acpi_apmt.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * ARM CoreSight PMU driver.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ */
+
+#ifndef __ACPI_APMT_H__
+#define __ACPI_APMT_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_APMT
+void acpi_apmt_init(void);
+#else
+static inline void acpi_apmt_init(void) { }
+#endif /* CONFIG_ACPI_APMT */
+
+#endif /* __ACPI_APMT_H__ */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 0356cb6a215d..0c15c5b7f801 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -174,7 +174,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
 
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
-struct arm_pmu *armpmu_alloc_atomic(void);
 void armpmu_free(struct arm_pmu *pmu);
 int armpmu_register(struct arm_pmu *pmu);
 int armpmu_request_irq(int irq, int cpu);
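
Nothing in this diff consumes the "arm-cs-arch-pmu" platform devices that apmt.c registers; the acpi_apmt.h header comment only hints that an ARM CoreSight PMU driver is the intended user. For orientation, below is a minimal, hypothetical sketch of such a consumer (not part of this patch; the my_cspmu_* names are made up). It binds by the DEV_NAME string, unpacks the APMT node pointer that apmt_add_platform_device() copied into platform_data, and claims the resources laid out by apmt_init_resources().

// Hypothetical consumer sketch, not part of this patch.
#include <linux/acpi.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int my_cspmu_probe(struct platform_device *pdev)
{
        struct acpi_apmt_node **pdata, *apmt_node;
        void __iomem *page0;
        int irq;

        /*
         * apmt_add_platform_device() passed &node to
         * platform_device_add_data(), so platform_data holds a copy of the
         * pointer, not of the node itself: dereference once to get the node.
         */
        pdata = dev_get_platdata(&pdev->dev);
        if (!pdata || !*pdata)
                return -ENODEV;
        apmt_node = *pdata;

        /* Resource 0 is the 4K page-0 region set up in apmt_init_resources(). */
        page0 = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(page0))
                return PTR_ERR(page0);

        /* The overflow interrupt is only registered when ovflw_irq != 0. */
        irq = platform_get_irq_optional(pdev, 0);
        if (irq < 0 && irq != -ENXIO)
                return irq;

        dev_info(&pdev->dev, "APMT node id %u at %#llx\n", apmt_node->id,
                 (unsigned long long)apmt_node->base_address0);
        return 0;
}

static struct platform_driver my_cspmu_driver = {
        .probe = my_cspmu_probe,
        .driver = {
                .name = "arm-cs-arch-pmu",      /* must match DEV_NAME in apmt.c */
        },
};
module_platform_driver(my_cspmu_driver);
MODULE_LICENSE("GPL");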