diff options
110 files changed, 3210 insertions, 1005 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bcc974d276dc..df79e129d097 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -493,12 +493,13 @@ What: /sys/devices/system/cpu/cpuX/regs/ /sys/devices/system/cpu/cpuX/regs/identification/ /sys/devices/system/cpu/cpuX/regs/identification/midr_el1 /sys/devices/system/cpu/cpuX/regs/identification/revidr_el1 + /sys/devices/system/cpu/cpuX/regs/identification/smidr_el1 Date: June 2016 Contact: Linux ARM Kernel Mailing list <[email protected]> Description: AArch64 CPU registers 'identification' directory exposes the CPU ID registers for - identifying model and revision of the CPU. + identifying model and revision of the CPU and SMCU. What: /sys/devices/system/cpu/aarch32_el0 Date: May 2021 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0f1344eb7c2f..3c78af07b313 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3167,7 +3167,7 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] - kpti=0 [ARM64] + if nokaslr then kpti=0 [ARM64] nospectre_v1 [X86,PPC] nobp=0 [S390] nospectre_v2 [X86,PPC,S390,ARM64] diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst new file mode 100644 index 000000000000..578407e487d6 --- /dev/null +++ b/Documentation/admin-guide/perf/hns3-pmu.rst @@ -0,0 +1,136 @@ +====================================== +HNS3 Performance Monitoring Unit (PMU) +====================================== + +HNS3(HiSilicon network system 3) Performance Monitoring Unit (PMU) is an +End Point device to collect performance statistics of HiSilicon SoC NIC. +On Hip09, each SICL(Super I/O cluster) has one PMU device. + +HNS3 PMU supports collection of performance statistics such as bandwidth, +latency, packet rate and interrupt rate. + +Each HNS3 PMU supports 8 hardware events. + +HNS3 PMU driver +=============== + +The HNS3 PMU driver registers a perf PMU with the name of its sicl id.:: + + /sys/devices/hns3_pmu_sicl_<sicl_id> + +PMU driver provides description of available events, filter modes, format, +identifier and cpumask in sysfs. + +The "events" directory describes the event code of all supported events +shown in perf list. + +The "filtermode" directory describes the supported filter modes of each +event. + +The "format" directory describes all formats of the config (events) and +config1 (filter options) fields of the perf_event_attr structure. + +The "identifier" file shows version of PMU hardware device. + +The "bdf_min" and "bdf_max" files show the supported bdf range of each +pmu device. + +The "hw_clk_freq" file shows the hardware clock frequency of each pmu +device. + +Example usage of checking event code and subevent code:: + + $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time + config=0x00204 + $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num + config=0x10204 + +Each performance statistic has a pair of events to get two values to +calculate real performance data in userspace. + +The bits 0~15 of config (here 0x0204) are the true hardware event code. If +two events have same value of bits 0~15 of config, that means they are +event pair. And the bit 16 of config indicates getting counter 0 or +counter 1 of hardware event. + +After getting two values of event pair in usersapce, the formula of +computation to calculate real performance data is::: + + counter 0 / counter 1 + +Example usage of checking supported filter mode:: + + $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num + filter mode supported: global/port/port-tc/func/func-queue/ + +Example usage of perf:: + + $# perf list + hns3_pmu_sicl_0/bw_ssu_rpu_byte_num/ [kernel PMU event] + hns3_pmu_sicl_0/bw_ssu_rpu_time/ [kernel PMU event] + ------------------------------------------ + + $# perf stat -g -e hns3_pmu_sicl_0/bw_ssu_rpu_byte_num,global=1/ -e hns3_pmu_sicl_0/bw_ssu_rpu_time,global=1/ -I 1000 + or + $# perf stat -g -e hns3_pmu_sicl_0/config=0x00002,global=1/ -e hns3_pmu_sicl_0/config=0x10002,global=1/ -I 1000 + + +Filter modes +-------------- + +1. global mode +PMU collect performance statistics for all HNS3 PCIe functions of IO DIE. +Set the "global" filter option to 1 will enable this mode. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,global=1/ -I 1000 + +2. port mode +PMU collect performance statistic of one whole physical port. The port id +is same as mac id. The "tc" filter option must be set to 0xF in this mode, +here tc stands for traffic class. + +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0xF/ -I 1000 + +3. port-tc mode +PMU collect performance statistic of one tc of physical port. The port id +is same as mac id. The "tc" filter option must be set to 0 ~ 7 in this +mode. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0/ -I 1000 + +4. func mode +PMU collect performance statistic of one PF/VF. The function id is BDF of +PF/VF, its conversion formula:: + + func = (bus << 8) + (device << 3) + (function) + +for example: + BDF func + 35:00.0 0x3500 + 35:00.1 0x3501 + 35:01.0 0x3508 + +In this mode, the "queue" filter option must be set to 0xFFFF. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0xFFFF/ -I 1000 + +5. func-queue mode +PMU collect performance statistic of one queue of PF/VF. The function id +is BDF of PF/VF, the "queue" filter option must be set to the exact queue +id of function. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0/ -I 1000 + +6. func-intr mode +PMU collect performance statistic of one interrupt of PF/VF. The function +id is BDF of PF/VF, the "intr" filter option must be set to the exact +interrupt id of function. +Example usage of perf:: + + $# perf stat -a -e hns3_pmu_sicl_0/config=0x00301,bdf=0x3500,intr=0/ -I 1000 diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 69b23f087c05..9c9ece88ce53 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -9,6 +9,7 @@ Performance monitor support hisi-pmu hisi-pcie-pmu + hns3-pmu imx-ddr qcom_l2_pmu qcom_l3_pmu diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 3d116fb536c5..31fc10b833dd 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -301,6 +301,10 @@ HWCAP2_WFXT Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010. +HWCAP2_EBF16 + + Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 901cd094f4ec..2a641ba7be3b 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -33,9 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: 0000000000000000 0000ffffffffffff 256TB user ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB bpf jit region - ffff800008000000 ffff80000fffffff 128MB modules - ffff800010000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff800007ffffff 128MB modules + ffff800008000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space @@ -51,9 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support): 0000000000000000 000fffffffffffff 4PB user fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB bpf jit region - ffff800008000000 ffff80000fffffff 128MB modules - ffff800010000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff800007ffffff 128MB modules + ffff800008000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d27db84d585e..33b04db8408f 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -82,10 +82,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #853709 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 | @@ -102,6 +106,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index b01bf7bca3e6..6bd78eb4dc6e 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | TODO | | arc: | ok | | arm: | TODO | - | arm64: | TODO | + | arm64: | ok | | csky: | TODO | | hexagon: | TODO | | ia64: | TODO | diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index b12df9137e1c..832b5d36e279 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1894,6 +1894,7 @@ There are some more advanced barrier functions: (*) dma_wmb(); (*) dma_rmb(); + (*) dma_mb(); These are for use with consistent memory to guarantee the ordering of writes or reads of shared memory accessible to both the CPU and a @@ -1925,11 +1926,11 @@ There are some more advanced barrier functions: The dma_rmb() allows us guarantee the device has released ownership before we read the data from the descriptor, and the dma_wmb() allows us to guarantee the data is written to the descriptor before the device - can see it now has ownership. Note that, when using writel(), a prior - wmb() is not needed to guarantee that the cache coherent memory writes - have completed before writing to the MMIO region. The cheaper - writel_relaxed() does not provide this guarantee and must not be used - here. + can see it now has ownership. The dma_mb() implies both a dma_rmb() and + a dma_wmb(). Note that, when using writel(), a prior wmb() is not needed + to guarantee that the cache coherent memory writes have completed before + writing to the MMIO region. The cheaper writel_relaxed() does not provide + this guarantee and must not be used here. See the subsection "Kernel I/O barrier effects" for more information on relaxed I/O accessors and the Documentation/core-api/dma-api.rst file for diff --git a/MAINTAINERS b/MAINTAINERS index 3cf9842d9233..6fc770ed6292 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8944,6 +8944,12 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst F: Documentation/admin-guide/perf/hisi-pmu.rst F: drivers/perf/hisilicon +HISILICON HNS3 PMU DRIVER +M: Guangbin Huang <[email protected]> +S: Supported +F: Documentation/admin-guide/perf/hns3-pmu.rst +F: drivers/perf/hisilicon/hns3_pmu.c + HISILICON QM AND ZIP Controller DRIVER M: Zhou Wang <[email protected]> diff --git a/arch/Kconfig b/arch/Kconfig index fcf9a41a4ef5..52eaa16d9b79 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -223,6 +223,9 @@ config HAVE_FUNCTION_DESCRIPTORS config TRACE_IRQFLAGS_SUPPORT bool +config TRACE_IRQFLAGS_NMI_SUPPORT + bool + # # An arch should select this if it provides all these things: # diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index eba7cbc93b86..7fcdc785366c 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -139,11 +139,9 @@ extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); void __arm_iomem_set_ro(void __iomem *ptr, size_t size); -extern void __iounmap(volatile void __iomem *addr); extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); -extern void (*arch_iounmap)(volatile void __iomem *); /* * Bad read/write accesses... @@ -380,7 +378,7 @@ void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); #define ioremap_wc ioremap_wc #define ioremap_wt ioremap_wc -void iounmap(volatile void __iomem *iomem_cookie); +void iounmap(volatile void __iomem *io_addr); #define iounmap iounmap void *arch_memremap_wb(phys_addr_t phys_addr, size_t size); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 576c0e6c92fc..2129070065c3 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -418,7 +418,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) __builtin_return_address(0)); } -void __iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *io_addr) { void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); struct static_vm *svm; @@ -446,13 +446,6 @@ void __iounmap(volatile void __iomem *io_addr) vunmap(addr); } - -void (*arch_iounmap)(volatile void __iomem *) = __iounmap; - -void iounmap(volatile void __iomem *cookie) -{ - arch_iounmap(cookie); -} EXPORT_SYMBOL(iounmap); #if defined(CONFIG_PCI) || IS_ENABLED(CONFIG_PCMCIA) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 2658f52903da..c42debaded95 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -230,14 +230,7 @@ void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) return (void *)phys_addr; } -void __iounmap(volatile void __iomem *addr) -{ -} -EXPORT_SYMBOL(__iounmap); - -void (*arch_iounmap)(volatile void __iomem *); - -void iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *io_addr) { } EXPORT_SYMBOL(iounmap); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..340e61199057 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -101,6 +101,7 @@ config ARM64 select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_NO_INSTR + select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER @@ -126,6 +127,7 @@ config ARM64 select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP + select GENERIC_IOREMAP select GENERIC_IRQ_IPI select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -188,6 +190,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM select HAVE_NMI @@ -226,6 +229,7 @@ config ARM64 select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT help ARM 64-bit (AArch64) Linux support. @@ -503,6 +507,22 @@ config ARM64_ERRATUM_834220 If unsure, say Y. +config ARM64_ERRATUM_1742098 + bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence" + depends on COMPAT + default y + help + This option removes the AES hwcap for aarch32 user-space to + workaround erratum 1742098 on Cortex-A57 and Cortex-A72. + + Affected parts may corrupt the AES state if an interrupt is + taken between a pair of AES instructions. These instructions + are only present if the cryptography extensions are present. + All software should have a fallback implementation for CPUs + that don't implement the cryptography extensions. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT @@ -821,6 +841,23 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. +config ARM64_ERRATUM_2441009 + bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds a workaround for ARM Cortex-A510 erratum #2441009. + + Under very rare circumstances, affected Cortex-A510 CPUs + may not handle a race between a break-before-make sequence on one + CPU, and another CPU accessing the same page. This could allow a + store to a page that has been unmapped. + + Work around this by adding the affected CPUs to the list that needs + TLB sequences to be done twice. + + If unsure, say Y. + config ARM64_ERRATUM_2064142 bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" depends on CORESIGHT_TRBE diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index ebe80faab883..a0e3dedd2883 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -35,3 +35,6 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) + +$(obj)/Image.zst: $(obj)/Image FORCE + $(call if_changed,zstd) diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index c39f2437e08e..980d1dd8e1a3 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -2,12 +2,27 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#include <linux/bits.h> +#include <asm/gpr-num.h> + #define EX_TYPE_NONE 0 -#define EX_TYPE_FIXUP 1 -#define EX_TYPE_BPF 2 -#define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_BPF 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 +#define EX_TYPE_KACCESS_ERR_ZERO 3 #define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 +/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */ +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */ +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + #ifdef __ASSEMBLY__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ @@ -19,31 +34,45 @@ .short (data); \ .popsection; +#define EX_DATA_REG(reg, gpr) \ + (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT) + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __ASM_EXTABLE_RAW(insn, fixup, \ + EX_TYPE_UACCESS_ERR_ZERO, \ + ( \ + EX_DATA_REG(ERR, err) | \ + EX_DATA_REG(ZERO, zero) \ + )) + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + +#define _ASM_EXTABLE_UACCESS(insn, fixup) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + /* - * Create an exception table entry for `insn`, which will branch to `fixup` + * Create an exception table entry for uaccess `insn`, which will branch to `fixup` * when an unhandled fault is taken. */ - .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .macro _asm_extable_uaccess, insn, fixup + _ASM_EXTABLE_UACCESS(\insn, \fixup) .endm /* * Create an exception table entry for `insn` if `fixup` is provided. Otherwise * do nothing. */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup + .macro _cond_uaccess_extable, insn, fixup + .ifnc \fixup, + _asm_extable_uaccess \insn, \fixup .endif .endm #else /* __ASSEMBLY__ */ -#include <linux/bits.h> #include <linux/stringify.h> -#include <asm/gpr-num.h> - #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".align 2\n" \ @@ -53,14 +82,6 @@ ".short (" data ")\n" \ ".popsection\n" -#define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") - -#define EX_DATA_REG_ERR_SHIFT 0 -#define EX_DATA_REG_ERR GENMASK(4, 0) -#define EX_DATA_REG_ZERO_SHIFT 5 -#define EX_DATA_REG_ZERO GENMASK(9, 5) - #define EX_DATA_REG(reg, gpr) \ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" @@ -73,13 +94,23 @@ EX_DATA_REG(ZERO, zero) \ ")") +#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_KACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) -#define EX_DATA_REG_DATA_SHIFT 0 -#define EX_DATA_REG_DATA GENMASK(4, 0) -#define EX_DATA_REG_ADDR_SHIFT 5 -#define EX_DATA_REG_ADDR GENMASK(9, 5) +#define _ASM_EXTABLE_UACCESS(insn, fixup) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + +#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr) #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 0557af834e03..75b211c98dea 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -61,7 +61,7 @@ alternative_else_nop_endif #define USER(l, x...) \ 9999: x; \ - _asm_extable 9999b, l + _asm_extable_uaccess 9999b, l /* * Generate the assembly for LDTR/STTR with exception table entries. @@ -73,8 +73,8 @@ alternative_else_nop_endif 8889: ldtr \reg2, [\addr, #8]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; + _asm_extable_uaccess 8888b, \l; + _asm_extable_uaccess 8889b, \l; .endm .macro user_stp l, reg1, reg2, addr, post_inc @@ -82,14 +82,14 @@ alternative_else_nop_endif 8889: sttr \reg2, [\addr, #8]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; + _asm_extable_uaccess 8888b,\l; + _asm_extable_uaccess 8889b,\l; .endm .macro user_ldst l, inst, reg, addr, post_inc 8888: \inst \reg, [\addr]; add \addr, \addr, \post_inc; - _asm_extable 8888b,\l; + _asm_extable_uaccess 8888b, \l; .endm #endif diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index ead62f7dd269..13ecc79854ee 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -59,9 +59,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 - ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + ubfx \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8 mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 - ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4 orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b2584709c332..5846145be523 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -437,7 +437,7 @@ alternative_endif b.lo .Ldcache_op\@ dsb \domain - _cond_extable .Ldcache_op\@, \fixup + _cond_uaccess_extable .Ldcache_op\@, \fixup .endm /* @@ -476,7 +476,7 @@ alternative_endif dsb ish isb - _cond_extable .Licache_op\@, \fixup + _cond_uaccess_extable .Licache_op\@, \fixup .endm /* diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 9f3e2c3d2ca0..2cfc4245d2e2 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -50,13 +50,13 @@ #define pmr_sync() do {} while (0) #endif -#define mb() dsb(sy) -#define rmb() dsb(ld) -#define wmb() dsb(st) +#define __mb() dsb(sy) +#define __rmb() dsb(ld) +#define __wmb() dsb(st) -#define dma_mb() dmb(osh) -#define dma_rmb() dmb(oshld) -#define dma_wmb() dmb(oshst) +#define __dma_mb() dmb(osh) +#define __dma_rmb() dmb(oshld) +#define __dma_wmb() dmb(oshst) #define io_stop_wc() dgh() diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 7c2181c72116..ca9b487112cc 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -5,34 +5,9 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H -#include <asm/cputype.h> -#include <asm/mte-def.h> - -#define CTR_L1IP_SHIFT 14 -#define CTR_L1IP_MASK 3 -#define CTR_DMINLINE_SHIFT 16 -#define CTR_IMINLINE_SHIFT 0 -#define CTR_IMINLINE_MASK 0xf -#define CTR_ERG_SHIFT 20 -#define CTR_CWG_SHIFT 24 -#define CTR_CWG_MASK 15 -#define CTR_IDC_SHIFT 28 -#define CTR_DIC_SHIFT 29 - -#define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) - -#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) - -#define ICACHE_POLICY_VPIPT 0 -#define ICACHE_POLICY_RESERVED 1 -#define ICACHE_POLICY_VIPT 2 -#define ICACHE_POLICY_PIPT 3 - #define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) - #define CLIDR_LOUU_SHIFT 27 #define CLIDR_LOC_SHIFT 24 #define CLIDR_LOUIS_SHIFT 21 @@ -55,6 +30,10 @@ #include <linux/bitops.h> #include <linux/kasan-enabled.h> +#include <asm/cputype.h> +#include <asm/mte-def.h> +#include <asm/sysreg.h> + #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) #elif defined(CONFIG_KASAN_HW_TAGS) @@ -66,6 +45,12 @@ static inline unsigned int arch_slab_minalign(void) #define arch_slab_minalign() arch_slab_minalign() #endif +#define CTR_CACHE_MINLINE_MASK \ + (0xf << CTR_EL0_DMINLINE_SHIFT | \ + CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT) + +#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr) + #define ICACHEF_ALIASING 0 #define ICACHEF_VPIPT 1 extern unsigned long __icache_flags; @@ -86,7 +71,7 @@ static __always_inline int icache_is_vpipt(void) static inline u32 cache_type_cwg(void) { - return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; + return (read_cpuid_cachetype() >> CTR_EL0_CWG_SHIFT) & CTR_EL0_CWG_MASK; } #define __read_mostly __section(".data..read_mostly") @@ -120,12 +105,12 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) { u32 ctr = read_cpuid_cachetype(); - if (!(ctr & BIT(CTR_IDC_SHIFT))) { + if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) { u64 clidr = read_sysreg(clidr_el1); if (CLIDR_LOC(clidr) == 0 || (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0)) - ctr |= BIT(CTR_IDC_SHIFT); + ctr |= BIT(CTR_EL0_IDC_SHIFT); } return ctr; diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 5a228e203ef9..37185e978aeb 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -105,13 +105,6 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) #define flush_icache_range flush_icache_range /* - * Cache maintenance functions used by the DMA API. No to be used directly. - */ -extern void __dma_map_area(const void *, size_t, int); -extern void __dma_unmap_area(const void *, size_t, int); -extern void __dma_flush_area(const void *, size_t); - -/* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user * space" model to handle this. diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 115cdec1ae87..fd7a92219eea 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -46,6 +46,7 @@ struct cpuinfo_arm64 { u64 reg_midr; u64 reg_revidr; u64 reg_gmid; + u64 reg_smidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index e95c4df83911..a444c8915e88 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -31,11 +31,6 @@ * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for - * a proposed logical id. - * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing - * to wrong parameters or error conditions. Called from the - * CPU being suspended. Must be called with IRQs disabled. */ struct cpu_operations { const char *name; @@ -49,10 +44,6 @@ struct cpu_operations { void (*cpu_die)(unsigned int cpu); int (*cpu_kill)(unsigned int cpu); #endif -#ifdef CONFIG_CPU_IDLE - int (*cpu_init_idle)(unsigned int); - int (*cpu_suspend)(unsigned long); -#endif }; int __init init_cpu_ops(int cpu); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0fc4f6e068e5..fd7d75a275f6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,7 +11,7 @@ #include <asm/hwcap.h> #include <asm/sysreg.h> -#define MAX_CPU_FEATURES 64 +#define MAX_CPU_FEATURES 128 #define cpu_feature(x) KERNEL_HWCAP_ ## x #ifndef __ASSEMBLY__ @@ -673,7 +673,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_CLEARBHB_SHIFT); + ID_AA64ISAR2_EL1_BC_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index 14a19d1141bd..2047713e097d 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -4,21 +4,6 @@ #include <asm/proc-fns.h> -#ifdef CONFIG_CPU_IDLE -extern int arm_cpuidle_init(unsigned int cpu); -extern int arm_cpuidle_suspend(int index); -#else -static inline int arm_cpuidle_init(unsigned int cpu) -{ - return -EOPNOTSUPP; -} - -static inline int arm_cpuidle_suspend(int index) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ARM64_PSEUDO_NMI #include <asm/arch_gicv3.h> diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index daff882883f9..71ed5fdf718b 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,10 +62,12 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#ifdef CONFIG_RELOCATABLE + FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */ +#endif FIX_ENTRY_TRAMP_TEXT3, FIX_ENTRY_TRAMP_TEXT2, FIX_ENTRY_TRAMP_TEXT1, - FIX_ENTRY_TRAMP_DATA, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index aa443d8f8cfb..cef4ae7a3d8b 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -85,7 +85,7 @@ #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) -#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) +#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) #define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2) #define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES) @@ -118,6 +118,7 @@ #define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32) #define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64) #define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT) +#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 3995652daf81..87dd42d74afe 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -163,13 +163,16 @@ extern void __memset_io(volatile void __iomem *, int, size_t); /* * I/O memory mapping functions. */ -extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot); -extern void iounmap(volatile void __iomem *addr); -extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); -#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) -#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) -#define ioremap_np(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE)) +bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot); +#define ioremap_allowed ioremap_allowed + +#define _PAGE_IOREMAP PROT_DEVICE_nGnRE + +#define ioremap_wc(addr, size) \ + ioremap_prot((addr), (size), PROT_NORMAL_NC) +#define ioremap_np(addr, size) \ + ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE) /* * io{read,write}{16,32,64}be() macros @@ -184,6 +187,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #include <asm-generic/io.h> +#define ioremap_cache ioremap_cache +static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size) +{ + if (pfn_is_map_memory(__phys_to_pfn(addr))) + return (void __iomem *)__phys_to_virt(addr); + + return ioremap_prot(addr, size, PROT_NORMAL); +} + /* * More restrictive address range checking than the default implementation * (PHYS_OFFSET and PHYS_MASK taken into account). diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c751cd9b94f8..227d256cd4b9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -355,6 +355,11 @@ static inline void *phys_to_virt(phys_addr_t x) }) void dump_mem_limit(void); + +static inline bool defer_reserve_crashkernel(void) +{ + return IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32); +} #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index dd3d12bce07b..5ab8d163198f 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -281,10 +281,9 @@ */ #ifdef CONFIG_ARM64_PA_BITS_52 /* - * This should be GENMASK_ULL(47, 2). * TTBR_ELx[1] is RES0 in this configuration. */ -#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) +#define TTBR_BADDR_MASK_52 GENMASK_ULL(47, 2) #endif #ifdef CONFIG_ARM64_VA_BITS_52 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b6632f18364..b5df82aa99e6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -45,6 +45,12 @@ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline bool arch_thp_swp_supported(void) +{ + return !system_supports_mte(); +} +#define arch_thp_swp_supported arch_thp_swp_supported + /* * Outside of a few very special situations (e.g. hibernation), we always * use broadcast TLB invalidation instructions, therefore a spurious page @@ -427,6 +433,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + + return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); +} + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9e58749db21d..86eb0bfe3b38 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -272,8 +272,9 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { + s32 previous_syscall = regs->syscallno; memset(regs, 0, sizeof(*regs)); - forget_syscall(regs); + regs->syscallno = previous_syscall; regs->pc = pc; if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 42ff95dba6da..7c71358d44c4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -192,8 +192,6 @@ #define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) #define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) -#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) -#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5) #define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) #define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) @@ -201,9 +199,6 @@ #define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) #define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) -#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) -#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) - #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) #define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) @@ -410,12 +405,6 @@ #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) -#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) -#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) -#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) -#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) -#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) - #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) @@ -454,16 +443,12 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) -#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SMIDR_EL1_IMPLEMENTER_SHIFT 24 #define SMIDR_EL1_SMPS_SHIFT 15 #define SMIDR_EL1_AFFINITY_SHIFT 0 -#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) -#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) - #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) #define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) @@ -704,66 +689,6 @@ /* Position the attr at the correct index */ #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) -/* id_aa64isar1 */ -#define ID_AA64ISAR1_I8MM_SHIFT 52 -#define ID_AA64ISAR1_DGH_SHIFT 48 -#define ID_AA64ISAR1_BF16_SHIFT 44 -#define ID_AA64ISAR1_SPECRES_SHIFT 40 -#define ID_AA64ISAR1_SB_SHIFT 36 -#define ID_AA64ISAR1_FRINTTS_SHIFT 32 -#define ID_AA64ISAR1_GPI_SHIFT 28 -#define ID_AA64ISAR1_GPA_SHIFT 24 -#define ID_AA64ISAR1_LRCPC_SHIFT 20 -#define ID_AA64ISAR1_FCMA_SHIFT 16 -#define ID_AA64ISAR1_JSCVT_SHIFT 12 -#define ID_AA64ISAR1_API_SHIFT 8 -#define ID_AA64ISAR1_APA_SHIFT 4 -#define ID_AA64ISAR1_DPB_SHIFT 0 - -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 -#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 -#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 -#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 -#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 - -/* id_aa64isar2 */ -#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 -#define ID_AA64ISAR2_APA3_SHIFT 12 -#define ID_AA64ISAR2_GPA3_SHIFT 8 -#define ID_AA64ISAR2_RPRES_SHIFT 4 -#define ID_AA64ISAR2_WFXT_SHIFT 0 - -#define ID_AA64ISAR2_RPRES_8BIT 0x0 -#define ID_AA64ISAR2_RPRES_12BIT 0x1 -/* - * Value 0x1 has been removed from the architecture, and is - * reserved, but has not yet been removed from the ARM ARM - * as of ARM DDI 0487G.b. - */ -#define ID_AA64ISAR2_WFXT_NI 0x0 -#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 - -#define ID_AA64ISAR2_APA3_NI 0x0 -#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 -#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 -#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 - -#define ID_AA64ISAR2_GPA3_NI 0x0 -#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 - /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -811,45 +736,6 @@ #define ID_AA64PFR1_MTE 0x2 #define ID_AA64PFR1_MTE_ASYMM 0x3 -/* id_aa64zfr0 */ -#define ID_AA64ZFR0_F64MM_SHIFT 56 -#define ID_AA64ZFR0_F32MM_SHIFT 52 -#define ID_AA64ZFR0_I8MM_SHIFT 44 -#define ID_AA64ZFR0_SM4_SHIFT 40 -#define ID_AA64ZFR0_SHA3_SHIFT 32 -#define ID_AA64ZFR0_BF16_SHIFT 20 -#define ID_AA64ZFR0_BITPERM_SHIFT 16 -#define ID_AA64ZFR0_AES_SHIFT 4 -#define ID_AA64ZFR0_SVEVER_SHIFT 0 - -#define ID_AA64ZFR0_F64MM 0x1 -#define ID_AA64ZFR0_F32MM 0x1 -#define ID_AA64ZFR0_I8MM 0x1 -#define ID_AA64ZFR0_BF16 0x1 -#define ID_AA64ZFR0_SM4 0x1 -#define ID_AA64ZFR0_SHA3 0x1 -#define ID_AA64ZFR0_BITPERM 0x1 -#define ID_AA64ZFR0_AES 0x1 -#define ID_AA64ZFR0_AES_PMULL 0x2 -#define ID_AA64ZFR0_SVEVER_SVE2 0x1 - -/* id_aa64smfr0 */ -#define ID_AA64SMFR0_FA64_SHIFT 63 -#define ID_AA64SMFR0_I16I64_SHIFT 52 -#define ID_AA64SMFR0_F64F64_SHIFT 48 -#define ID_AA64SMFR0_I8I32_SHIFT 36 -#define ID_AA64SMFR0_F16F32_SHIFT 35 -#define ID_AA64SMFR0_B16F32_SHIFT 34 -#define ID_AA64SMFR0_F32F32_SHIFT 32 - -#define ID_AA64SMFR0_FA64 0x1 -#define ID_AA64SMFR0_I16I64 0xf -#define ID_AA64SMFR0_F64F64 0x1 -#define ID_AA64SMFR0_I8I32 0xf -#define ID_AA64SMFR0_F16F32 0x1 -#define ID_AA64SMFR0_B16F32 0x1 -#define ID_AA64SMFR0_F32F32 0x1 - /* id_aa64mmfr0 */ #define ID_AA64MMFR0_ECV_SHIFT 60 #define ID_AA64MMFR0_FGT_SHIFT 56 @@ -902,6 +788,7 @@ /* id_aa64mmfr1 */ #define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_TIDCP1_SHIFT 52 #define ID_AA64MMFR1_HCX_SHIFT 40 #define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 @@ -918,6 +805,9 @@ #define ID_AA64MMFR1_VMIDBITS_8 0 #define ID_AA64MMFR1_VMIDBITS_16 2 +#define ID_AA64MMFR1_TIDCP1_NI 0 +#define ID_AA64MMFR1_TIDCP1_IMP 1 + /* id_aa64mmfr2 */ #define ID_AA64MMFR2_E0PD_SHIFT 60 #define ID_AA64MMFR2_EVT_SHIFT 56 @@ -1084,9 +974,6 @@ #define MVFR2_FPMISC_SHIFT 4 #define MVFR2_SIMDMISC_SHIFT 0 -#define DCZID_DZP_SHIFT 4 -#define DCZID_BS_SHIFT 0 - #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ @@ -1121,8 +1008,8 @@ #define SYS_RGSR_EL1_SEED_MASK 0xffffUL /* GMID_EL1 field definitions */ -#define SYS_GMID_EL1_BS_SHIFT 0 -#define SYS_GMID_EL1_BS_SIZE 4 +#define GMID_EL1_BS_SHIFT 0 +#define GMID_EL1_BS_SIZE 4 /* TFSR{,E0}_EL1 bit definitions */ #define SYS_TFSR_EL1_TF0_SHIFT 0 @@ -1324,6 +1211,9 @@ #endif +#define SYS_FIELD_GET(reg, field, val) \ + FIELD_GET(reg##_##field##_MASK, val) + #define SYS_FIELD_PREP(reg, field, val) \ FIELD_PREP(reg##_##field##_MASK, val) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 63f9c828f1a7..2fc9f0861769 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -232,34 +232,34 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err) \ +#define __get_mem_asm(load, reg, x, addr, err, type) \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ + _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ : "r" (addr)) -#define __raw_get_mem(ldr, x, ptr, err) \ -do { \ - unsigned long __gu_val; \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \ - break; \ - case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \ - break; \ - case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \ - break; \ - case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ +#define __raw_get_mem(ldr, x, ptr, err, type) \ +do { \ + unsigned long __gu_val; \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 2: \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 4: \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + break; \ + case 8: \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) /* @@ -274,7 +274,7 @@ do { \ __chk_user_ptr(ptr); \ \ uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ uaccess_ttbr0_disable(); \ \ (x) = __rgu_val; \ @@ -314,40 +314,40 @@ do { \ \ __uaccess_enable_tco_async(); \ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err); \ + (__force type *)(__gkn_src), __gkn_err, K); \ __uaccess_disable_tco_async(); \ \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) -#define __put_mem_asm(store, reg, x, addr, err) \ +#define __put_mem_asm(store, reg, x, addr, err, type) \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ + _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ : "r" (x), "r" (addr)) -#define __raw_put_mem(str, x, ptr, err) \ -do { \ - __typeof__(*(ptr)) __pu_val = (x); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \ - break; \ - case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \ - break; \ - case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \ - break; \ - case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ +#define __raw_put_mem(str, x, ptr, err, type) \ +do { \ + __typeof__(*(ptr)) __pu_val = (x); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 2: \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 4: \ + __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + break; \ + case 8: \ + __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ } while (0) /* @@ -362,7 +362,7 @@ do { \ __chk_user_ptr(__rpu_ptr); \ \ uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ uaccess_ttbr0_disable(); \ } while (0) @@ -400,7 +400,7 @@ do { \ \ __uaccess_enable_tco_async(); \ __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err); \ + (__force type *)(__pkn_dst), __pkn_err, K); \ __uaccess_disable_tco_async(); \ \ if (unlikely(__pkn_err)) \ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 4bb2cc8ac446..1ad2568a2569 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -19,6 +19,9 @@ /* * HWCAP flags - for AT_HWCAP + * + * Bits 62 and 63 are reserved for use by libc. + * Bits 32-61 are unallocated for potential use by libc. */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) @@ -88,5 +91,6 @@ #define HWCAP2_SME_F32F32 (1 << 29) #define HWCAP2_SME_FA64 (1 << 30) #define HWCAP2_WFXT (1UL << 31) +#define HWCAP2_EBF16 (1UL << 32) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88a96511580e..1add7b01efa7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector +# When KASAN is enabled, a stack trace is recorded for every alloc/free, which +# can significantly impact performance. Avoid instrumenting the stack trace +# collection code to minimize this impact. +KASAN_SANITIZE_stacktrace.o := n + # It's not safe to invoke KCOV when portions of the kernel environment aren't # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index e4dea8db6924..a5a256e3f9fe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -351,7 +351,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) prot = __acpi_get_writethrough_mem_attribute(); } } - return __ioremap(phys, size, prot); + return ioremap_prot(phys, size, pgprot_val(prot)); } /* diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index fdfecf0991ce..e51535a5f939 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -109,7 +109,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) pxm = pa->proximity_domain; node = acpi_map_pxm_to_node(pxm); - if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + if (node == NUMA_NO_NODE) { pr_err("SRAT: Too many proximity domains %d\n", pxm); bad_srat(); return; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 7bbf5104b7b7..9bcaa5eacf16 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -121,7 +121,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, - CTR_DMINLINE_SHIFT); + CTR_EL0_DminLine_SHIFT); cur = start & ~(d_size - 1); do { /* diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6875a16b09d2..fb0e7c7b2e20 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -59,6 +59,7 @@ struct insn_emulation { static LIST_HEAD(insn_emulation); static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); +static DEFINE_MUTEX(insn_emulation_mutex); static void register_emulation_hooks(struct insn_emulation_ops *ops) { @@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write, loff_t *ppos) { int ret = 0; - struct insn_emulation *insn = (struct insn_emulation *) table->data; + struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode); enum insn_emulation_mode prev_mode = insn->current_mode; - table->data = &insn->current_mode; + mutex_lock(&insn_emulation_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write || prev_mode == insn->current_mode) @@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write, update_insn_emulation_mode(insn, INSN_UNDEF); } ret: - table->data = insn; + mutex_unlock(&insn_emulation_mutex); return ret; } @@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void) sysctl->maxlen = sizeof(int); sysctl->procname = insn->ops->name; - sysctl->data = insn; + sysctl->data = &insn->current_mode; sysctl->extra1 = &insn->min; sysctl->extra2 = &insn->max; sysctl->proc_handler = emulation_proc_handler; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c05cc3b6162e..7e6289e709fc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -187,7 +187,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, int scope) { u32 midr = read_cpuid_id(); - bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT); const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -212,6 +212,12 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2441009 + { + /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */ + ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), + }, +#endif {}, }; #endif @@ -395,6 +401,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_ERRATUM_1742098 +static struct midr_range broken_aarch32_aes[] = { + MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -480,7 +494,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm erratum 1009, or ARM erratum 1286807", + .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, @@ -658,6 +672,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1742098 + { + .desc = "ARM erratum 1742098", + .capability = ARM64_WORKAROUND_1742098, + CAP_MIDR_RANGE_LIST(broken_aarch32_aes), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5afa9eba85d..0f6d3b213c25 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -79,6 +79,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/fpsimd.h> +#include <asm/hwcap.h> #include <asm/insn.h> #include <asm/kvm_host.h> #include <asm/mmu_context.h> @@ -91,7 +92,7 @@ #include <asm/virt.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ -static unsigned long elf_hwcap __read_mostly; +static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ @@ -209,35 +210,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_I8MM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DGH_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_BF16_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SPECRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_WFXT_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -276,41 +277,41 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F64MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_F32MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_I8MM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BF16_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), - FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -361,6 +362,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), @@ -396,18 +398,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -453,13 +455,13 @@ static const struct arm64_ftr_bits ftr_mvfr2[] = { }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_gmid[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -561,7 +563,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), @@ -1486,7 +1488,7 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, else ctr = read_cpuid_effective_cachetype(); - return ctr & BIT(CTR_IDC_SHIFT); + return ctr & BIT(CTR_EL0_IDC_SHIFT); } static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) @@ -1497,7 +1499,7 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu * to the CTR_EL0 on this CPU and emulate it with the real/safe * value. */ - if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -1511,7 +1513,7 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, else ctr = read_cpuid_cachetype(); - return ctr & BIT(CTR_DIC_SHIFT); + return ctr & BIT(CTR_EL0_DIC_SHIFT); } static bool __maybe_unused @@ -1651,14 +1653,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +extern +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); + +static phys_addr_t kpti_ng_temp_alloc; + +static phys_addr_t kpti_ng_pgd_alloc(int shift) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + return kpti_ng_temp_alloc; +} + static void __nocfi kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { - typedef void (kpti_remap_fn)(int, int, phys_addr_t); + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels); + u64 kpti_ng_temp_pgd_pa = 0; + pgd_t *kpti_ng_temp_pgd; + u64 alloc = 0; if (__this_cpu_read(this_cpu_vector) == vectors) { const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); @@ -1676,12 +1698,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings)); + if (!cpu) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); + + // + // Create a minimal page table hierarchy that permits us to map + // the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +--------+-/-------+-/------ +-\\--------+ + // : PTE[] : | PMD[] : | PUD[] : || PGD[] : + // +--------+-\-------+-\------ +-//--------+ + // ^ + // The first page is mapped into this hierarchy at a PMD_SHIFT + // aligned virtual address, so that we can manipulate the PTE + // level entries while the mapping is active. The first entry + // covers the PTE[] page itself, the remaining entries are free + // to be used as a ad-hoc fixmap. + // + create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, + kpti_ng_pgd_alloc, 0); + } + cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); cpu_uninstall_idmap(); - if (!cpu) + if (!cpu) { + free_pages(alloc, order); arm64_use_ng_mappings = true; + } } #else static void @@ -1977,6 +2027,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +static void elf_hwcap_fixup(void) +{ +#ifdef CONFIG_ARM64_ERRATUM_1742098 + if (cpus_have_const_cap(ARM64_WORKAROUND_1742098)) + compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES; +#endif /* ARM64_ERRATUM_1742098 */ +} + #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -1984,6 +2042,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in } #endif /* CONFIG_KVM */ +static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2138,7 +2201,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, .min_field_value = 1, }, @@ -2149,7 +2212,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_DPB_SHIFT, .field_width = 4, .min_field_value = 2, }, @@ -2309,7 +2372,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, - .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_SB_SHIFT, .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, @@ -2321,9 +2384,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_APA_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, + .min_field_value = ID_AA64ISAR1_EL1_APA_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2332,9 +2395,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_APA3_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .min_field_value = ID_AA64ISAR2_EL1_APA3_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2343,9 +2406,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_API_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_API_IMP_DEF, + .min_field_value = ID_AA64ISAR1_EL1_API_PAuth, .matches = has_address_auth_cpucap, }, { @@ -2359,9 +2422,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_GPA_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, + .min_field_value = ID_AA64ISAR1_EL1_GPA_IMP, .matches = has_cpuid_feature, }, { @@ -2370,9 +2433,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_GPA3_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .min_field_value = ID_AA64ISAR2_EL1_GPA3_IMP, .matches = has_cpuid_feature, }, { @@ -2381,9 +2444,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_GPI_SHIFT, .field_width = 4, - .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, + .min_field_value = ID_AA64ISAR1_EL1_GPI_IMP, .matches = has_cpuid_feature, }, { @@ -2484,7 +2547,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_pos = ID_AA64ISAR1_EL1_LRCPC_SHIFT, .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2509,9 +2572,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_SME_FA64, .sys_reg = SYS_ID_AA64SMFR0_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64SMFR0_FA64_SHIFT, + .field_pos = ID_AA64SMFR0_EL1_FA64_SHIFT, .field_width = 1, - .min_field_value = ID_AA64SMFR0_FA64, + .min_field_value = ID_AA64SMFR0_EL1_FA64_IMP, .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, }, @@ -2522,10 +2585,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR2_EL1, .sign = FTR_UNSIGNED, - .field_pos = ID_AA64ISAR2_WFXT_SHIFT, + .field_pos = ID_AA64ISAR2_EL1_WFxT_SHIFT, .field_width = 4, .matches = has_cpuid_feature, - .min_field_value = ID_AA64ISAR2_WFXT_SUPPORTED, + .min_field_value = ID_AA64ISAR2_EL1_WFxT_IMP, + }, + { + .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", + .capability = ARM64_HAS_TIDCP1, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64MMFR1_TIDCP1_IMP, + .matches = has_cpuid_feature, + .cpu_enable = cpu_trap_el0_impdef, }, {}, }; @@ -2566,33 +2641,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_APA_SHIFT, 4, FTR_UNSIGNED, - ID_AA64ISAR1_APA_ARCHITECTED) + ID_AA64ISAR1_EL1_APA_PAuth) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_APA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_APA3_PAuth) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_API_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_API_PAuth) }, {}, }; static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPA_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPA_IMP) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_GPA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_GPA3_IMP) }, { - HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_GPI_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR1_EL1_GPI_IMP) }, {}, }; @@ -2620,30 +2695,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI @@ -2659,17 +2735,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_WFXT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_WFXT_SUPPORTED, CAP_HWCAP, KERNEL_HWCAP_WFXT), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), #ifdef CONFIG_ARM64_SME HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), #endif /* CONFIG_ARM64_SME */ {}, }; @@ -3104,14 +3180,12 @@ static bool __maybe_unused __system_matches_cap(unsigned int n) void cpu_set_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - elf_hwcap |= BIT(num); + set_bit(num, elf_hwcap); } bool cpu_have_feature(unsigned int num) { - WARN_ON(num >= MAX_CPU_FEATURES); - return elf_hwcap & BIT(num); + return test_bit(num, elf_hwcap); } EXPORT_SYMBOL_GPL(cpu_have_feature); @@ -3122,12 +3196,12 @@ unsigned long cpu_get_elf_hwcap(void) * note that for userspace compatibility we guarantee that bits 62 * and 63 will always be returned as 0. */ - return lower_32_bits(elf_hwcap); + return elf_hwcap[0]; } unsigned long cpu_get_elf_hwcap2(void) { - return upper_32_bits(elf_hwcap); + return elf_hwcap[1]; } static void __init setup_system_capabilities(void) @@ -3149,8 +3223,10 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) + if (system_supports_32bit_el0()) { setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); + } if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); @@ -3203,6 +3279,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu) cpu_active_mask); get_cpu_device(lucky_winner)->offline_disabled = true; setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", cpu, lucky_winner); return 0; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 3006f4324808..4150e308e99c 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -13,35 +13,6 @@ #include <linux/of_device.h> #include <linux/psci.h> -#include <asm/cpuidle.h> -#include <asm/cpu_ops.h> - -int arm_cpuidle_init(unsigned int cpu) -{ - const struct cpu_operations *ops = get_cpu_ops(cpu); - int ret = -EOPNOTSUPP; - - if (ops && ops->cpu_suspend && ops->cpu_init_idle) - ret = ops->cpu_init_idle(cpu); - - return ret; -} - -/** - * arm_cpuidle_suspend() - function to enter a low-power idle state - * @index: argument to pass to CPU suspend operations - * - * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU - * operations back-end error code otherwise. - */ -int arm_cpuidle_suspend(int index) -{ - int cpu = smp_processor_id(); - const struct cpu_operations *ops = get_cpu_ops(cpu); - - return ops->cpu_suspend(index); -} - #ifdef CONFIG_ACPI #include <acpi/processor.h> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 8eff0a34ffd4..26baa87e8fe7 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -33,12 +33,19 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static const char *icache_policy_str[] = { - [ICACHE_POLICY_VPIPT] = "VPIPT", - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", -}; +static inline const char *icache_policy_str(int l1ip) +{ + switch (l1ip) { + case CTR_EL0_L1Ip_VPIPT: + return "VPIPT"; + case CTR_EL0_L1Ip_VIPT: + return "VIPT"; + case CTR_EL0_L1Ip_PIPT: + return "PIPT"; + default: + return "RESERVED/UNKNOWN"; + } +} unsigned long __icache_flags; @@ -107,6 +114,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_F32F32] = "smef32f32", [KERNEL_HWCAP_SME_FA64] = "smefa64", [KERNEL_HWCAP_WFXT] = "wfxt", + [KERNEL_HWCAP_EBF16] = "ebf16", }; #ifdef CONFIG_COMPAT @@ -267,6 +275,7 @@ static struct kobj_type cpuregs_kobj_type = { CPUREGS_ATTR_RO(midr_el1, midr); CPUREGS_ATTR_RO(revidr_el1, revidr); +CPUREGS_ATTR_RO(smidr_el1, smidr); static struct attribute *cpuregs_id_attrs[] = { &cpuregs_attr_midr_el1.attr, @@ -279,6 +288,16 @@ static const struct attribute_group cpuregs_attr_group = { .name = "identification" }; +static struct attribute *sme_cpuregs_id_attrs[] = { + &cpuregs_attr_smidr_el1.attr, + NULL +}; + +static const struct attribute_group sme_cpuregs_attr_group = { + .attrs = sme_cpuregs_id_attrs, + .name = "identification" +}; + static int cpuid_cpu_online(unsigned int cpu) { int rc; @@ -296,6 +315,8 @@ static int cpuid_cpu_online(unsigned int cpu) rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); if (rc) kobject_del(&info->kobj); + if (system_supports_sme()) + rc = sysfs_merge_group(&info->kobj, &sme_cpuregs_attr_group); out: return rc; } @@ -342,19 +363,19 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) u32 l1ip = CTR_L1IP(info->reg_ctr); switch (l1ip) { - case ICACHE_POLICY_PIPT: + case CTR_EL0_L1Ip_PIPT: break; - case ICACHE_POLICY_VPIPT: + case CTR_EL0_L1Ip_VPIPT: set_bit(ICACHEF_VPIPT, &__icache_flags); break; - case ICACHE_POLICY_RESERVED: - case ICACHE_POLICY_VIPT: + case CTR_EL0_L1Ip_VIPT: + default: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); break; } - pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str(l1ip), cpu); } static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info) @@ -423,9 +444,17 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_zcr = read_zcr_features(); if (IS_ENABLED(CONFIG_ARM64_SME) && - id_aa64pfr1_sme(info->reg_id_aa64pfr1)) + id_aa64pfr1_sme(info->reg_id_aa64pfr1)) { info->reg_smcr = read_smcr_features(); + /* + * We mask out SMPS since even if the hardware + * supports priorities the kernel does not at present + * and we block access to them. + */ + info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; + } + cpuinfo_detect_icache_policy(info); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5b82b9292400..254fe31c03a0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -636,18 +636,28 @@ alternative_else_nop_endif */ .endm - .macro tramp_data_page dst - adr_l \dst, .entry.tramp.text - sub \dst, \dst, PAGE_SIZE - .endm - - .macro tramp_data_read_var dst, var -#ifdef CONFIG_RANDOMIZE_BASE - tramp_data_page \dst - add \dst, \dst, #:lo12:__entry_tramp_data_\var - ldr \dst, [\dst] + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RELOCATABLE + ldr \dst, .L__tramp_data_\var + .ifndef .L__tramp_data_\var + .pushsection ".entry.tramp.rodata", "a", %progbits + .align 3 +.L__tramp_data_\var: + .quad \var + .popsection + .endif #else - ldr \dst, =\var + /* + * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a + * compile time constant (and hence not secret and not worth hiding). + * + * As statically allocated kernel code and data always live in the top + * 47 bits of the address space we can sign-extend bit 47 and avoid an + * instruction to load the upper 16 bits (which must be 0xFFFF). + */ + movz \dst, :abs_g2_s:\var + movk \dst, :abs_g1_nc:\var + movk \dst, :abs_g0_nc:\var #endif .endm @@ -695,7 +705,7 @@ alternative_else_nop_endif msr vbar_el1, x30 isb .else - ldr x30, =vectors + adr_l x30, vectors .endif // \kpti == 1 .if \bhb == BHB_MITIGATION_FW @@ -764,24 +774,7 @@ SYM_CODE_END(tramp_exit_native) SYM_CODE_START(tramp_exit_compat) tramp_exit 32 SYM_CODE_END(tramp_exit_compat) - - .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE - .pushsection ".rodata", "a" - .align PAGE_SHIFT -SYM_DATA_START(__entry_tramp_data_start) -__entry_tramp_data_vectors: - .quad vectors -#ifdef CONFIG_ARM_SDE_INTERFACE -__entry_tramp_data___sdei_asm_handler: - .quad __sdei_asm_handler -#endif /* CONFIG_ARM_SDE_INTERFACE */ -__entry_tramp_data_this_cpu_vector: - .quad this_cpu_vector -SYM_DATA_END(__entry_tramp_data_start) - .popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -932,7 +925,6 @@ NOKPROBE(call_on_irq_stack) * This clobbers x4, __sdei_handler() will restore this from firmware's * copy. */ -.ltorg .pushsection ".entry.tramp.text", "ax" SYM_CODE_START(__sdei_asm_entry_trampoline) mrs x4, ttbr1_el1 @@ -967,7 +959,6 @@ SYM_CODE_START(__sdei_asm_exit_trampoline) 1: sdei_handler_exit exit_mode=x2 SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) - .ltorg .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index aecf3071efdd..dd63ffc3a2fa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -445,7 +445,6 @@ static void fpsimd_save(void) if (system_supports_sme()) { u64 *svcr = last->svcr; - *svcr = read_sysreg_s(SYS_SVCR); *svcr = read_sysreg_s(SYS_SVCR); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 2e248342476e..af5df48ba915 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -300,11 +300,6 @@ static void swsusp_mte_restore_tags(void) unsigned long pfn = xa_state.xa_index; struct page *page = pfn_to_online_page(pfn); - /* - * It is not required to invoke page_kasan_tag_reset(page) - * at this point since the tags stored in page->flags are - * already restored. - */ mte_restore_page_tags(page_address(page), tags); mte_free_tag_storage(tags); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 3dcc3272ce16..12c7fad02ae5 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -126,7 +126,7 @@ SYM_CODE_START_LOCAL(__finalise_el2) // Full FP in SM? mrs_s x1, SYS_ID_AA64SMFR0_EL1 - __check_override id_aa64smfr0 ID_AA64SMFR0_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64 + __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64 .Linit_sme_fa64: orr x0, x0, SMCR_ELx_FA64_MASK diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 7206fd0ed9eb..1b0542c69738 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -98,7 +98,7 @@ static const struct ftr_set_desc pfr1 __initconst = { .name = "id_aa64pfr1", .override = &id_aa64pfr1_override, .fields = { - FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL), + FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ), FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL), FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter), {} @@ -109,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = { .name = "id_aa64isar1", .override = &id_aa64isar1_override, .fields = { - FIELD("gpi", ID_AA64ISAR1_GPI_SHIFT, NULL), - FIELD("gpa", ID_AA64ISAR1_GPA_SHIFT, NULL), - FIELD("api", ID_AA64ISAR1_API_SHIFT, NULL), - FIELD("apa", ID_AA64ISAR1_APA_SHIFT, NULL), + FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL), + FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL), + FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL), + FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL), {} }, }; @@ -121,8 +121,8 @@ static const struct ftr_set_desc isar2 __initconst = { .name = "id_aa64isar2", .override = &id_aa64isar2_override, .fields = { - FIELD("gpa3", ID_AA64ISAR2_GPA3_SHIFT, NULL), - FIELD("apa3", ID_AA64ISAR2_APA3_SHIFT, NULL), + FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL), + FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL), {} }, }; @@ -132,7 +132,7 @@ static const struct ftr_set_desc smfr0 __initconst = { .override = &id_aa64smfr0_override, .fields = { /* FA64 is a one bit field... :-/ */ - { "fa64", ID_AA64SMFR0_FA64_SHIFT, 1, }, + { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, {} }, }; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 42bd8c0c60e0..692e9d2e31e5 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -15,6 +15,7 @@ #include <asm/unistd.h> + .section .rodata .align 5 .globl __kuser_helper_start __kuser_helper_start: diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f6b00743c399..b2b730233274 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -48,15 +48,6 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (!pte_is_tagged) return; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_clear_page_tags(page_address(page)); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index b0980fbb6bc7..3e6d0352d7d3 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -280,6 +280,9 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { + if (!system_supports_sve()) + return -EINVAL; + vl = task_get_sve_vl(current); } @@ -342,9 +345,14 @@ fpsimd_only: #else /* ! CONFIG_ARM64_SVE */ -/* Turn any non-optimised out attempts to use these into a link error: */ +static int restore_sve_fpsimd_context(struct user_ctxs *user) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} + +/* Turn any non-optimised out attempts to use this into a link error: */ extern int preserve_sve_context(void __user *ctx); -extern int restore_sve_fpsimd_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SVE */ @@ -649,14 +657,10 @@ static int restore_sigframe(struct pt_regs *regs, if (!user.fpsimd) return -EINVAL; - if (user.sve) { - if (!system_supports_sve()) - return -EINVAL; - + if (user.sve) err = restore_sve_fpsimd_context(&user); - } else { + else err = restore_fpsimd_context(user.fpsimd); - } } if (err == 0 && system_supports_sme() && user.za) diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S index 475d30d471ac..ccbd4aab4ba4 100644 --- a/arch/arm64/kernel/sigreturn32.S +++ b/arch/arm64/kernel/sigreturn32.S @@ -15,6 +15,7 @@ #include <asm/unistd.h> + .section .rodata .globl __aarch32_sigret_code_start __aarch32_sigret_code_start: diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0467cb79f080..fcaa151b81f1 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -38,6 +38,8 @@ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance * associated with the most recently encountered replacement lr * value. + * + * @task: The task being unwound. */ struct unwind_state { unsigned long fp; @@ -48,13 +50,13 @@ struct unwind_state { #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif + struct task_struct *task; }; -static notrace void unwind_init(struct unwind_state *state, unsigned long fp, - unsigned long pc) +static void unwind_init_common(struct unwind_state *state, + struct task_struct *task) { - state->fp = fp; - state->pc = pc; + state->task = task; #ifdef CONFIG_KRETPROBES state->kr_cur = NULL; #endif @@ -72,7 +74,57 @@ static notrace void unwind_init(struct unwind_state *state, unsigned long fp, state->prev_fp = 0; state->prev_type = STACK_TYPE_UNKNOWN; } -NOKPROBE_SYMBOL(unwind_init); + +/* + * Start an unwind from a pt_regs. + * + * The unwind will begin at the PC within the regs. + * + * The regs must be on a stack currently owned by the calling task. + */ +static inline void unwind_init_from_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + unwind_init_common(state, current); + + state->fp = regs->regs[29]; + state->pc = regs->pc; +} + +/* + * Start an unwind from a caller. + * + * The unwind will begin at the caller of whichever function this is inlined + * into. + * + * The function which invokes this must be noinline. + */ +static __always_inline void unwind_init_from_caller(struct unwind_state *state) +{ + unwind_init_common(state, current); + + state->fp = (unsigned long)__builtin_frame_address(1); + state->pc = (unsigned long)__builtin_return_address(0); +} + +/* + * Start an unwind from a blocked task. + * + * The unwind will begin at the blocked tasks saved PC (i.e. the caller of + * cpu_switch_to()). + * + * The caller should ensure the task is blocked in cpu_switch_to() for the + * duration of the unwind, or the unwind will be bogus. It is never valid to + * call this for the current task. + */ +static inline void unwind_init_from_task(struct unwind_state *state, + struct task_struct *task) +{ + unwind_init_common(state, task); + + state->fp = thread_saved_fp(task); + state->pc = thread_saved_pc(task); +} /* * Unwind from one frame record (A) to the next frame record (B). @@ -81,9 +133,9 @@ NOKPROBE_SYMBOL(unwind_init); * records (e.g. a cycle), determined based on the location and fp value of A * and the location (but not the fp value) of B. */ -static int notrace unwind_next(struct task_struct *tsk, - struct unwind_state *state) +static int notrace unwind_next(struct unwind_state *state) { + struct task_struct *tsk = state->task; unsigned long fp = state->fp; struct stack_info info; @@ -117,15 +169,15 @@ static int notrace unwind_next(struct task_struct *tsk, if (fp <= state->prev_fp) return -EINVAL; } else { - set_bit(state->prev_type, state->stacks_done); + __set_bit(state->prev_type, state->stacks_done); } /* * Record this frame record's values and location. The prev_fp and * prev_type are only meaningful to the next unwind_next() invocation. */ - state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); - state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + state->fp = READ_ONCE(*(unsigned long *)(fp)); + state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); state->prev_fp = fp; state->prev_type = info.type; @@ -157,8 +209,7 @@ static int notrace unwind_next(struct task_struct *tsk, } NOKPROBE_SYMBOL(unwind_next); -static void notrace unwind(struct task_struct *tsk, - struct unwind_state *state, +static void notrace unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry, void *cookie) { while (1) { @@ -166,7 +217,7 @@ static void notrace unwind(struct task_struct *tsk, if (!consume_entry(cookie, state->pc)) break; - ret = unwind_next(tsk, state); + ret = unwind_next(state); if (ret < 0) break; } @@ -212,15 +263,15 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, { struct unwind_state state; - if (regs) - unwind_init(&state, regs->regs[29], regs->pc); - else if (task == current) - unwind_init(&state, - (unsigned long)__builtin_frame_address(1), - (unsigned long)__builtin_return_address(0)); - else - unwind_init(&state, thread_saved_fp(task), - thread_saved_pc(task)); - - unwind(task, &state, consume_entry, cookie); + if (regs) { + if (task != current) + return; + unwind_init_from_regs(&state, regs); + } else if (task == current) { + unwind_init_from_caller(&state); + } else { + unwind_init_from_task(&state, task); + } + + unwind(&state, consume_entry, cookie); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9ac7a81b79be..b7fed33981f7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -579,11 +579,11 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs) if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { /* Hide DIC so that we can trap the unnecessary maintenance...*/ - val &= ~BIT(CTR_DIC_SHIFT); + val &= ~BIT(CTR_EL0_DIC_SHIFT); /* ... and fake IminLine to reduce the number of traps. */ - val &= ~CTR_IMINLINE_MASK; - val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + val &= ~CTR_EL0_IminLine_MASK; + val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK; } pt_regs_write_reg(regs, rt, val); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index f6e25d7c346a..bafbf78fab77 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,7 +24,13 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T + -Bsymbolic --build-id=sha1 -n $(btildflags-y) + +ifdef CONFIG_LD_ORPHAN_WARN + ldflags-y += --orphan-handling=warn +endif + +ldflags-y += -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index a5e61e09ea92..e69fb4aaaf3e 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -11,6 +11,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -49,11 +50,24 @@ SECTIONS .dynamic : { *(.dynamic) } :text :dynamic - .rodata : { *(.rodata*) } :text + .rela.dyn : ALIGN(8) { *(.rela .rela*) } + + .rodata : { + *(.rodata*) + *(.got) + *(.got.plt) + *(.plt) + *(.plt.*) + *(.iplt) + *(.igot .igot.plt) + } :text _end = .; PROVIDE(end = .); + DWARF_DEBUG + ELF_DETAILS + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 05ba1aae1b6f..36c8f66cad25 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -104,6 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 +VDSO_LDFLAGS += --orphan-handling=warn # Borrow vdsomunge.c from the arm vDSO diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index 3348ce5ea306..8d95d7d35057 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -11,6 +11,7 @@ #include <linux/const.h> #include <asm/page.h> #include <asm/vdso.h> +#include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") OUTPUT_ARCH(arm) @@ -35,12 +36,30 @@ SECTIONS .dynamic : { *(.dynamic) } :text :dynamic - .rodata : { *(.rodata*) } :text + .rodata : { + *(.rodata*) + *(.got) + *(.got.plt) + *(.plt) + *(.rel.iplt) + *(.iplt) + *(.igot.plt) + } :text - .text : { *(.text*) } :text =0xe7f001f2 + .text : { + *(.text*) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.v4_bx) + } :text =0xe7f001f2 - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } + .rel.dyn : { *(.rel*) } + + .ARM.exidx : { *(.ARM.exidx*) } + DWARF_DEBUG + ELF_DETAILS + .ARM.attributes 0 : { *(.ARM.attributes) } /DISCARD/ : { *(.note.GNU-stack) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 20ebdd48db14..45131e354e27 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -115,7 +115,8 @@ jiffies = jiffies_64; __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_end = .; + __entry_tramp_text_end = .; \ + *(.entry.tramp.rodata) #else #define TRAMP_TEXT #endif diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index fd55014b3497..fa6e466ed57f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -176,25 +176,25 @@ ) #define PVM_ID_AA64ISAR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \ ) #define PVM_ID_AA64ISAR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \ ) u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 35a4331ba5f3..6b94c3e6ff26 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -173,10 +173,10 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); return id_aa64isar1_el1_sys_val & allow_mask; } @@ -186,8 +186,8 @@ static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); return id_aa64isar2_el1_sys_val & allow_mask; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c06c0477fab5..c4fb3874b5e2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1136,17 +1136,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); if (!cpus_have_final_cap(ARM64_HAS_WFXT)) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_WFXT); + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index eeb9e45bcce8..1b7c93ae7e63 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -18,7 +18,7 @@ */ .macro multitag_transfer_size, reg, tmp mrs_s \reg, SYS_GMID_EL1 - ubfx \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE + ubfx \reg, \reg, #GMID_EL1_BS_SHIFT, #GMID_EL1_BS_SIZE mov \tmp, #4 lsl \reg, \tmp, \reg .endm diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 21c907987080..081058d4e436 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -194,44 +194,3 @@ SYM_FUNC_START(__pi_dcache_clean_pop) ret SYM_FUNC_END(__pi_dcache_clean_pop) SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) - -/* - * __dma_flush_area(start, size) - * - * clean & invalidate D / U line - * - * - start - virtual start address of region - * - size - size in question - */ -SYM_FUNC_START(__pi___dma_flush_area) - add x1, x0, x1 - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -SYM_FUNC_END(__pi___dma_flush_area) -SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) - -/* - * __dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -SYM_FUNC_START(__pi___dma_map_area) - add x1, x0, x1 - b __pi_dcache_clean_poc -SYM_FUNC_END(__pi___dma_map_area) -SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) - -/* - * __dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -SYM_FUNC_START(__pi___dma_unmap_area) - add x1, x0, x1 - cmp w2, #DMA_TO_DEVICE - b.ne __pi_dcache_inval_poc - ret -SYM_FUNC_END(__pi___dma_unmap_area) -SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 0dea80bf6de4..24913271e898 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { set_bit(PG_mte_tagged, &to->flags); - page_kasan_tag_reset(to); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_copy_page_tags(kto, kfrom); } } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6099c81b9322..599cf81f5685 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -14,20 +14,29 @@ #include <asm/xen/xen-ops.h> void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir) { - __dma_map_area(phys_to_virt(paddr), size, dir); + unsigned long start = (unsigned long)phys_to_virt(paddr); + + dcache_clean_poc(start, start + size); } void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir) { - __dma_unmap_area(phys_to_virt(paddr), size, dir); + unsigned long start = (unsigned long)phys_to_virt(paddr); + + if (dir == DMA_TO_DEVICE) + return; + + dcache_inval_poc(start, start + size); } void arch_dma_prep_coherent(struct page *page, size_t size) { - __dma_flush_area(page_address(page), size); + unsigned long start = (unsigned long)page_address(page); + + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 489455309695..228d681a8715 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -16,13 +16,6 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } -static bool ex_handler_fixup(const struct exception_table_entry *ex, - struct pt_regs *regs) -{ - regs->pc = get_ex_fixup(ex); - return true; -} - static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -72,11 +65,10 @@ bool fixup_exception(struct pt_regs *regs) return false; switch (ex->type) { - case EX_TYPE_FIXUP: - return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: + case EX_TYPE_KACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: return ex_handler_load_unaligned_zeropad(ex, regs); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c5e11768e5c1..cdf3ffa0c223 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { mte_zero_clear_page_tags(page_address(page)); - page_kasan_tag_reset(page); set_bit(PG_mte_tagged, &page->flags); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index e2a5ec9fdc0d..8eab05367549 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,16 +100,6 @@ int pud_huge(pud_t pud) #endif } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) -{ - unsigned long pfn = pte_pfn(pte); - - return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); -} - static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 339ee84e5a61..b6ef26fc8ebe 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -389,7 +389,7 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + if (!defer_reserve_crashkernel()) reserve_crashkernel(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; @@ -438,7 +438,7 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + if (defer_reserve_crashkernel()) reserve_crashkernel(); memblock_dump_all(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b21f91cd830d..c5af103d4ad4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -1,96 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Based on arch/arm/mm/ioremap.c - * - * (C) Copyright 1995 1996 Linus Torvalds - * Hacked for ARM by Phil Blundell <[email protected]> - * Hacked to allow all architectures to build, and various cleanups - * by Russell King - * Copyright (C) 2012 ARM Ltd. - */ -#include <linux/export.h> #include <linux/mm.h> -#include <linux/vmalloc.h> #include <linux/io.h> -#include <asm/fixmap.h> -#include <asm/tlbflush.h> - -static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, - pgprot_t prot, void *caller) +bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot) { - unsigned long last_addr; - unsigned long offset = phys_addr & ~PAGE_MASK; - int err; - unsigned long addr; - struct vm_struct *area; + unsigned long last_addr = phys_addr + size - 1; - /* - * Page align the mapping address and size, taking account of any - * offset. - */ - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(size + offset); + /* Don't allow outside PHYS_MASK */ + if (last_addr & ~PHYS_MASK) + return false; - /* - * Don't allow wraparound, zero size or outside PHYS_MASK. - */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK)) - return NULL; - - /* - * Don't allow RAM to be mapped. - */ + /* Don't allow RAM to be mapped. */ if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr)))) - return NULL; - - area = get_vm_area_caller(size, VM_IOREMAP, caller); - if (!area) - return NULL; - addr = (unsigned long)area->addr; - area->phys_addr = phys_addr; - - err = ioremap_page_range(addr, addr + size, phys_addr, prot); - if (err) { - vunmap((void *)addr); - return NULL; - } - - return (void __iomem *)(offset + addr); -} - -void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) -{ - return __ioremap_caller(phys_addr, size, prot, - __builtin_return_address(0)); -} -EXPORT_SYMBOL(__ioremap); - -void iounmap(volatile void __iomem *io_addr) -{ - unsigned long addr = (unsigned long)io_addr & PAGE_MASK; - - /* - * We could get an address outside vmalloc range in case - * of ioremap_cache() reusing a RAM mapping. - */ - if (is_vmalloc_addr((void *)addr)) - vunmap((void *)addr); -} -EXPORT_SYMBOL(iounmap); - -void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) -{ - /* For normal memory we already have a cacheable mapping. */ - if (pfn_is_map_memory(__phys_to_pfn(phys_addr))) - return (void __iomem *)__phys_to_virt(phys_addr); + return false; - return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), - __builtin_return_address(0)); + return true; } -EXPORT_SYMBOL(ioremap_cache); /* * Must be called after early_fixmap_init diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 05d77f2342a9..db7c4e6ae57b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -400,6 +400,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +extern __alias(__create_pgd_mapping) +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); +#endif + static phys_addr_t __pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); @@ -541,8 +548,7 @@ static void __init map_mem(pgd_t *pgdp) #ifdef CONFIG_KEXEC_CORE if (crash_mem_map) { - if (IS_ENABLED(CONFIG_ZONE_DMA) || - IS_ENABLED(CONFIG_ZONE_DMA32)) + if (defer_reserve_crashkernel()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; else if (crashk_res.end) memblock_mark_nomap(crashk_res.start, @@ -583,8 +589,7 @@ static void __init map_mem(pgd_t *pgdp) * through /sys/kernel/kexec_crash_size interface. */ #ifdef CONFIG_KEXEC_CORE - if (crash_mem_map && - !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crash_mem_map && !defer_reserve_crashkernel()) { if (crashk_res.end) { __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1, @@ -677,13 +682,9 @@ static int __init map_entry_trampoline(void) __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, pa_start + i * PAGE_SIZE, prot); - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { - extern char __entry_tramp_data_start[]; - - __set_fixmap(FIX_ENTRY_TRAMP_DATA, - __pa_symbol(__entry_tramp_data_start), - PAGE_KERNEL_RO); - } + if (IS_ENABLED(CONFIG_RELOCATABLE)) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); return 0; } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index a9e50e930484..4334dec93bd4 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) if (!tags) return false; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_restore_page_tags(page_address(page), tags); return true; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9eb490effb7f..7837a69524c5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -14,6 +14,7 @@ #include <asm/asm-offsets.h> #include <asm/asm_pointer_auth.h> #include <asm/hwcap.h> +#include <asm/kernel-pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/cpufeature.h> #include <asm/alternative.h> @@ -200,25 +201,51 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) + .pushsection ".idmap.text", "awx" - .macro __idmap_kpti_get_pgtable_ent, type - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries + .macro kpti_mk_tbl_ng, type, num_entries + add end_\type\()p, cur_\type\()p, #\num_entries * 8 +.Ldo_\type: + ldr \type, [cur_\type\()p] // Load the entry + tbz \type, #0, .Lnext_\type // Skip invalid and + tbnz \type, #11, .Lnext_\type // non-global entries + orr \type, \type, #PTE_NG // Same bit for blocks and pages + str \type, [cur_\type\()p] // Update the entry + .ifnc \type, pte + tbnz \type, #1, .Lderef_\type + .endif +.Lnext_\type: + add cur_\type\()p, cur_\type\()p, #8 + cmp cur_\type\()p, end_\type\()p + b.ne .Ldo_\type .endm - .macro __idmap_kpti_put_pgtable_ent_ng, type - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. + /* + * Dereference the current table entry and map it into the temporary + * fixmap slot associated with the current level. + */ + .macro kpti_map_pgtbl, type, level + str xzr, [temp_pte, #8 * (\level + 1)] // break before make + dsb nshst + add pte, temp_pte, #PAGE_SIZE * (\level + 1) + lsr pte, pte, #12 + tlbi vaae1, pte + dsb nsh + isb + + phys_to_pte pte, cur_\type\()p + add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1) + orr pte, pte, pte_flags + str pte, [temp_pte, #8 * (\level + 1)] + dsb nshst .endm /* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd, + * unsigned long temp_pte_va) * * Called exactly once from stop_machine context by each CPU found during boot. */ @@ -228,8 +255,10 @@ SYM_DATA(__idmap_kpti_flag, .long 1) SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 + temp_pte .req x0 num_cpus .req w1 - swapper_pa .req x2 + pte_flags .req x1 + temp_pgd_phys .req x2 swapper_ttb .req x3 flag_ptr .req x4 cur_pgdp .req x5 @@ -237,16 +266,15 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) pgd .req x7 cur_pudp .req x8 end_pudp .req x9 - pud .req x10 cur_pmdp .req x11 end_pmdp .req x12 - pmd .req x13 cur_ptep .req x14 end_ptep .req x15 pte .req x16 + valid .req x17 + mov x5, x3 // preserve temp_pte arg mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb adr_l flag_ptr, __idmap_kpti_flag cbnz cpu, __idmap_kpti_secondary @@ -258,98 +286,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) eor w17, w17, num_cpus cbnz w17, 1b - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x17, sctlr_el1 - bic x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x17 + /* Switch to the temporary page tables on this CPU only */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + offset_ttbr1 temp_pgd_phys, x8 + msr ttbr1_el1, temp_pgd_phys isb + mov temp_pte, x5 + mov pte_flags, #KPTI_NG_PTE_FLAGS + /* Everybody is enjoying the idmap, so we can rewrite swapper. */ /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd - - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish - isb + adrp cur_pgdp, swapper_pg_dir + kpti_map_pgtbl pgd, 0 + kpti_mk_tbl_ng pgd, PTRS_PER_PGD - /* We're done: fire up the MMU again */ - mrs x17, sctlr_el1 - orr x17, x17, #SCTLR_ELx_M - set_sctlr_el1 x17 + /* Ensure all the updated entries are visible to secondary CPUs */ + dsb ishst + + /* We're done: fire up swapper_pg_dir again */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + msr ttbr1_el1, swapper_ttb + isb /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] ret +.Lderef_pgd: /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 + .if CONFIG_PGTABLE_LEVELS > 3 + pud .req x10 pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd + kpti_map_pgtbl pud, 1 + kpti_mk_tbl_ng pud, PTRS_PER_PUD + b .Lnext_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + pud .req pgd + .set .Lnext_pud, .Lnext_pgd .endif +.Lderef_pud: /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 + .if CONFIG_PGTABLE_LEVELS > 2 + pmd .req x13 pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud + kpti_map_pgtbl pmd, 2 + kpti_mk_tbl_ng pmd, PTRS_PER_PMD + b .Lnext_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + pmd .req pgd + .set .Lnext_pmd, .Lnext_pgd .endif +.Lderef_pmd: /* PTE */ -walk_ptes: pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b next_pmd + kpti_map_pgtbl pte, 3 + kpti_mk_tbl_ng pte, PTRS_PER_PTE + b .Lnext_pmd .unreq cpu + .unreq temp_pte .unreq num_cpus - .unreq swapper_pa + .unreq pte_flags + .unreq temp_pgd_phys .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -362,6 +363,7 @@ skip_pte: .unreq cur_ptep .unreq end_ptep .unreq pte + .unreq valid /* Secondary CPUs end up here */ __idmap_kpti_secondary: @@ -381,7 +383,6 @@ __idmap_kpti_secondary: cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 507b20373953..779653771507 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -36,6 +36,7 @@ HAS_RNG HAS_SB HAS_STAGE2_FWB HAS_SYSREG_GIC_CPUIF +HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN HAS_WFXT @@ -61,6 +62,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_1742098 WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index 5c55509eb43f..db461921d256 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -88,7 +88,7 @@ END { # skip blank lines and comment lines /^$/ { next } -/^#/ { next } +/^[\t ]*#/ { next } /^SysregFields/ { change_block("SysregFields", "None", "SysregFields") diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index ff5e552f7420..9ae483ec1e56 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -46,6 +46,89 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. +Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 +Res0 63:60 +Enum 59:56 F64MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 F32MM + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 51:48 +Enum 47:44 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 43:40 SM4 + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 39:36 +Enum 35:32 SHA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 31:24 +Enum 23:20 BF16 + 0b0000 NI + 0b0001 IMP + 0b0010 EBF16 +EndEnum +Enum 19:16 BitPerm + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 15:8 +Enum 7:4 AES + 0b0000 NI + 0b0001 IMP + 0b0010 PMULL128 +EndEnum +Enum 3:0 SVEver + 0b0000 IMP + 0b0001 SVE2 +EndEnum +EndSysreg + +Sysreg ID_AA64SMFR0_EL1 3 0 0 4 5 +Enum 63 FA64 + 0b0 NI + 0b1 IMP +EndEnum +Res0 62:60 +Field 59:56 SMEver +Enum 55:52 I16I64 + 0b0000 NI + 0b1111 IMP +EndEnum +Res0 51:49 +Enum 48 F64F64 + 0b0 NI + 0b1 IMP +EndEnum +Res0 47:40 +Enum 39:36 I8I32 + 0b0000 NI + 0b1111 IMP +EndEnum +Enum 35 F16F32 + 0b0 NI + 0b1 IMP +EndEnum +Enum 34 B16F32 + 0b0 NI + 0b1 IMP +EndEnum +Res0 33 +Enum 32 F32F32 + 0b0 NI + 0b1 IMP +EndEnum +Res0 31:0 +EndSysreg + Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0 Enum 63:60 RNDR 0b0000 NI @@ -114,6 +197,122 @@ EndEnum Res0 3:0 EndSysreg +Sysreg ID_AA64ISAR1_EL1 3 0 0 6 1 +Enum 63:60 LS64 + 0b0000 NI + 0b0001 LS64 + 0b0010 LS64_V + 0b0011 LS64_ACCDATA +EndEnum +Enum 59:56 XS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 55:52 I8MM + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 51:48 DGH + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 47:44 BF16 + 0b0000 NI + 0b0001 IMP + 0b0010 EBF16 +EndEnum +Enum 43:40 SPECRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 39:36 SB + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 35:32 FRINTTS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 31:28 GPI + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 27:24 GPA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 LRCPC + 0b0000 NI + 0b0001 IMP + 0b0010 LRCPC2 +EndEnum +Enum 19:16 FCMA + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 JSCVT + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 11:8 API + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 7:4 APA + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 3:0 DPB + 0b0000 NI + 0b0001 IMP + 0b0010 DPB2 +EndEnum +EndSysreg + +Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2 +Res0 63:28 +Enum 27:24 PAC_frac + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 23:20 BC + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 19:16 MOPS + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 15:12 APA3 + 0b0000 NI + 0b0001 PAuth + 0b0010 EPAC + 0b0011 PAuth2 + 0b0100 FPAC + 0b0101 FPACCOMBINE +EndEnum +Enum 11:8 GPA3 + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 7:4 RPRES + 0b0000 NI + 0b0001 IMP +EndEnum +Enum 3:0 WFxT + 0b0000 NI + 0b0010 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINMASK @@ -257,6 +456,11 @@ Field 5:3 Ctype2 Field 2:0 Ctype1 EndSysreg +Sysreg GMID_EL1 3 1 0 0 4 +Res0 63:4 +Field 3:0 BS +EndSysreg + Sysreg SMIDR_EL1 3 1 0 0 6 Res0 63:32 Field 31:24 IMPLEMENTER @@ -273,6 +477,33 @@ Field 3:1 Level Field 0 InD EndSysreg +Sysreg CTR_EL0 3 3 0 0 1 +Res0 63:38 +Field 37:32 TminLine +Res1 31 +Res0 30 +Field 29 DIC +Field 28 IDC +Field 27:24 CWG +Field 23:20 ERG +Field 19:16 DminLine +Enum 15:14 L1Ip + 0b00 VPIPT + # This is named as AIVIVT in the ARM but documented as reserved + 0b01 RESERVED + 0b10 VIPT + 0b11 PIPT +EndEnum +Res0 13:4 +Field 3:0 IminLine +EndSysreg + +Sysreg DCZID_EL0 3 3 0 0 7 +Res0 63:5 +Field 4 DZP +Field 3:0 BS +EndSysreg + Sysreg SVCR 3 3 4 2 2 Res0 63:2 Field 1 ZA @@ -367,3 +598,36 @@ EndSysreg Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg + +Sysreg LORSA_EL1 3 0 10 4 0 +Res0 63:52 +Field 51:16 SA +Res0 15:1 +Field 0 Valid +EndSysreg + +Sysreg LOREA_EL1 3 0 10 4 1 +Res0 63:52 +Field 51:48 EA_51_48 +Field 47:16 EA_47_16 +Res0 15:0 +EndSysreg + +Sysreg LORN_EL1 3 0 10 4 2 +Res0 63:8 +Field 7:0 Num +EndSysreg + +Sysreg LORC_EL1 3 0 10 4 3 +Res0 63:10 +Field 9:2 DS +Res0 1 +Field 0 EN +EndSysreg + +Sysreg LORID_EL1 3 0 10 4 7 +Res0 63:24 +Field 23:16 LD +Res0 15:8 +Field 7:0 LR +EndSysreg diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be0b95e51df6..e31840edd90e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -277,6 +277,7 @@ config X86 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select HAVE_ARCH_KCSAN if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 340399f69954..bdfe08f1a930 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_NMI_SUPPORT - def_bool y - config EARLY_PRINTK_USB bool diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index be7f512109f7..747aa537389b 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -3,7 +3,8 @@ # ARM CPU Idle drivers # config ARM_CPUIDLE - bool "Generic ARM/ARM64 CPU idle Driver" + bool "Generic ARM CPU idle Driver" + depends on ARM select DT_IDLE_STATES select CPU_IDLE_MULTIPLE_DRIVERS help diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 96e09fa40909..03b1309875ae 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1139,7 +1139,7 @@ static void cci_pmu_start(struct perf_event *event, int pmu_flags) /* * To handle interrupt latency, we always reprogram the period - * regardlesss of PERF_EF_RELOAD. + * regardless of PERF_EF_RELOAD. */ if (pmu_flags & PERF_EF_RELOAD) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); @@ -1261,7 +1261,7 @@ static int validate_group(struct perf_event *event) */ .used_mask = mask, }; - memset(mask, 0, BITS_TO_LONGS(cci_pmu->num_cntrs) * sizeof(unsigned long)); + bitmap_zero(mask, cci_pmu->num_cntrs); if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; @@ -1629,10 +1629,9 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev) GFP_KERNEL); if (!cci_pmu->hw_events.events) return ERR_PTR(-ENOMEM); - cci_pmu->hw_events.used_mask = devm_kcalloc(dev, - BITS_TO_LONGS(CCI_PMU_MAX_HW_CNTRS(model)), - sizeof(*cci_pmu->hw_events.used_mask), - GFP_KERNEL); + cci_pmu->hw_events.used_mask = devm_bitmap_zalloc(dev, + CCI_PMU_MAX_HW_CNTRS(model), + GFP_KERNEL); if (!cci_pmu->hw_events.used_mask) return ERR_PTR(-ENOMEM); diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 40b352e8aa7f..728d13d8e98a 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1250,7 +1250,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) ccn->dt.cmp_mask[CCN_IDX_MASK_OPCODE].h = ~(0x1f << 9); /* Get a convenient /sys/event_source/devices/ name */ - ccn->dt.id = ida_simple_get(&arm_ccn_pmu_ida, 0, 0, GFP_KERNEL); + ccn->dt.id = ida_alloc(&arm_ccn_pmu_ida, GFP_KERNEL); if (ccn->dt.id == 0) { name = "ccn"; } else { @@ -1312,7 +1312,7 @@ error_pmu_register: &ccn->dt.node); error_set_affinity: error_choose_name: - ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); + ida_free(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); writel(0, ccn->dt.base + CCN_DT_PMCR); @@ -1329,7 +1329,7 @@ static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); writel(0, ccn->dt.base + CCN_DT_PMCR); perf_pmu_unregister(&ccn->dt.pmu); - ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); + ida_free(&arm_ccn_pmu_ida, ccn->dt.id); } static int arm_ccn_for_each_valid_region(struct arm_ccn *ccn, diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index db670b265897..b65a7d9640e1 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -39,6 +39,24 @@ #include <asm/mmu.h> #include <asm/sysreg.h> +/* + * Cache if the event is allowed to trace Context information. + * This allows us to perform the check, i.e, perfmon_capable(), + * in the context of the event owner, once, during the event_init(). + */ +#define SPE_PMU_HW_FLAGS_CX BIT(0) + +static void set_spe_event_has_cx(struct perf_event *event) +{ + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + event->hw.flags |= SPE_PMU_HW_FLAGS_CX; +} + +static bool get_spe_event_has_cx(struct perf_event *event) +{ + return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX); +} + #define ARM_SPE_BUF_PAD_BYTE 0 struct arm_spe_pmu_buf { @@ -272,7 +290,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) if (!attr->exclude_kernel) reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT); - if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + if (get_spe_event_has_cx(event)) reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT); return reg; @@ -709,10 +727,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; + set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); if (!perfmon_capable() && (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) | - BIT(SYS_PMSCR_EL1_CX_SHIFT) | BIT(SYS_PMSCR_EL1_PCT_SHIFT)))) return -EACCES; diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index b1b2a55de77f..8e058e08fe81 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -611,7 +611,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base, .dev = dev, }; - pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL); + pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL); return pmu->id; } @@ -765,7 +765,7 @@ ddr_perf_err: cpuhp_instance_err: cpuhp_remove_multi_state(pmu->cpuhp_state); cpuhp_state_err: - ida_simple_remove(&ddr_ida, pmu->id); + ida_free(&ddr_ida, pmu->id); dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); return ret; } @@ -779,7 +779,7 @@ static int ddr_perf_remove(struct platform_device *pdev) perf_pmu_unregister(&pmu->pmu); - ida_simple_remove(&ddr_ida, pmu->id); + ida_free(&ddr_ida, pmu->id); return 0; } diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig index 5546218b5598..171bfc1b6bc2 100644 --- a/drivers/perf/hisilicon/Kconfig +++ b/drivers/perf/hisilicon/Kconfig @@ -14,3 +14,13 @@ config HISI_PCIE_PMU RCiEP devices. Adds the PCIe PMU into perf events system for monitoring latency, bandwidth etc. + +config HNS3_PMU + tristate "HNS3 PERF PMU" + depends on ARM64 || COMPILE_TEST + depends on PCI + help + Provide support for HNS3 performance monitoring unit (PMU) RCiEP + devices. + Adds the HNS3 PMU into perf events system for monitoring latency, + bandwidth etc. diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 6be83517acaa..4d2c9abe3372 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o +obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 62299ab5a9be..50d0c0a2f1fe 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -516,21 +516,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - ddrc_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = ddrc_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&ddrc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 393513150106..13017b3412a5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -519,21 +519,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", hha_pmu->sccl_id, hha_pmu->index_id); - hha_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = hha_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 560ab964c8b5..2995f3630d49 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -557,21 +557,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", l3c_pmu->sccl_id, l3c_pmu->ccl_id); - l3c_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = l3c_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index a0ee84d97c41..47d3cc9b6eec 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -412,21 +412,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - pa_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = pa_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; - + hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 980b9ee6eb14..fbc8a93d5eac 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,4 +531,22 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +void hisi_pmu_init(struct pmu *pmu, const char *name, + const struct attribute_group **attr_groups, struct module *module) +{ + pmu->name = name; + pmu->module = module; + pmu->task_ctx_nr = perf_invalid_context; + pmu->event_init = hisi_uncore_pmu_event_init; + pmu->pmu_enable = hisi_uncore_pmu_enable; + pmu->pmu_disable = hisi_uncore_pmu_disable; + pmu->add = hisi_uncore_pmu_add; + pmu->del = hisi_uncore_pmu_del; + pmu->start = hisi_uncore_pmu_start; + pmu->stop = hisi_uncore_pmu_stop; + pmu->read = hisi_uncore_pmu_read; + pmu->attr_groups = attr_groups; +} +EXPORT_SYMBOL_GPL(hisi_pmu_init); + MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 96eeddad55ff..b59de33cd059 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -121,4 +121,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_pmu_init(struct pmu *pmu, const char *name, + const struct attribute_group **attr_groups, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 6aedc303ff56..b9c79f17230c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,20 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - sllc_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = sllc_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c new file mode 100644 index 000000000000..e0457d84af6b --- /dev/null +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -0,0 +1,1671 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This driver adds support for HNS3 PMU iEP device. Related perf events are + * bandwidth, latency, packet rate, interrupt rate etc. + * + * Copyright (C) 2022 HiSilicon Limited + */ +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/cpuhotplug.h> +#include <linux/cpumask.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/pci-epf.h> +#include <linux/perf_event.h> +#include <linux/smp.h> + +/* registers offset address */ +#define HNS3_PMU_REG_GLOBAL_CTRL 0x0000 +#define HNS3_PMU_REG_CLOCK_FREQ 0x0020 +#define HNS3_PMU_REG_BDF 0x0fe0 +#define HNS3_PMU_REG_VERSION 0x0fe4 +#define HNS3_PMU_REG_DEVICE_ID 0x0fe8 + +#define HNS3_PMU_REG_EVENT_OFFSET 0x1000 +#define HNS3_PMU_REG_EVENT_SIZE 0x1000 +#define HNS3_PMU_REG_EVENT_CTRL_LOW 0x00 +#define HNS3_PMU_REG_EVENT_CTRL_HIGH 0x04 +#define HNS3_PMU_REG_EVENT_INTR_STATUS 0x08 +#define HNS3_PMU_REG_EVENT_INTR_MASK 0x0c +#define HNS3_PMU_REG_EVENT_COUNTER 0x10 +#define HNS3_PMU_REG_EVENT_EXT_COUNTER 0x18 +#define HNS3_PMU_REG_EVENT_QID_CTRL 0x28 +#define HNS3_PMU_REG_EVENT_QID_PARA 0x2c + +#define HNS3_PMU_FILTER_SUPPORT_GLOBAL BIT(0) +#define HNS3_PMU_FILTER_SUPPORT_PORT BIT(1) +#define HNS3_PMU_FILTER_SUPPORT_PORT_TC BIT(2) +#define HNS3_PMU_FILTER_SUPPORT_FUNC BIT(3) +#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE BIT(4) +#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR BIT(5) + +#define HNS3_PMU_FILTER_ALL_TC 0xf +#define HNS3_PMU_FILTER_ALL_QUEUE 0xffff + +#define HNS3_PMU_CTRL_SUBEVENT_S 4 +#define HNS3_PMU_CTRL_FILTER_MODE_S 24 + +#define HNS3_PMU_GLOBAL_START BIT(0) + +#define HNS3_PMU_EVENT_STATUS_RESET BIT(11) +#define HNS3_PMU_EVENT_EN BIT(12) +#define HNS3_PMU_EVENT_OVERFLOW_RESTART BIT(15) + +#define HNS3_PMU_QID_PARA_FUNC_S 0 +#define HNS3_PMU_QID_PARA_QUEUE_S 16 + +#define HNS3_PMU_QID_CTRL_REQ_ENABLE BIT(0) +#define HNS3_PMU_QID_CTRL_DONE BIT(1) +#define HNS3_PMU_QID_CTRL_MISS BIT(2) + +#define HNS3_PMU_INTR_MASK_OVERFLOW BIT(1) + +#define HNS3_PMU_MAX_HW_EVENTS 8 + +/* + * Each hardware event contains two registers (counter and ext_counter) for + * bandwidth, packet rate, latency and interrupt rate. These two registers will + * be triggered to run at the same when a hardware event is enabled. The meaning + * of counter and ext_counter of different event type are different, their + * meaning show as follow: + * + * +----------------+------------------+---------------+ + * | event type | counter | ext_counter | + * +----------------+------------------+---------------+ + * | bandwidth | byte number | cycle number | + * +----------------+------------------+---------------+ + * | packet rate | packet number | cycle number | + * +----------------+------------------+---------------+ + * | latency | cycle number | packet number | + * +----------------+------------------+---------------+ + * | interrupt rate | interrupt number | cycle number | + * +----------------+------------------+---------------+ + * + * The cycle number indicates increment of counter of hardware timer, the + * frequency of hardware timer can be read from hw_clk_freq file. + * + * Performance of each hardware event is calculated by: counter / ext_counter. + * + * Since processing of data is preferred to be done in userspace, we expose + * ext_counter as a separate event for userspace and use bit 16 to indicate it. + * For example, event 0x00001 and 0x10001 are actually one event for hardware + * because bit 0-15 are same. If the bit 16 of one event is 0 means to read + * counter register, otherwise means to read ext_counter register. + */ +/* bandwidth events */ +#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM 0x00001 +#define HNS3_PMU_EVT_BW_SSU_EGU_TIME 0x10001 +#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM 0x00002 +#define HNS3_PMU_EVT_BW_SSU_RPU_TIME 0x10002 +#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM 0x00003 +#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME 0x10003 +#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM 0x00004 +#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME 0x10004 +#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM 0x00005 +#define HNS3_PMU_EVT_BW_TPU_SSU_TIME 0x10005 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM 0x00006 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME 0x10006 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM 0x00008 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME 0x10008 +#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM 0x00009 +#define HNS3_PMU_EVT_BW_WR_FBD_TIME 0x10009 +#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM 0x0000a +#define HNS3_PMU_EVT_BW_WR_EBD_TIME 0x1000a +#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM 0x0000b +#define HNS3_PMU_EVT_BW_RD_FBD_TIME 0x1000b +#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM 0x0000c +#define HNS3_PMU_EVT_BW_RD_EBD_TIME 0x1000c +#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM 0x0000d +#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME 0x1000d +#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM 0x0000e +#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME 0x1000e +#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM 0x0000f +#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME 0x1000f +#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM 0x00010 +#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME 0x10010 + +/* packet rate events */ +#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM 0x00100 +#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME 0x10100 +#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM 0x00101 +#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME 0x10101 +#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM 0x00102 +#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME 0x10102 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM 0x00103 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME 0x10103 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM 0x00104 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME 0x10104 +#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM 0x00105 +#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME 0x10105 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM 0x00106 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME 0x10106 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM 0x00107 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME 0x10107 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM 0x00108 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME 0x10108 +#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM 0x00109 +#define HNS3_PMU_EVT_PPS_WR_FBD_TIME 0x10109 +#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM 0x0010a +#define HNS3_PMU_EVT_PPS_WR_EBD_TIME 0x1010a +#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM 0x0010b +#define HNS3_PMU_EVT_PPS_RD_FBD_TIME 0x1010b +#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM 0x0010c +#define HNS3_PMU_EVT_PPS_RD_EBD_TIME 0x1010c +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM 0x0010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME 0x1010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM 0x0010e +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME 0x1010e +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM 0x0010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME 0x1010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM 0x00110 +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME 0x10110 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM 0x00111 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME 0x10111 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM 0x00112 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME 0x10112 + +/* latency events */ +#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME 0x00202 +#define HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM 0x10202 +#define HNS3_PMU_EVT_DLY_TX_TIME 0x00204 +#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM 0x10204 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME 0x00206 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM 0x10206 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME 0x00207 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM 0x10207 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME 0x00208 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM 0x10208 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME 0x00209 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM 0x10209 +#define HNS3_PMU_EVT_DLY_RPU_TIME 0x0020e +#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM 0x1020e +#define HNS3_PMU_EVT_DLY_TPU_TIME 0x0020f +#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM 0x1020f +#define HNS3_PMU_EVT_DLY_RPE_TIME 0x00210 +#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM 0x10210 +#define HNS3_PMU_EVT_DLY_TPE_TIME 0x00211 +#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM 0x10211 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME 0x00212 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM 0x10212 +#define HNS3_PMU_EVT_DLY_WR_FBD_TIME 0x00213 +#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM 0x10213 +#define HNS3_PMU_EVT_DLY_WR_EBD_TIME 0x00214 +#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM 0x10214 +#define HNS3_PMU_EVT_DLY_RD_FBD_TIME 0x00215 +#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM 0x10215 +#define HNS3_PMU_EVT_DLY_RD_EBD_TIME 0x00216 +#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM 0x10216 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME 0x00217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM 0x10217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME 0x00218 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM 0x10218 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME 0x00219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM 0x10219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME 0x0021a +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM 0x1021a +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME 0x0021c +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM 0x1021c + +/* interrupt rate events */ +#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM 0x00300 +#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME 0x10300 + +/* filter mode supported by each bandwidth event */ +#define HNS3_PMU_FILTER_BW_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_BW_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_BW_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_BW_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_BW_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_BW_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_BW_WR_FBD 0x1b +#define HNS3_PMU_FILTER_BW_WR_EBD 0x11 +#define HNS3_PMU_FILTER_BW_RD_FBD 0x01 +#define HNS3_PMU_FILTER_BW_RD_EBD 0x1b +#define HNS3_PMU_FILTER_BW_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M1 0x01 + +/* filter mode supported by each packet rate event */ +#define HNS3_PMU_FILTER_PPS_IGU_SSU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_PPS_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_PPS_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_PPS_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_PPS_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_PPS_RCBTX_TPU 0x1f +#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_PPS_WR_FBD 0x1b +#define HNS3_PMU_FILTER_PPS_WR_EBD 0x11 +#define HNS3_PMU_FILTER_PPS_RD_FBD 0x01 +#define HNS3_PMU_FILTER_PPS_RD_EBD 0x1b +#define HNS3_PMU_FILTER_PPS_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_TX_PRE 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE 0x01 + +/* filter mode supported by each latency event */ +#define HNS3_PMU_FILTER_DLY_TX_PUSH 0x01 +#define HNS3_PMU_FILTER_DLY_TX 0x01 +#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_RPU 0x11 +#define HNS3_PMU_FILTER_DLY_TPU 0x1f +#define HNS3_PMU_FILTER_DLY_RPE 0x01 +#define HNS3_PMU_FILTER_DLY_TPE 0x0b +#define HNS3_PMU_FILTER_DLY_TPE_PUSH 0x1b +#define HNS3_PMU_FILTER_DLY_WR_FBD 0x1b +#define HNS3_PMU_FILTER_DLY_WR_EBD 0x11 +#define HNS3_PMU_FILTER_DLY_RD_FBD 0x01 +#define HNS3_PMU_FILTER_DLY_RD_EBD 0x1b +#define HNS3_PMU_FILTER_DLY_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_MSIX_WRITE 0x01 + +/* filter mode supported by each interrupt rate event */ +#define HNS3_PMU_FILTER_INTR_MSIX_NIC 0x01 + +enum hns3_pmu_hw_filter_mode { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +struct hns3_pmu_event_attr { + u32 event; + u16 filter_support; +}; + +struct hns3_pmu { + struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS]; + struct hlist_node node; + struct pci_dev *pdev; + struct pmu pmu; + void __iomem *base; + int irq; + int on_cpu; + u32 identifier; + u32 hw_clk_freq; /* hardware clock frequency of PMU */ + /* maximum and minimum bdf allowed by PMU */ + u16 bdf_min; + u16 bdf_max; +}; + +#define to_hns3_pmu(p) (container_of((p), struct hns3_pmu, pmu)) + +#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff) + +#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff) +#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07)) +#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func)) + +#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end) \ + static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } + +HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7); +HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15); +HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16); +HNS3_PMU_FILTER_ATTR(port, config1, 0, 3); +HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7); +HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23); +HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39); +HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51); +HNS3_PMU_FILTER_ATTR(global, config1, 52, 52); + +#define HNS3_BW_EVT_BYTE_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_BYTE_NUM, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_BW_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_TIME, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_PPS_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_PPS_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_DLY_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_TIME, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_DLY_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_INTR_EVT_INTR_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_INTR_NUM, \ + HNS3_PMU_FILTER_INTR_##_name}) +#define HNS3_INTR_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_INTR_##_name}) + +static ssize_t hns3_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +static ssize_t hns3_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + return sysfs_emit(buf, "config=0x%x\n", event->event); +} + +static ssize_t hns3_pmu_filter_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + int len; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + len = sysfs_emit_at(buf, 0, "filter mode supported: "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL) + len += sysfs_emit_at(buf, len, "global "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT) + len += sysfs_emit_at(buf, len, "port "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC) + len += sysfs_emit_at(buf, len, "port-tc "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC) + len += sysfs_emit_at(buf, len, "func "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE) + len += sysfs_emit_at(buf, len, "func-queue "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR) + len += sysfs_emit_at(buf, len, "func-intr "); + + len += sysfs_emit_at(buf, len, "\n"); + + return len; +} + +#define HNS3_PMU_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ + })[0].attr.attr) + +#define HNS3_PMU_FORMAT_ATTR(_name, _format) \ + HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format) +#define HNS3_PMU_EVENT_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event) +#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event) + +#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +static u8 hns3_pmu_hw_filter_modes[] = { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \ + ((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)]) + +static ssize_t identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier); +} +static DEVICE_ATTR_RO(identifier); + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu); +} +static DEVICE_ATTR_RO(cpumask); + +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_min; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_max; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_max); + +static ssize_t hw_clk_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq); +} +static DEVICE_ATTR_RO(hw_clk_freq); + +static struct attribute *hns3_pmu_events_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute *hns3_pmu_filter_mode_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute_group hns3_pmu_events_group = { + .name = "events", + .attrs = hns3_pmu_events_attr, +}; + +static struct attribute_group hns3_pmu_filter_mode_group = { + .name = "filtermode", + .attrs = hns3_pmu_filter_mode_attr, +}; + +static struct attribute *hns3_pmu_format_attr[] = { + HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"), + HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"), + HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"), + HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"), + HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"), + HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"), + HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"), + HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"), + HNS3_PMU_FORMAT_ATTR(global, "config1:52"), + NULL +}; + +static struct attribute_group hns3_pmu_format_group = { + .name = "format", + .attrs = hns3_pmu_format_attr, +}; + +static struct attribute *hns3_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group hns3_pmu_cpumask_attr_group = { + .attrs = hns3_pmu_cpumask_attrs, +}; + +static struct attribute *hns3_pmu_identifier_attrs[] = { + &dev_attr_identifier.attr, + NULL +}; + +static struct attribute_group hns3_pmu_identifier_attr_group = { + .attrs = hns3_pmu_identifier_attrs, +}; + +static struct attribute *hns3_pmu_bdf_range_attrs[] = { + &dev_attr_bdf_min.attr, + &dev_attr_bdf_max.attr, + NULL +}; + +static struct attribute_group hns3_pmu_bdf_range_attr_group = { + .attrs = hns3_pmu_bdf_range_attrs, +}; + +static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = { + &dev_attr_hw_clk_freq.attr, + NULL +}; + +static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = { + .attrs = hns3_pmu_hw_clk_freq_attrs, +}; + +static const struct attribute_group *hns3_pmu_attr_groups[] = { + &hns3_pmu_events_group, + &hns3_pmu_filter_mode_group, + &hns3_pmu_format_group, + &hns3_pmu_cpumask_attr_group, + &hns3_pmu_identifier_attr_group, + &hns3_pmu_bdf_range_attr_group, + &hns3_pmu_hw_clk_freq_attr_group, + NULL +}; + +static u32 hns3_pmu_get_event(struct perf_event *event) +{ + return hns3_pmu_get_ext_counter_used(event) << 16 | + hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_real_event(struct perf_event *event) +{ + return hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_offset(u32 offset, u32 idx) +{ + return offset + HNS3_PMU_REG_EVENT_OFFSET + + HNS3_PMU_REG_EVENT_SIZE * idx; +} + +static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + return readl(hns3_pmu->base + offset); +} + +static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx, + u32 val) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + writel(val, hns3_pmu->base + offset); +} + +static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + return readq(hns3_pmu->base + offset); +} + +static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx, + u64 val) +{ + u32 offset = hns3_pmu_get_offset(reg_offset, idx); + + writeq(val, hns3_pmu->base + offset); +} + +static bool hns3_pmu_cmp_event(struct perf_event *target, + struct perf_event *event) +{ + return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event); +} + +static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu, + struct perf_event *event) +{ + struct perf_event *sibling; + int hw_event_used = 0; + int idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + sibling = hns3_pmu->hw_events[idx]; + if (!sibling) + continue; + + hw_event_used++; + + if (!hns3_pmu_cmp_event(sibling, event)) + continue; + + /* Related events is used in group */ + if (sibling->group_leader == event->group_leader) + return idx; + } + + /* No related event and all hardware events are used up */ + if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS) + return -EBUSY; + + /* No related event and there is extra hardware events can be use */ + return -ENOENT; +} + +static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu) +{ + int idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + if (!hns3_pmu->hw_events[idx]) + return idx; + } + + return -EBUSY; +} + +static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf) +{ + struct pci_dev *pdev; + + if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) { + pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf); + return false; + } + + pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus), + PCI_BUS_NUM(bdf), + GET_PCI_DEVFN(bdf)); + if (!pdev) { + pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf); + return false; + } + + pci_dev_put(pdev); + return true; +} + +static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf, + u16 queue) +{ + u32 val; + + val = GET_PCI_DEVFN(bdf); + val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val); +} + +static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx) +{ + bool queue_id_valid = false; + u32 reg_qid_ctrl, val; + int err; + + /* enable queue id request */ + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, + HNS3_PMU_QID_CTRL_REQ_ENABLE); + + reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx); + err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val, + val & HNS3_PMU_QID_CTRL_DONE, 1, 1000); + if (err == -ETIMEDOUT) { + pci_err(hns3_pmu->pdev, "QID request timeout!\n"); + goto out; + } + + queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS); + +out: + /* disable qid request and clear status */ + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0); + + return queue_id_valid; +} + +static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf, + u16 queue) +{ + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue); + + return hns3_pmu_qid_req_start(hns3_pmu, idx); +} + +static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event) +{ + struct hns3_pmu_event_attr *pmu_event; + struct dev_ext_attribute *eattr; + struct device_attribute *dattr; + struct attribute *attr; + u32 i; + + for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) { + attr = hns3_pmu_events_attr[i]; + dattr = container_of(attr, struct device_attribute, attr); + eattr = container_of(dattr, struct dev_ext_attribute, attr); + pmu_event = eattr->var; + + if (event == pmu_event->event) + return pmu_event; + } + + return NULL; +} + +static int hns3_pmu_set_func_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC); + + return 0; +} + +static int hns3_pmu_set_func_queue_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) { + pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id); + return -ENOENT; + } + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE); + + return 0; +} + +static bool +hns3_pmu_is_enabled_global_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 global = hns3_pmu_get_global(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)) + return false; + + return global; +} + +static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)) + return false; + else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool +hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)) + return false; + else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)) + return false; + + return tc_id == HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)) + return false; + + return tc_id != HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)) + return false; + + return hns3_pmu_valid_bdf(hns3_pmu, bdf); +} + +static int hns3_pmu_select_filter_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u32 event_id = hns3_pmu_get_event(event); + struct hw_perf_event *hwc = &event->hw; + struct hns3_pmu_event_attr *pmu_event; + + pmu_event = hns3_pmu_get_pmu_event(event_id); + if (!pmu_event) { + pci_err(hns3_pmu->pdev, "Invalid pmu event\n"); + return -ENOENT; + } + + if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL); + return 0; + } + + if (hns3_pmu_is_enabled_func_mode(event, pmu_event)) + return hns3_pmu_set_func_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event)) + return hns3_pmu_set_func_queue_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT); + return 0; + } + + if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC); + return 0; + } + + if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR); + return 0; + } + + return -ENOENT; +} + +static bool hns3_pmu_validate_event_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS]; + int counters = 1; + int num; + + event_group[0] = leader; + if (!is_software_event(leader)) { + if (leader->pmu != event->pmu) + return false; + + if (leader != event && !hns3_pmu_cmp_event(leader, event)) + event_group[counters++] = event; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (is_software_event(sibling)) + continue; + + if (sibling->pmu != event->pmu) + return false; + + for (num = 0; num < counters; num++) { + if (hns3_pmu_cmp_event(event_group[num], sibling)) + break; + } + + if (num == counters) + event_group[counters++] = sibling; + } + + return counters <= HNS3_PMU_MAX_HW_EVENTS; +} + +static u32 hns3_pmu_get_filter_condition(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u16 intr_id = hns3_pmu_get_intr(event); + u8 port_id = hns3_pmu_get_port(event); + u16 bdf = hns3_pmu_get_bdf(event); + u8 tc_id = hns3_pmu_get_tc(event); + u8 filter_mode; + + filter_mode = *(u8 *)hwc->addr_filters; + switch (filter_mode) { + case HNS3_PMU_HW_FILTER_PORT: + return FILTER_CONDITION_PORT(port_id); + case HNS3_PMU_HW_FILTER_PORT_TC: + return FILTER_CONDITION_PORT_TC(port_id, tc_id); + case HNS3_PMU_HW_FILTER_FUNC: + case HNS3_PMU_HW_FILTER_FUNC_QUEUE: + return GET_PCI_DEVFN(bdf); + case HNS3_PMU_HW_FILTER_FUNC_INTR: + return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id); + default: + break; + } + + return 0; +} + +static void hns3_pmu_config_filter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u8 event_type = hns3_pmu_get_event_type(event); + u8 subevent_id = hns3_pmu_get_subevent(event); + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u8 filter_mode = *(u8 *)hwc->addr_filters; + u16 bdf = hns3_pmu_get_bdf(event); + u32 idx = hwc->idx; + u32 val; + + val = event_type; + val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S; + val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S; + val |= HNS3_PMU_EVENT_OVERFLOW_RESTART; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_get_filter_condition(event); + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val); + + if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE) + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id); +} + +static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val &= ~HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val |= HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx) +{ + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static u64 hns3_pmu_read_counter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + + return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx); +} + +static void hns3_pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u32 idx = event->hw.idx; + + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value); + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value); +} + +static void hns3_pmu_init_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + local64_set(&hwc->prev_count, 0); + hns3_pmu_write_counter(event, 0); +} + +static int hns3_pmu_event_init(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling is not supported */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + event->cpu = hns3_pmu->on_cpu; + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) { + pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n", + HNS3_PMU_MAX_HW_EVENTS); + return -EBUSY; + } + + hwc->idx = idx; + + ret = hns3_pmu_select_filter_mode(event, hns3_pmu); + if (ret) { + pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret); + return ret; + } + + if (!hns3_pmu_validate_event_group(event)) { + pci_err(hns3_pmu->pdev, "Invalid event group.\n"); + return -EINVAL; + } + + if (hns3_pmu_get_ext_counter_used(event)) + hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER; + else + hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER; + + return 0; +} + +static void hns3_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 new_cnt, prev_cnt, delta; + + do { + prev_cnt = local64_read(&hwc->prev_count); + new_cnt = hns3_pmu_read_counter(event); + } while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != + prev_cnt); + + delta = new_cnt - prev_cnt; + local64_add(delta, &event->count); +} + +static void hns3_pmu_start(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + hns3_pmu_config_filter(event); + hns3_pmu_init_counter(event); + hns3_pmu_enable_intr(hns3_pmu, hwc); + hns3_pmu_enable_counter(hns3_pmu, hwc); + + perf_event_update_userpage(event); +} + +static void hns3_pmu_stop(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_disable_counter(hns3_pmu, hwc); + hns3_pmu_disable_intr(hns3_pmu, hwc); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* Read hardware counter and update the perf counter statistics */ + hns3_pmu_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +static int hns3_pmu_add(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + /* Check all working events to find a related event. */ + idx = hns3_pmu_find_related_event_idx(hns3_pmu, event); + if (idx < 0 && idx != -ENOENT) + return idx; + + /* Current event shares an enabled hardware event with related event */ + if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) { + hwc->idx = idx; + goto start_count; + } + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) + return idx; + + hwc->idx = idx; + hns3_pmu->hw_events[idx] = event; + +start_count: + if (flags & PERF_EF_START) + hns3_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void hns3_pmu_del(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_stop(event, PERF_EF_UPDATE); + hns3_pmu->hw_events[hwc->idx] = NULL; + perf_event_update_userpage(event); +} + +static void hns3_pmu_enable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val |= HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static void hns3_pmu_disable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val &= ~HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + u16 device_id; + char *name; + u32 val; + + hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2]; + if (!hns3_pmu->base) { + pci_err(pdev, "ioremap failed\n"); + return -ENOMEM; + } + + hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ); + + val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF); + hns3_pmu->bdf_min = val & 0xffff; + hns3_pmu->bdf_max = val >> 16; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID); + device_id = val & 0xffff; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id); + if (!name) + return -ENOMEM; + + hns3_pmu->pdev = pdev; + hns3_pmu->on_cpu = -1; + hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION); + hns3_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .event_init = hns3_pmu_event_init, + .pmu_enable = hns3_pmu_enable, + .pmu_disable = hns3_pmu_disable, + .add = hns3_pmu_add, + .del = hns3_pmu_del, + .start = hns3_pmu_start, + .stop = hns3_pmu_stop, + .read = hns3_pmu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = hns3_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + return 0; +} + +static irqreturn_t hns3_pmu_irq(int irq, void *data) +{ + struct hns3_pmu *hns3_pmu = data; + u32 intr_status, idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + intr_status = hns3_pmu_readl(hns3_pmu, + HNS3_PMU_REG_EVENT_INTR_STATUS, + idx); + + /* + * As each counter will restart from 0 when it is overflowed, + * extra processing is no need, just clear interrupt status. + */ + if (intr_status) + hns3_pmu_clear_intr_status(hns3_pmu, idx); + } + + return IRQ_HANDLED; +} + +static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + if (hns3_pmu->on_cpu == -1) { + hns3_pmu->on_cpu = cpu; + irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu)); + } + + return 0; +} + +static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + unsigned int target; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + /* Nothing to do if this CPU doesn't own the PMU */ + if (hns3_pmu->on_cpu != cpu) + return 0; + + /* Choose a new CPU from all online cpus */ + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target); + hns3_pmu->on_cpu = target; + irq_set_affinity(hns3_pmu->irq, cpumask_of(target)); + + return 0; +} + +static void hns3_pmu_free_irq(void *data) +{ + struct pci_dev *pdev = data; + + pci_free_irq_vectors(pdev); +} + +static int hns3_pmu_irq_register(struct pci_dev *pdev, + struct hns3_pmu *hns3_pmu) +{ + int irq, ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { + pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret); + return ret; + } + + ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev); + if (ret) { + pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret); + return ret; + } + + irq = pci_irq_vector(pdev, 0); + ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0, + hns3_pmu->pmu.name, hns3_pmu); + if (ret) { + pci_err(pdev, "failed to register irq, ret = %d.\n", ret); + return ret; + } + + hns3_pmu->irq = irq; + + return 0; +} + +static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + int ret; + + ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu); + if (ret) + return ret; + + ret = hns3_pmu_irq_register(pdev, hns3_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); + if (ret) { + pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret); + return ret; + } + + ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1); + if (ret) { + pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); + } + + return ret; +} + +static void hns3_pmu_uninit_pmu(struct pci_dev *pdev) +{ + struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev); + + perf_pmu_unregister(&hns3_pmu->pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); +} + +static int hns3_pmu_init_dev(struct pci_dev *pdev) +{ + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu"); + if (ret < 0) { + pci_err(pdev, "failed to request pci region, ret = %d.\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct hns3_pmu *hns3_pmu; + int ret; + + hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL); + if (!hns3_pmu) + return -ENOMEM; + + ret = hns3_pmu_init_dev(pdev); + if (ret) + return ret; + + ret = hns3_pmu_init_pmu(pdev, hns3_pmu); + if (ret) { + pci_clear_master(pdev); + return ret; + } + + pci_set_drvdata(pdev, hns3_pmu); + + return ret; +} + +static void hns3_pmu_remove(struct pci_dev *pdev) +{ + hns3_pmu_uninit_pmu(pdev); + pci_clear_master(pdev); + pci_set_drvdata(pdev, NULL); +} + +static const struct pci_device_id hns3_pmu_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, hns3_pmu_ids); + +static struct pci_driver hns3_pmu_driver = { + .name = "hns3_pmu", + .id_table = hns3_pmu_ids, + .probe = hns3_pmu_probe, + .remove = hns3_pmu_remove, +}; + +static int __init hns3_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + "AP_PERF_ARM_HNS3_PMU_ONLINE", + hns3_pmu_online_cpu, + hns3_pmu_offline_cpu); + if (ret) { + pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret); + return ret; + } + + ret = pci_register_driver(&hns3_pmu_driver); + if (ret) { + pr_err("failed to register pci driver, ret = %d.\n", ret); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE); + } + + return ret; +} +module_init(hns3_pmu_module_init); + +static void __exit hns3_pmu_module_exit(void) +{ + pci_unregister_driver(&hns3_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE); +} +module_exit(hns3_pmu_module_exit); + +MODULE_DESCRIPTION("HNS3 PMU driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 282d3a071a67..69c3050a4348 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -2,10 +2,6 @@ /* Marvell CN10K LLC-TAD perf driver * * Copyright (C) 2021 Marvell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define pr_fmt(fmt) "tad_pmu: " fmt @@ -18,9 +14,9 @@ #include <linux/perf_event.h> #include <linux/platform_device.h> -#define TAD_PFC_OFFSET 0x0 +#define TAD_PFC_OFFSET 0x800 #define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3)) -#define TAD_PRF_OFFSET 0x100 +#define TAD_PRF_OFFSET 0x900 #define TAD_PRF(counter) (TAD_PRF_OFFSET | (counter << 3)) #define TAD_PRF_CNTSEL_MASK 0xFF #define TAD_MAX_COUNTERS 8 @@ -100,9 +96,7 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags) * which sets TAD()_PRF()[CNTSEL] != 0 */ for (i = 0; i < tad_pmu->region_cnt; i++) { - reg_val = readq_relaxed(tad_pmu->regions[i].base + - TAD_PRF(counter_idx)); - reg_val |= (event_idx & 0xFF); + reg_val = event_idx & 0xFF; writeq_relaxed(reg_val, tad_pmu->regions[i].base + TAD_PRF(counter_idx)); } diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index b2b8d2074ed0..2c961839903d 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -121,7 +121,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) return delta; } -static void riscv_pmu_stop(struct perf_event *event, int flags) +void riscv_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); @@ -175,7 +175,7 @@ int riscv_pmu_event_set_period(struct perf_event *event) return overflow; } -static void riscv_pmu_start(struct perf_event *event, int flags) +void riscv_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index dca3537a8dcc..79a3de515ece 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -17,10 +17,30 @@ #include <linux/irqdomain.h> #include <linux/of_irq.h> #include <linux/of.h> +#include <linux/cpu_pm.h> #include <asm/sbi.h> #include <asm/hwcap.h> +PMU_FORMAT_ATTR(event, "config:0-47"); +PMU_FORMAT_ATTR(firmware, "config:63"); + +static struct attribute *riscv_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_firmware.attr, + NULL, +}; + +static struct attribute_group riscv_pmu_format_group = { + .name = "format", + .attrs = riscv_arch_formats_attr, +}; + +static const struct attribute_group *riscv_pmu_attr_groups[] = { + &riscv_pmu_format_group, + NULL, +}; + union sbi_pmu_ctr_info { unsigned long value; struct { @@ -666,12 +686,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde child = of_get_compatible_child(cpu, "riscv,cpu-intc"); if (!child) { pr_err("Failed to find INTC node\n"); + of_node_put(cpu); return -ENODEV; } domain = irq_find_host(child); of_node_put(child); - if (domain) + if (domain) { + of_node_put(cpu); break; + } } if (!domain) { pr_err("Failed to find INTC IRQ root domain\n"); @@ -693,6 +716,73 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde return 0; } +#ifdef CONFIG_CPU_PM +static int riscv_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + void *v) +{ + struct riscv_pmu *rvpmu = container_of(b, struct riscv_pmu, riscv_pm_nb); + struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events); + int enabled = bitmap_weight(cpuc->used_hw_ctrs, RISCV_MAX_COUNTERS); + struct perf_event *event; + int idx; + + if (!enabled) + return NOTIFY_OK; + + for (idx = 0; idx < RISCV_MAX_COUNTERS; idx++) { + event = cpuc->events[idx]; + if (!event) + continue; + + switch (cmd) { + case CPU_PM_ENTER: + /* + * Stop and update the counter + */ + riscv_pmu_stop(event, PERF_EF_UPDATE); + break; + case CPU_PM_EXIT: + case CPU_PM_ENTER_FAILED: + /* + * Restore and enable the counter. + * + * Requires RCU read locking to be functional, + * wrap the call within RCU_NONIDLE to make the + * RCU subsystem aware this cpu is not idle from + * an RCU perspective for the riscv_pmu_start() call + * duration. + */ + RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD)); + break; + default: + break; + } + } + + return NOTIFY_OK; +} + +static int riscv_pm_pmu_register(struct riscv_pmu *pmu) +{ + pmu->riscv_pm_nb.notifier_call = riscv_pm_pmu_notify; + return cpu_pm_register_notifier(&pmu->riscv_pm_nb); +} + +static void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) +{ + cpu_pm_unregister_notifier(&pmu->riscv_pm_nb); +} +#else +static inline int riscv_pm_pmu_register(struct riscv_pmu *pmu) { return 0; } +static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { } +#endif + +static void riscv_pmu_destroy(struct riscv_pmu *pmu) +{ + riscv_pm_pmu_unregister(pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); +} + static int pmu_sbi_device_probe(struct platform_device *pdev) { struct riscv_pmu *pmu = NULL; @@ -720,6 +810,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; } + pmu->pmu.attr_groups = riscv_pmu_attr_groups; pmu->num_counters = num_counters; pmu->ctr_start = pmu_sbi_ctr_start; pmu->ctr_stop = pmu_sbi_ctr_stop; @@ -733,14 +824,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) return ret; + ret = riscv_pm_pmu_register(pmu); + if (ret) + goto out_unregister; + ret = perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); - if (ret) { - cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node); - return ret; - } + if (ret) + goto out_unregister; return 0; +out_unregister: + riscv_pmu_destroy(pmu); + out_free: kfree(pmu); return ret; diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index fd7e8fbaeef1..961f4d88f9ef 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -38,6 +38,10 @@ #define wmb() do { kcsan_wmb(); __wmb(); } while (0) #endif +#ifdef __dma_mb +#define dma_mb() do { kcsan_mb(); __dma_mb(); } while (0) +#endif + #ifdef __dma_rmb #define dma_rmb() do { kcsan_rmb(); __dma_rmb(); } while (0) #endif @@ -65,6 +69,10 @@ #define wmb() mb() #endif +#ifndef dma_mb +#define dma_mb() mb() +#endif + #ifndef dma_rmb #define dma_rmb() rmb() #endif diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 7ce93aaf69f8..db5b890eaff7 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -964,7 +964,34 @@ static inline void iounmap(volatile void __iomem *addr) #elif defined(CONFIG_GENERIC_IOREMAP) #include <linux/pgtable.h> -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot); +/* + * Arch code can implement the following two hooks when using GENERIC_IOREMAP + * ioremap_allowed() return a bool, + * - true means continue to remap + * - false means skip remap and return directly + * iounmap_allowed() return a bool, + * - true means continue to vunmap + * - false means skip vunmap and return directly + */ +#ifndef ioremap_allowed +#define ioremap_allowed ioremap_allowed +static inline bool ioremap_allowed(phys_addr_t phys_addr, size_t size, + unsigned long prot) +{ + return true; +} +#endif + +#ifndef iounmap_allowed +#define iounmap_allowed iounmap_allowed +static inline bool iounmap_allowed(void *addr) +{ + return true; +} +#endif + +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot); void iounmap(volatile void __iomem *addr); static inline void __iomem *ioremap(phys_addr_t addr, size_t size) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 19f0dbfdd7fe..3e99fb4d3134 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -230,6 +230,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, + CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2d2ccae933c2..0ace7759acd2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -348,7 +348,7 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ - __GFP_SKIP_KASAN_POISON) + __GFP_SKIP_KASAN_POISON | __GFP_SKIP_KASAN_UNPOISON) #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index de29821231c9..4ddaf6ad73ef 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -461,4 +461,16 @@ static inline int split_folio_to_list(struct folio *folio, return split_huge_page_to_list(&folio->page, list); } +/* + * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to + * limitations in the implementation like arm64 MTE can override this to + * false + */ +#ifndef arch_thp_swp_supported +static inline bool arch_thp_swp_supported(void) +{ + return true; +} +#endif + #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 46f9b6fe306e..bf66fe011fa8 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -56,9 +56,13 @@ struct riscv_pmu { struct cpu_hw_events __percpu *hw_events; struct hlist_node node; + struct notifier_block riscv_pm_nb; }; #define to_riscv_pmu(p) (container_of(p, struct riscv_pmu, pmu)) + +void riscv_pmu_start(struct perf_event *event, int flags); +void riscv_pmu_stop(struct perf_event *event, int flags); unsigned long riscv_pmu_ctr_read_csr(unsigned long csr); int riscv_pmu_event_set_period(struct perf_event *event); uint64_t riscv_pmu_ctr_get_width_mask(struct perf_event *event); diff --git a/mm/ioremap.c b/mm/ioremap.c index 5fe598ecd9b7..8652426282cc 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -11,29 +11,35 @@ #include <linux/io.h> #include <linux/export.h> -void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long prot) { unsigned long offset, vaddr; phys_addr_t last_addr; struct vm_struct *area; /* Disallow wrap-around or zero size */ - last_addr = addr + size - 1; - if (!size || last_addr < addr) + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) return NULL; /* Page-align mappings */ - offset = addr & (~PAGE_MASK); - addr -= offset; + offset = phys_addr & (~PAGE_MASK); + phys_addr -= offset; size = PAGE_ALIGN(size + offset); + if (!ioremap_allowed(phys_addr, size, prot)) + return NULL; + area = get_vm_area_caller(size, VM_IOREMAP, __builtin_return_address(0)); if (!area) return NULL; vaddr = (unsigned long)area->addr; + area->phys_addr = phys_addr; - if (ioremap_page_range(vaddr, vaddr + size, addr, __pgprot(prot))) { + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(prot))) { free_vm_area(area); return NULL; } @@ -44,6 +50,12 @@ EXPORT_SYMBOL(ioremap_prot); void iounmap(volatile void __iomem *addr) { - vunmap((void *)((unsigned long)addr & PAGE_MASK)); + void *vaddr = (void *)((unsigned long)addr & PAGE_MASK); + + if (!iounmap_allowed(vaddr)) + return; + + if (is_vmalloc_addr(vaddr)) + vunmap(vaddr); } EXPORT_SYMBOL(iounmap); diff --git a/mm/kasan/common.c b/mm/kasan/common.c index c40c0e7b3b5f..78be2beb7453 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -108,9 +108,10 @@ void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init) return; tag = kasan_random_tag(); + kasan_unpoison(set_tag(page_address(page), tag), + PAGE_SIZE << order, init); for (i = 0; i < (1 << order); i++) page_kasan_tag_set(page + i, tag); - kasan_unpoison(page_address(page), PAGE_SIZE << order, init); } void __kasan_poison_pages(struct page *page, unsigned int order, bool init) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e008a3df0485..bf45a6aa407a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2361,7 +2361,7 @@ static inline bool check_new_pcp(struct page *page, unsigned int order) } #endif /* CONFIG_DEBUG_VM */ -static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) +static inline bool should_skip_kasan_unpoison(gfp_t flags) { /* Don't skip if a software KASAN mode is enabled. */ if (IS_ENABLED(CONFIG_KASAN_GENERIC) || @@ -2373,12 +2373,10 @@ static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) return true; /* - * With hardware tag-based KASAN enabled, skip if either: - * - * 1. Memory tags have already been cleared via tag_clear_highpage(). - * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON. + * With hardware tag-based KASAN enabled, skip if this has been + * requested via __GFP_SKIP_KASAN_UNPOISON. */ - return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON); + return flags & __GFP_SKIP_KASAN_UNPOISON; } static inline bool should_skip_init(gfp_t flags) @@ -2397,6 +2395,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && !should_skip_init(gfp_flags); bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + int i; set_page_private(page, 0); set_page_refcounted(page); @@ -2422,8 +2421,6 @@ inline void post_alloc_hook(struct page *page, unsigned int order, * should be initialized as well). */ if (init_tags) { - int i; - /* Initialize both memory and tags. */ for (i = 0; i != 1 << order; ++i) tag_clear_highpage(page + i); @@ -2431,13 +2428,17 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* Note that memory is already initialized by the loop above. */ init = false; } - if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) { + if (!should_skip_kasan_unpoison(gfp_flags)) { /* Unpoison shadow memory or set memory tags. */ kasan_unpoison_pages(page, order, init); /* Note that memory is already initialized by KASAN. */ if (kasan_has_integrated_init()) init = false; + } else { + /* Ensure page_address() dereferencing does not fault. */ + for (i = 0; i != 1 << order; ++i) + page_kasan_tag_reset(page + i); } /* If memory is still not initialized, do it now. */ if (init) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 2a65a89b5b4d..10b94d64cc25 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -307,7 +307,7 @@ swp_entry_t folio_alloc_swap(struct folio *folio) entry.val = 0; if (folio_test_large(folio)) { - if (IS_ENABLED(CONFIG_THP_SWAP)) + if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported()) get_swap_pages(1, &entry, folio_nr_pages(folio)); goto out; } |