diff options
-rw-r--r-- | Documentation/arm64/booting.rst | 10 | ||||
-rw-r--r-- | Documentation/arm64/memory-tagging-extension.rst | 28 | ||||
-rw-r--r-- | Documentation/arm64/silicon-errata.rst | 2 | ||||
-rw-r--r-- | arch/arm64/Kconfig | 9 | ||||
-rw-r--r-- | arch/arm64/include/asm/arch_gicv3.h | 23 | ||||
-rw-r--r-- | arch/arm64/include/asm/cputype.h | 13 | ||||
-rw-r--r-- | arch/arm64/include/asm/mte-def.h | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/cpu_errata.c | 21 | ||||
-rw-r--r-- | arch/arm64/kernel/elfcore.c | 134 | ||||
-rw-r--r-- | arch/arm64/lib/mte.S | 4 | ||||
-rw-r--r-- | arch/arm64/mm/mteswap.c | 2 | ||||
-rw-r--r-- | arch/ia64/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/um/Kconfig | 1 | ||||
-rw-r--r-- | fs/Kconfig.binfmt | 3 | ||||
-rw-r--r-- | include/linux/elfcore.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/elf.h | 3 |
17 files changed, 240 insertions, 20 deletions
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 52d060caf8bb..29884b261aa9 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and is relevant to all public releases of the AArch64 Linux kernel. The AArch64 exception model is made up of a number of exception levels -(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure -counterpart. EL2 is the hypervisor level and exists only in non-secure -mode. EL3 is the highest priority level and exists only in secure mode. +(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure +counterpart. EL2 is the hypervisor level, EL3 is the highest priority +level and exists only in secure mode. Both are architecturally optional. For the purposes of this document, we will use the term `boot loader` simply to define all software that executes on the CPU(s) before control @@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met: All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError, IRQ and FIQ). - The CPU must be in either EL2 (RECOMMENDED in order to have access to - the virtualisation extensions) or non-secure EL1. + The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order + to have access to the virtualisation extensions), or in EL1. - Caches, MMUs diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb..6ccbdc408df6 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -91,8 +91,9 @@ mode is specified, the program will run in that mode. If multiple modes are specified, the mode is selected as described in the "Per-CPU preferred tag checking modes" section below. 
-The current tag check fault mode can be read using the -``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. +The current tag check fault configuration can be read using the +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If +multiple modes were requested then all will be reported. Tag checking can also be disabled for a user thread by setting the ``PSTATE.TCO`` bit with ``MSR TCO, #1``. @@ -213,6 +214,29 @@ address ABI control and MTE configuration of a process as per the Documentation/arm64/tagged-address-abi.rst and above. The corresponding ``regset`` is 1 element of 8 bytes (``sizeof(long)``). +Core dump support +----------------- + +The allocation tags for user memory mapped with ``PROT_MTE`` are dumped +in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The +program header for such a segment is defined as: + +:``p_type``: ``PT_ARM_MEMTAG_MTE`` +:``p_flags``: 0 +:``p_offset``: segment file offset +:``p_vaddr``: segment virtual address, same as the corresponding + ``PT_LOAD`` segment +:``p_paddr``: 0 +:``p_filesz``: segment size in file, calculated as ``p_memsz / 32`` + (two 4-bit tags cover 32 bytes of memory) +:``p_memsz``: segment size in memory, same as the corresponding + ``PT_LOAD`` segment +:``p_align``: 0 + +The tags are stored in the core file at ``p_offset`` as two 4-bit tags +in a byte. With the tag granule of 16 bytes, a 4K page requires 128 +bytes in the core file. + Example of correct usage ======================== diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index ea281dd75517..466cb9e89047 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -136,7 +136,7 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | +----------------+-----------------+-----------------+-----------------------------+ -| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 | +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX GICv3 | #38539 | N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cbcd42decb2a..1d1b5c0b157c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION @@ -890,13 +891,17 @@ config CAVIUM_ERRATUM_23144 If unsure, say Y. config CAVIUM_ERRATUM_23154 - bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" + bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation" default y help - The gicv3 of ThunderX requires a modified version for + The ThunderX GICv3 implementation requires a modified version for reading the IAR status to ensure data synchronization (access to icc_iar1_el1 is not sync'ed before and after). + It also suffers from erratum 38545 (also present on Marvell's + OcteonTX and OcteonTX2), resulting in deactivated interrupts being + spuriously presented to the CPU interface. + If unsure, say Y. 
config CAVIUM_ERRATUM_27456 diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 4ad22c3135db..8bd5afc7b692 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void) * The gicv3 of ThunderX requires a modified version for reading the * IAR status to ensure data synchronization (access to icc_iar1_el1 * is not sync'ed before and after). + * + * Erratum 38545 + * + * When an IAR register read races with a GIC interrupt RELEASE event, + * GIC-CPU interface could wrongly return a valid INTID to the CPU + * for an interrupt that is already released (non activated) instead of 0x3ff. + * + * To work around this, return a valid interrupt ID only if there is a change + * in the active priority list after the IAR read. + * + * Common function used for both the workarounds since, + * 1. On ThunderX 88xx 1.x both errata are applicable. + * 2. Having extra nops doesn't add any side effects for Silicons where + * erratum 23154 is not applicable. 
*/ static inline u64 gic_read_iar_cavium_thunderx(void) { - u64 irqstat; + u64 irqstat, apr; + apr = read_sysreg_s(SYS_ICC_AP1R0_EL1); nops(8); irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); nops(4); mb(); - return irqstat; + /* Max priority groups implemented is only 32 */ + if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1))) + return irqstat; + + return 0x3ff; } static inline void gic_write_ctlr(u32 val) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 999b9149f856..4596e7ca29a3 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -84,6 +84,13 @@ #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +/* OcteonTx2 series */ +#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1 +#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2 +#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3 +#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4 +#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5 +#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6 #define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 @@ -124,6 +131,12 @@ #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX) +#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX) +#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX) +#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN) +#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM) +#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, 
CAVIUM_CPU_PART_THUNDERX2) #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 626d359b396e..14ee86b019c2 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -11,6 +11,7 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8) #define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88b3e2a21408..986837d7ec82 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b217941713a8..6485d8e54cca 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -214,6 +214,21 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { }; #endif +#ifdef CONFIG_CAVIUM_ERRATUM_23154 +const struct midr_range cavium_erratum_23154_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_THUNDERX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO), + {}, +}; 
+#endif + #ifdef CONFIG_CAVIUM_ERRATUM_27456 const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ @@ -425,10 +440,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 { - /* Cavium ThunderX, pass 1.x */ - .desc = "Cavium erratum 23154", + .desc = "Cavium errata 23154 and 38545", .capability = ARM64_WORKAROUND_CAVIUM_23154, - ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c new file mode 100644 index 000000000000..3ed39c61a510 --- /dev/null +++ b/arch/arm64/kernel/elfcore.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/coredump.h> +#include <linux/elfcore.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> + +#ifndef VMA_ITERATOR +#define VMA_ITERATOR(name, mm, addr) \ + struct mm_struct *name = mm +#define for_each_vma(vmi, vma) \ + for (vma = vmi->mmap; vma; vma = vma->vm_next) +#endif + +#define for_each_mte_vma(vmi, vma) \ + if (system_supports_mte()) \ + for_each_vma(vmi, vma) \ + if (vma->vm_flags & VM_MTE) + +static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; +} + +/* Derived from dump_user_range(); start/end must be page-aligned */ +static int mte_dump_tag_range(struct coredump_params *cprm, + unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + char tags[MTE_PAGE_TAG_STORAGE]; + struct page *page = get_dump_page(addr); + + /* + * get_dump_page() returns NULL when encountering an empty + * page table entry that would otherwise have been filled with + * the zero page. 
Skip the equivalent tag dump which would + * have been all zeros. + */ + if (!page) { + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + /* + * Pages mapped in user space as !pte_access_permitted() (e.g. + * PROT_EXEC only) may not have the PG_mte_tagged flag set. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + put_page(page); + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + mte_save_page_tags(page_address(page), tags); + put_page(page); + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) + return 0; + } + + return 1; +} + +Elf_Half elf_core_extra_phdrs(void) +{ + struct vm_area_struct *vma; + int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) + vma_count++; + + return vma_count; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) { + struct elf_phdr phdr; + + phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = mte_vma_tag_dump_size(vma); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = 0; + phdr.p_align = 0; + + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + struct vm_area_struct *vma; + size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) + data_size += mte_vma_tag_dump_size(vma); + + return data_size; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) { + if (vma->vm_flags & VM_DONTDUMP) + continue; + + if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + return 0; + } + + return 1; +} diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index f531dcb95174..8590af3c98c0 100644 --- a/arch/arm64/lib/mte.S +++ 
b/arch/arm64/lib/mte.S @@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user) /* * Save the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_save_page_tags) multitag_transfer_size x7, x5 @@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags) /* * Restore the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_restore_page_tags) multitag_transfer_size x7, x5 diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 7c4ef56265ee..a9e50e930484 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages); void *mte_allocate_tag_storage(void) { /* tags granule is 16 bytes, 2 tags stored per byte */ - return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); + return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL); } void mte_free_tag_storage(char *storage) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..e003b2473c64 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 40d6a06e41c8..ead7e5b3a975 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,6 +8,7 @@ endmenu config UML_X86 def_bool y + select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4d5ae61580aa..68e586283764 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_BINFMT_ELF_EXTRA_PHDRS + bool + config ARCH_HAVE_ELF_PROT bool diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5..f8e206e82476 100644 
--- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..fe8e5b74cb39 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * |