33 files changed, 436 insertions, 308 deletions
diff --git a/Documentation/arch/arm64/arm-acpi.rst b/Documentation/arch/arm64/arm-acpi.rst index a46c34fa9604..e59e4505d0d9 100644 --- a/Documentation/arch/arm64/arm-acpi.rst +++ b/Documentation/arch/arm64/arm-acpi.rst @@ -130,7 +130,7 @@ When an Arm system boots, it can either have DT information, ACPI tables, or in some very unusual cases, both. If no command line parameters are used, the kernel will try to use DT for device enumeration; if there is no DT present, the kernel will try to use ACPI tables, but only if they are present. -In neither is available, the kernel will not boot. If acpi=force is used +If neither is available, the kernel will not boot. If acpi=force is used on the command line, the kernel will attempt to use ACPI tables first, but fall back to DT if there are no ACPI tables present. The basic idea is that the kernel will not fail to boot unless it absolutely has no other choice. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b071a00425d..b67e6934316f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1549,7 +1549,7 @@ config ARCH_FORCE_MAX_ORDER Don't change if unsure. config UNMAP_KERNEL_AT_EL0 - bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT + bool "Unmap kernel when running in userspace (KPTI)" if EXPERT default y help Speculation attacks against some high-performance processors can diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9a2d3723cd0f..47ecc4cff9d2 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -200,7 +200,7 @@ endif endif vdso-install-y += arch/arm64/kernel/vdso/vdso.so.dbg -vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso.so.dbg:vdso32.so +vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso32.so.dbg include $(srctree)/scripts/Makefile.defconf diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1761f5972443..a5a787371117 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -44,7 +44,7 @@ EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64 EFI_ZBOOT_MACH_TYPE := ARM64 EFI_ZBOOT_FORWARD_CFI := $(CONFIG_ARM64_BTI_KERNEL) -EFI_ZBOOT_OBJCOPY_FLAGS = --add-symbol zboot_code_size=0x$(shell \ +EFI_ZBOOT_OBJCOPY_FLAGS = --add-symbol zboot_code_size=0x$$( \ $(NM) vmlinux|grep _kernel_codesize|cut -d' ' -f1) include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh index 7399d706967a..9b7a09808a3d 100755 --- a/arch/arm64/boot/install.sh +++ b/arch/arm64/boot/install.sh @@ -17,7 +17,8 @@ # $3 - kernel map file # $4 - default install path (blank if root directory) -if [ "$(basename $2)" = "Image.gz" ]; then +if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ] +then # Compressed install echo "Installing compressed kernel" base=vmlinuz diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 376a980f2bad..7b1975bf4b90 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -12,7 +12,7 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H -#include <asm-generic/export.h> +#include <linux/export.h> #include <asm/alternative.h> #include <asm/asm-bug.h> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f6d416fe49b0..21c824edf8ce 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -617,6 +617,7 @@ static inline bool id_aa64pfr1_mte(u64 pfr1) return val >= ID_AA64PFR1_EL1_MTE_MTE2; } 
+void __init setup_boot_cpu_features(void); void __init setup_system_features(void); void __init setup_user_features(void); @@ -819,6 +820,11 @@ static inline bool system_supports_tlb_range(void) return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE); } +static inline bool system_supports_lpa2(void) +{ + return cpus_have_final_cap(ARM64_HAS_LPA2); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index cdf6a35e3994..cda81d009c9b 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -242,14 +242,6 @@ | (\nx << 5) .endm -/* - * Zero the entire ZA array - * ZERO ZA - */ -.macro zero_za - .inst 0xc00800ff -.endm - .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 85d26143faa5..83ddb14b95a5 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -37,27 +37,12 @@ /* - * If KASLR is enabled, then an offset K is added to the kernel address - * space. The bottom 21 bits of this offset are zero to guarantee 2MB - * alignment for PA and VA. - * - * For each pagetable level of the swapper, we know that the shift will - * be larger than 21 (for the 4KB granule case we use section maps thus - * the smallest shift is actually 30) thus there is the possibility that - * KASLR can increase the number of pagetable entries by 1, so we make - * room for this extra entry. - * - * Note KASLR cannot increase the number of required entries for a level - * by more than one because it increments both the virtual start and end - * addresses equally (the extra entry comes from the case where the end - * address is just pushed over a boundary and the start address isn't). + * A relocatable kernel may execute from an address that differs from the one at + * which it was linked. In the worst case, its runtime placement may intersect + * with two adjacent PGDIR entries, which means that an additional page table + * may be needed at each subordinate level. 
*/ - -#ifdef CONFIG_RANDOMIZE_BASE -#define EARLY_KASLR (1) -#else -#define EARLY_KASLR (0) -#endif +#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE) #define SPAN_NR_ENTRIES(vstart, vend, shift) \ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) @@ -83,7 +68,7 @@ + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE)) /* the initial ID map may need two extra pages if it needs to be extended */ #if VA_BITS < 48 diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index d3e354bb8351..10068500d601 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -25,6 +25,8 @@ #define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U #endif +#define kvm_lpa2_is_enabled() false + static inline u64 kvm_get_parange(u64 mmfr0) { u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index fde4186cc387..56d1e6f14861 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -182,6 +182,7 @@ #include <linux/types.h> #include <asm/boot.h> #include <asm/bug.h> +#include <asm/sections.h> #if VA_BITS > 48 extern u64 vabits_actual; @@ -193,15 +194,12 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image */ -extern u64 kimage_vaddr; - /* the offset between the kernel virtual and physical mappings */ extern u64 kimage_voffset; static inline unsigned long kaslr_offset(void) { - return kimage_vaddr - KIMAGE_VADDR; + return (u64)&_text - KIMAGE_VADDR; } #ifdef CONFIG_RANDOMIZE_BASE @@ -407,6 +405,5 @@ void dump_mem_limit(void); #define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8) #endif -#include <asm-generic/memory_model.h> #endif /* __ASM_MEMORY_H */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index e9624f6326dd..483dbfa39c4c 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +#define lpa2_is_enabled() false + /* * If we have userspace only BTI we don't want to mark kernel pages * guarded even if the system does support BTI. 
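
The EXTRA_PAGE change above rests on the arithmetic of SPAN_NR_ENTRIES(): sliding a fixed-size region forward can grow the number of entries it spans at a given page-table level by at most one. The following stand-alone user-space sketch (not kernel code; the 64M image size, 2M step and shift value are illustrative assumptions) reuses the macro from the patch to demonstrate that bound:

#include <stdio.h>

/* copied from kernel-pgtable.h in the patch above */
#define SPAN_NR_ENTRIES(vstart, vend, shift) \
	((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)

int main(void)
{
	unsigned long long size = 1ULL << 26;	/* 64M "kernel image", arbitrary */
	int shift = 30;				/* e.g. a 1G-sized level with the 4K granule */
	unsigned long long base, worst = 0;

	/* slide the image across a 1G window in 2M steps, track the worst case */
	for (base = 0; base < (1ULL << 30); base += 1ULL << 21)
		if (SPAN_NR_ENTRIES(base, base + size, shift) > worst)
			worst = SPAN_NR_ENTRIES(base, base + size, shift);

	printf("aligned: %llu entries, worst case: %llu entries\n",
	       SPAN_NR_ENTRIES(0ULL, size, shift), worst);
	return 0;
}

With these numbers it reports 1 entry for an aligned placement and 2 in the worst case, i.e. exactly the one extra page per subordinate level that EXTRA_PAGE reserves for a relocatable kernel.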
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e5bc54522e71..5b0a04810b23 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -167,6 +167,9 @@ struct thread_struct { unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ + + struct user_fpsimd_state kernel_fpsimd_state; + unsigned int kernel_fpsimd_cpu; #ifdef CONFIG_ARM64_PTR_AUTH struct ptrauth_keys_user keys_user; #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 6a75d7ecdcaa..8e86c9e70e48 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,8 +12,6 @@ #include <linux/preempt.h> #include <linux/types.h> -DECLARE_PER_CPU(bool, fpsimd_context_busy); - #ifdef CONFIG_KERNEL_MODE_NEON /* @@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void) /* * We must make sure that the SVE has been initialized properly * before using the SIMD in kernel. - * fpsimd_context_busy is only set while preemption is disabled, - * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy - * cannot change under our feet -- if it's set we cannot be - * migrated, and if it's clear we cannot be migrated to a CPU - * where it is set. */ return !WARN_ON(!system_capabilities_finalized()) && system_supports_fpsimd() && - !in_hardirq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(fpsimd_context_busy); + !in_hardirq() && !irqs_disabled() && !in_nmi(); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5e65f51c10d2..48181cf6cc40 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -871,10 +871,12 @@ /* id_aa64mmfr0 */ #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf #define ARM64_MIN_PARANGE_BITS 32 @@ -882,6 +884,7 @@ #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 #ifdef CONFIG_ARM64_PA_BITS_52 @@ -892,11 +895,13 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT +#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT diff 
--git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 553d1bc559c6..e72a3bf9e563 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -80,6 +80,7 @@ void arch_setup_new_exec(void); #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ #define TIF_SME 27 /* SME in use */ #define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */ +#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 846c563689a8..0150deb332af 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> /* - * get the tlbi levels in arm64. Default value is 0 if more than one - * of cleared_* is set or neither is set. - * Arm64 doesn't support p4ds now. + * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than + * one of cleared_* is set or neither is set - this elides the level hinting to + * the hardware. */ static inline int tlb_get_level(struct mmu_gather *tlb) { /* The TTL field is only valid for the leaf entry. */ if (tlb->freed_tables) - return 0; + return TLBI_TTL_UNKNOWN; if (tlb->cleared_ptes && !(tlb->cleared_pmds || tlb->cleared_puds || @@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb) tlb->cleared_p4ds)) return 1; - return 0; + if (tlb->cleared_p4ds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_puds)) + return 0; + + return TLBI_TTL_UNKNOWN; } static inline void tlb_flush(struct mmu_gather *tlb) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bb2c2833a987..1deb5d789c2e 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void) * When ARMv8.4-TTL exists, TLBI operations take an additional hint for * the level at which the invalidation must take place. If the level is * wrong, no invalidation may take place. In the case where the level - * cannot be easily determined, a 0 value for the level parameter will - * perform a non-hinted invalidation. + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform + * a non-hinted invalidation. Any provided level outside the hint range + * will also cause fall-back to non-hinted invalidation. * * For Stage-2 invalidation, use the level values provided to that effect * in asm/stage2_pgtable.h. */ #define TLBI_TTL_MASK GENMASK_ULL(47, 44) +#define TLBI_TTL_UNKNOWN INT_MAX + #define __tlbi_level(op, addr, level) do { \ u64 arg = addr; \ \ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ - level) { \ + level >= 0 && level <= 3) { \ u64 ttl = level & 3; \ ttl |= get_trans_granule() << 2; \ arg &= ~TLBI_TTL_MASK; \ @@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void) } while (0) /* - * This macro creates a properly formatted VA operand for the TLB RANGE. - * The value bit assignments are: + * This macro creates a properly formatted VA operand for the TLB RANGE. 
The + * value bit assignments are: * * +----------+------+-------+-------+-------+----------------------+ * | ASID | TG | SCALE | NUM | TTL | BADDR | * +-----------------+-------+-------+-------+----------------------+ * |63 48|47 46|45 44|43 39|38 37|36 0| * - * The address range is determined by below formula: - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * + * 2^(5*SCALE + 1) * PAGESIZE) + * + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, + * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= (unsigned long)(ttl) << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = (baddr); \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta &= GENMASK_ULL(36, 0); \ + __ta |= __ttl << 37; \ + __ta |= (unsigned long)(num) << 39; \ + __ta |= (unsigned long)(scale) << 44; \ + __ta |= get_trans_granule() << 46; \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void) * CPUs, ensuring that any walk-cache entries associated with the * translation are also invalidated. * - * __flush_tlb_range(vma, start, end, stride, last_level) + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) * Invalidate the virtual-address range '[start, end)' on all * CPUs for the user address space corresponding to 'vma->mm'. * The invalidation operations are issued at a granularity * determined by 'stride' and only affect any walk-cache entries - * if 'last_level' is equal to false. + * if 'last_level' is equal to false. tlb_level is the level at + * which the invalidation must take place. If the level is wrong, + * no invalidation may take place. In the case where the level + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will + * perform a non-hinted invalidation. * * * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented @@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * @tlb_level: Translation Table level hint, if known * @tlbi_user: If 'true', call an additional __tlbi_user() * (typically for user ASIDs). 'flase' for IPA instructions + * @lpa2: If 'true', the lpa2 scheme is used as set out below * * When the CPU does not support TLB range operations, flush the TLB * entries one by one at the granularity of 'stride'. If the TLB * range ops are supported, then: * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be + * 64KB aligned, so flush pages one by one until the alignment is reached + * using the non-range operations. This step is skipped if LPA2 is not in + * use. + * + * 2. The minimum range granularity is decided by 'scale', so multiple range + * TLBI operations may be required. 
Start from scale = 3, flush the largest + * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the + * requested range, then decrement scale and continue until one or zero pages + * are left. We must start from highest scale to ensure 64KB start alignment + * is maintained in the LPA2 case. * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. + * 3. If there is 1 page remaining, flush it through non-range operations. Range + * operations can only span an even number of pages. We save this for last to + * ensure 64KB start alignment is maintained for the LPA2 case. * * Note that certain ranges can be represented by either num = 31 and * scale or num = 0 and scale + 1. The loop below favours the latter * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ - asid, tlb_level, tlbi_user) \ + asid, tlb_level, tlbi_user, lpa2) \ do { \ int num = 0; \ - int scale = 0; \ + int scale = 3; \ + int shift = lpa2 ? 16 : PAGE_SHIFT; \ unsigned long addr; \ \ while (pages > 0) { \ if (!system_supports_tlb_range() || \ - pages % 2 == 1) { \ + pages == 1 || \ + (lpa2 && start != ALIGN(start, SZ_64K))) { \ addr = __TLBI_VADDR(start, asid); \ __tlbi_level(op, addr, tlb_level); \ if (tlbi_user) \ @@ -384,20 +407,20 @@ do { \ \ num = __TLBI_RANGE_NUM(pages, scale); \ if (num >= 0) { \ - addr = __TLBI_VADDR_RANGE(start, asid, scale, \ - num, tlb_level); \ + addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ + scale, num, tlb_level); \ __tlbi(r##op, addr); \ if (tlbi_user) \ __tlbi_user(r##op, addr); \ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ pages -= __TLBI_RANGE_PAGES(num, scale); \ } \ - scale++; \ + scale--; \ } \ } while (0) #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ - __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, asid = ASID(vma->vm_mm); if (last_level) - __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vale1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); else - __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + __flush_tlb_range_op(vae1is, start, pages, stride, asid, + tlb_level, true, lpa2_is_enabled()); dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); @@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. - * Set the tlb_level to 0 because we can not get enough information here. + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough + * information here. 
*/ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 646591c67e7a..73b10e64b9df 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1081,25 +1081,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); - - /* - * Initialize the indirect array of CPU capabilities pointers before we - * handle the boot CPU below. - */ - init_cpucap_indirect_list(); - - /* - * Detect broken pseudo-NMI. Must be called _before_ the call to - * setup_boot_cpu_capabilities() since it interacts with - * can_use_gic_priorities(). - */ - detect_system_supports_pseudo_nmi(); - - /* - * Detect and enable early CPU capabilities based on the boot CPU, - * after we have initialised the CPU feature infrastructure. - */ - setup_boot_cpu_capabilities(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -1584,16 +1565,6 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, return has_sre; } -static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused) -{ - u32 midr = read_cpuid_id(); - - /* Cavium ThunderX pass 1.x and 2.x */ - return midr_is_cpu_model_range(midr, MIDR_THUNDERX, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); -} - static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, int scope) { @@ -1768,6 +1739,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return !meltdown_safe; } +#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2) +static bool has_lpa2_at_stage1(u64 mmfr0) +{ + unsigned int tgran; + + tgran = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_EL1_TGRAN_SHIFT); + return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2; +} + +static bool has_lpa2_at_stage2(u64 mmfr0) +{ + unsigned int tgran; + + tgran = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_EL1_TGRAN_2_SHIFT); + return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2; +} + +static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope) +{ + u64 mmfr0; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0); +} +#else +static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope) +{ + return false; +} +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) @@ -2322,12 +2326,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, #endif /* CONFIG_ARM64_LSE_ATOMICS */ { - .desc = "Software prefetching using PRFM", - .capability = ARM64_HAS_NO_HW_PREFETCH, - .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, - .matches = has_no_hw_prefetch, - }, - { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, @@ -2731,6 +2729,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, + { + .desc = "52-bit Virtual Addressing for KVM (LPA2)", + .capability = ARM64_HAS_LPA2, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_lpa2, + }, {}, }; @@ -3271,14 +3275,6 @@ void 
check_local_cpu_capabilities(void) verify_local_cpu_capabilities(); } -static void __init setup_boot_cpu_capabilities(void) -{ - /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */ - update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); - /* Enable the SCOPE_BOOT_CPU capabilities alone right away */ - enable_cpu_capabilities(SCOPE_BOOT_CPU); -} - bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { @@ -3334,37 +3330,52 @@ unsigned long cpu_get_elf_hwcap2(void) return elf_hwcap[1]; } -void __init setup_system_features(void) +static void __init setup_boot_cpu_capabilities(void) { - int i; /* - * The system-wide safe feature feature register values have been - * finalized. Finalize and log the available system capabilities. + * The boot CPU's feature register values have been recorded. Detect + * boot cpucaps and local cpucaps for the boot CPU, then enable and + * patch alternatives for the available boot cpucaps. */ - update_cpu_capabilities(SCOPE_SYSTEM); - if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN)) - pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); + enable_cpu_capabilities(SCOPE_BOOT_CPU); + apply_boot_alternatives(); +} +void __init setup_boot_cpu_features(void) +{ /* - * Enable all the available capabilities which have not been enabled - * already. + * Initialize the indirect array of CPU capabilities pointers before we + * handle the boot CPU. */ - enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); + init_cpucap_indirect_list(); - kpti_install_ng_mappings(); + /* + * Detect broken pseudo-NMI. Must be called _before_ the call to + * setup_boot_cpu_capabilities() since it interacts with + * can_use_gic_priorities(). + */ + detect_system_supports_pseudo_nmi(); - sve_setup(); - sme_setup(); + setup_boot_cpu_capabilities(); +} +static void __init setup_system_capabilities(void) +{ /* - * Check for sane CTR_EL0.CWG value. + * The system-wide safe feature register values have been finalized. + * Detect, enable, and patch alternatives for the available system + * cpucaps. */ - if (!cache_type_cwg()) - pr_warn("No Cache Writeback Granule information, assuming %d\n", - ARCH_DMA_MINALIGN); + update_cpu_capabilities(SCOPE_SYSTEM); + enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); + apply_alternatives_all(); - for (i = 0; i < ARM64_NCAPS; i++) { + /* + * Log any cpucaps with a cpumask as these aren't logged by + * update_cpu_capabilities(). + */ + for (int i = 0; i < ARM64_NCAPS; i++) { const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i]; if (caps && caps->cpus && caps->desc && @@ -3372,6 +3383,29 @@ void __init setup_system_features(void) pr_info("detected: %s on CPU%*pbl\n", caps->desc, cpumask_pr_args(caps->cpus)); } + + /* + * TTBR0 PAN doesn't have its own cpucap, so log it manually. + */ + if (system_uses_ttbr0_pan()) + pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); +} + +void __init setup_system_features(void) +{ + setup_system_capabilities(); + + kpti_install_ng_mappings(); + + sve_setup(); + sme_setup(); + + /* + * Check for sane CTR_EL0.CWG value. 
+ */ + if (!cache_type_cwg()) + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); } void __init setup_user_features(void) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1559c706d32d..505f389be3e0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -85,13 +85,13 @@ * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and * flag the register state as invalid. * - * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may - * save the task's FPSIMD context back to task_struct from softirq context. - * To prevent this from racing with the manipulation of the task's FPSIMD state - * from task context and thereby corrupting the state, it is necessary to - * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE - * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to - * run but prevent them to use FPSIMD. + * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be + * called from softirq context, which will save the task's FPSIMD context back + * to task_struct. To prevent this from racing with the manipulation of the + * task's FPSIMD state from task context and thereby corrupting the state, it + * is necessary to protect any manipulation of a task's fpsimd_state or + * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend + * softirq servicing entirely until put_cpu_fpsimd_context() is called. * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_cpu field @@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { } #endif -DEFINE_PER_CPU(bool, fpsimd_context_busy); -EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); - static void fpsimd_bind_task_to_cpu(void); -static void __get_cpu_fpsimd_context(void) -{ - bool busy = __this_cpu_xchg(fpsimd_context_busy, true); - - WARN_ON(busy); -} - /* * Claim ownership of the CPU FPSIMD context for use by the calling context. * * The caller may freely manipulate the FPSIMD context metadata until * put_cpu_fpsimd_context() is called. * - * The double-underscore version must only be called if you know the task - * can't be preempted. - * * On RT kernels local_bh_disable() is not sufficient because it only * serializes soft interrupt related sections via a local lock, but stays * preemptible. Disabling preemption is the right choice here as bottom @@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void) local_bh_disable(); else preempt_disable(); - __get_cpu_fpsimd_context(); -} - -static void __put_cpu_fpsimd_context(void) -{ - bool busy = __this_cpu_xchg(fpsimd_context_busy, false); - - WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? 
*/ } /* @@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void) */ static void put_cpu_fpsimd_context(void) { - __put_cpu_fpsimd_context(); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) local_bh_enable(); else preempt_enable(); } -static bool have_cpu_fpsimd_context(void) -{ - return !preemptible() && __this_cpu_read(fpsimd_context_busy); -} - unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) { return task->thread.vl[type]; @@ -383,7 +356,8 @@ static void task_fpsimd_load(void) bool restore_ffr; WARN_ON(!system_supports_fpsimd()); - WARN_ON(!have_cpu_fpsimd_context()); + WARN_ON(preemptible()); + WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE)); if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { @@ -406,7 +380,7 @@ static void task_fpsimd_load(void) default: /* * This indicates either a bug in - * fpsimd_save() or memory corruption, we + * fpsimd_save_user_state() or memory corruption, we * should always record an explicit format * when we save. We always at least have the * memory allocated for FPSMID registers so @@ -457,7 +431,7 @@ static void task_fpsimd_load(void) * than via current, if we are saving KVM state then it will have * ensured that the type of registers to save is set in last->to_save. */ -static void fpsimd_save(void) +static void fpsimd_save_user_state(void) { struct cpu_fp_state const *last = this_cpu_ptr(&fpsimd_last_state); @@ -467,7 +441,7 @@ static void fpsimd_save(void) unsigned int vl; WARN_ON(!system_supports_fpsimd()); - WARN_ON(!have_cpu_fpsimd_context()); + WARN_ON(preemptible()); if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; @@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, if (task == current) { get_cpu_fpsimd_context(); - fpsimd_save(); + fpsimd_save_user_state(); } fpsimd_flush_task_state(task); @@ -1171,7 +1145,7 @@ void __init sve_setup(void) unsigned long b; int max_bit; - if (!cpus_have_cap(ARM64_SVE)) + if (!system_supports_sve()) return; /* @@ -1301,7 +1275,7 @@ void __init sme_setup(void) struct vl_info *info = &vl_info[ARM64_VEC_SME]; int min_bit, max_bit; - if (!cpus_have_cap(ARM64_SME)) + if (!system_supports_sme()) return; /* @@ -1500,6 +1474,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs) current); } +static void fpsimd_load_kernel_state(struct task_struct *task) +{ + struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state); + + /* + * Elide the load if this CPU holds the most recent kernel mode + * FPSIMD context of the current task. + */ + if (last->st == &task->thread.kernel_fpsimd_state && + task->thread.kernel_fpsimd_cpu == smp_processor_id()) + return; + + fpsimd_load_state(&task->thread.kernel_fpsimd_state); +} + +static void fpsimd_save_kernel_state(struct task_struct *task) +{ + struct cpu_fp_state cpu_fp_state = { + .st = &task->thread.kernel_fpsimd_state, + .to_save = FP_STATE_FPSIMD, + }; + + fpsimd_save_state(&task->thread.kernel_fpsimd_state); + fpsimd_bind_state_to_cpu(&cpu_fp_state); + + task->thread.kernel_fpsimd_cpu = smp_processor_id(); +} + void fpsimd_thread_switch(struct task_struct *next) { bool wrong_task, wrong_cpu; @@ -1507,24 +1509,31 @@ void fpsimd_thread_switch(struct task_struct *next) if (!system_supports_fpsimd()) return; - __get_cpu_fpsimd_context(); + WARN_ON_ONCE(!irqs_disabled()); /* Save unsaved fpsimd state, if any: */ - fpsimd_save(); - - /* - * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's - * state. 
For kernel threads, FPSIMD registers are never loaded - * and wrong_task and wrong_cpu will always be true. - */ - wrong_task = __this_cpu_read(fpsimd_last_state.st) != - &next->thread.uw.fpsimd_state; - wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id(); + if (test_thread_flag(TIF_KERNEL_FPSTATE)) + fpsimd_save_kernel_state(current); + else + fpsimd_save_user_state(); - update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, - wrong_task || wrong_cpu); + if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) { + fpsimd_load_kernel_state(next); + set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); + } else { + /* + * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's + * state. For kernel threads, FPSIMD registers are never + * loaded with user mode FPSIMD state and so wrong_task and + * wrong_cpu will always be true. + */ + wrong_task = __this_cpu_read(fpsimd_last_state.st) != + &next->thread.uw.fpsimd_state; + wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id(); - __put_cpu_fpsimd_context(); + update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, + wrong_task || wrong_cpu); + } } static void fpsimd_flush_thread_vl(enum vec_type type) @@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void) return; get_cpu_fpsimd_context(); - fpsimd_save(); + fpsimd_save_user_state(); put_cpu_fpsimd_context(); } @@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void) */ void fpsimd_save_and_flush_cpu_state(void) { + unsigned long flags; + if (!system_supports_fpsimd()) return; WARN_ON(preemptible()); - __get_cpu_fpsimd_context(); - fpsimd_save(); + local_irq_save(flags); + fpsimd_save_user_state(); fpsimd_flush_cpu_state(); - __put_cpu_fpsimd_context(); + local_irq_restore(flags); } #ifdef CONFIG_KERNEL_MODE_NEON @@ -1864,10 +1875,37 @@ void kernel_neon_begin(void) get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ - fpsimd_save(); + if (test_thread_flag(TIF_KERNEL_FPSTATE)) { + BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()); + fpsimd_save_kernel_state(current); + } else { + fpsimd_save_user_state(); + + /* + * Set the thread flag so that the kernel mode FPSIMD state + * will be context switched along with the rest of the task + * state. + * + * On non-PREEMPT_RT, softirqs may interrupt task level kernel + * mode FPSIMD, but the task will not be preemptible so setting + * TIF_KERNEL_FPSTATE for those would be both wrong (as it + * would mark the task context FPSIMD state as requiring a + * context switch) and unnecessary. + * + * On PREEMPT_RT, softirqs are serviced from a separate thread, + * which is scheduled as usual, and this guarantees that these + * softirqs are not interrupting use of the FPSIMD in kernel + * mode in task context. So in this case, setting the flag here + * is always appropriate. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) + set_thread_flag(TIF_KERNEL_FPSTATE); + } /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); + + put_cpu_fpsimd_context(); } EXPORT_SYMBOL_GPL(kernel_neon_begin); @@ -1885,7 +1923,16 @@ void kernel_neon_end(void) if (!system_supports_fpsimd()) return; - put_cpu_fpsimd_context(); + /* + * If we are returning from a nested use of kernel mode FPSIMD, restore + * the task context kernel mode FPSIMD state. This can only happen when + * running in softirq context on non-PREEMPT_RT. 
+ */ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() && + test_thread_flag(TIF_KERNEL_FPSTATE)) + fpsimd_load_kernel_state(current); + else + clear_thread_flag(TIF_KERNEL_FPSTATE); } EXPORT_SYMBOL_GPL(kernel_neon_end); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7b236994f0e1..cab7f91949d8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -482,7 +482,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) str_l x21, __fdt_pointer, x5 // Save FDT pointer - ldr_l x4, kimage_vaddr // Save the offset between + adrp x4, _text // Save the offset between sub x4, x4, x0 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 3addc09f8746..e30fd9e32ef3 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -21,14 +21,25 @@ static u64 __boot_status __initdata; +// temporary __prel64 related definitions +// to be removed when this code is moved under pi/ + +#define __prel64_initconst __initconst + +#define PREL64(type, name) union { type *name; } + +#define prel64_pointer(__d) (__d) + +typedef bool filter_t(u64 val); + struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; - struct arm64_ftr_override *override; + PREL64(struct arm64_ftr_override, override); struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; u8 width; - bool (*filter)(u64 val); + PREL64(filter_t, filter); } fields[]; }; @@ -46,7 +57,7 @@ static bool __init mmfr1_vh_filter(u64 val) val == 0); } -static const struct ftr_set_desc mmfr1 __initconst = { +static const struct ftr_set_desc mmfr1 __prel64_initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { @@ -70,7 +81,7 @@ static bool __init pfr0_sve_filter(u64 val) return true; } -static const struct ftr_set_desc pfr0 __initconst = { +static const struct ftr_set_desc pfr0 __prel64_initconst = { .name = "id_aa64pfr0", .override = &id_aa64pfr0_override, .fields = { @@ -94,7 +105,7 @@ static bool __init pfr1_sme_filter(u64 val) return true; } -static const struct ftr_set_desc pfr1 __initconst = { +static const struct ftr_set_desc pfr1 __prel64_initconst = { .name = "id_aa64pfr1", .override = &id_aa64pfr1_override, .fields = { @@ -105,7 +116,7 @@ static const struct ftr_set_desc pfr1 __initconst = { }, }; -static const struct ftr_set_desc isar1 __initconst = { +static const struct ftr_set_desc isar1 __prel64_initconst = { .name = "id_aa64isar1", .override = &id_aa64isar1_override, .fields = { @@ -117,7 +128,7 @@ static const struct ftr_set_desc isar1 __initconst = { }, }; -static const struct ftr_set_desc isar2 __initconst = { +static const struct ftr_set_desc isar2 __prel64_initconst = { .name = "id_aa64isar2", .override = &id_aa64isar2_override, .fields = { @@ -128,7 +139,7 @@ static const struct ftr_set_desc isar2 __initconst = { }, }; -static const struct ftr_set_desc smfr0 __initconst = { +static const struct ftr_set_desc smfr0 __prel64_initconst = { .name = "id_aa64smfr0", .override = &id_aa64smfr0_override, .fields = { @@ -149,7 +160,7 @@ static bool __init hvhe_filter(u64 val) ID_AA64MMFR1_EL1_VH_SHIFT)); } -static const struct ftr_set_desc sw_features __initconst = { +static const struct ftr_set_desc sw_features __prel64_initconst = { .name = "arm64_sw", .override = &arm64_sw_feature_override, .fields = { @@ -159,22 +170,23 @@ static const struct ftr_set_desc sw_features __initconst = { }, }; -static const struct ftr_set_desc * const regs[] __initconst = { - 
&mmfr1, - &pfr0, - &pfr1, - &isar1, - &isar2, - &smfr0, - &sw_features, +static const +PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = { + { &mmfr1 }, + { &pfr0 }, + { &pfr1 }, + { &isar1 }, + { &isar2 }, + { &smfr0 }, + { &sw_features }, }; static const struct { char alias[FTR_ALIAS_NAME_LEN]; char feature[FTR_ALIAS_OPTION_LEN]; } aliases[] __initconst = { - { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, - { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, + { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, + { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" }, { "arm64.nosve", "id_aa64pfr0.sve=0" }, { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, @@ -187,45 +199,61 @@ static const struct { { "nokaslr", "arm64_sw.nokaslr=1" }, }; -static int __init parse_nokaslr(char *unused) +static int __init parse_hexdigit(const char *p, u64 *v) { - /* nokaslr param handling is done by early cpufeature code */ + // skip "0x" if it comes next + if (p[0] == '0' && tolower(p[1]) == 'x') + p += 2; + + // check whether the RHS is a single hex digit + if (!isxdigit(p[0]) || (p[1] && !isspace(p[1]))) + return -EINVAL; + + *v = tolower(*p) - (isdigit(*p) ? '0' : 'a' - 10); return 0; } -early_param("nokaslr", parse_nokaslr); -static int __init find_field(const char *cmdline, +static int __init find_field(const char *cmdline, char *opt, int len, const struct ftr_set_desc *reg, int f, u64 *v) { - char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2]; - int len; + int flen = strlen(reg->fields[f].name); - len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=", - reg->name, reg->fields[f].name); + // append '<fieldname>=' to obtain '<name>.<fieldname>=' + memcpy(opt + len, reg->fields[f].name, flen); + len += flen; + opt[len++] = '='; - if (!parameqn(cmdline, opt, len)) + if (memcmp(cmdline, opt, len)) return -1; - return kstrtou64(cmdline + len, 0, v); + return parse_hexdigit(cmdline + len, v); } static void __init match_options(const char *cmdline) { + char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2]; int i; for (i = 0; i < ARRAY_SIZE(regs); i++) { + const struct ftr_set_desc *reg = prel64_pointer(regs[i].reg); + struct arm64_ftr_override *override; + int len = strlen(reg->name); int f; - if (!regs[i]->override) - continue; + override = prel64_pointer(reg->override); - for (f = 0; strlen(regs[i]->fields[f].name); f++) { - u64 shift = regs[i]->fields[f].shift; - u64 width = regs[i]->fields[f].width ?: 4; + // set opt[] to '<name>.' + memcpy(opt, reg->name, len); + opt[len++] = '.'; + + for (f = 0; reg->fields[f].name[0] != '\0'; f++) { + u64 shift = reg->fields[f].shift; + u64 width = reg->fields[f].width ?: 4; u64 mask = GENMASK_ULL(shift + width - 1, shift); + bool (*filter)(u64 val); u64 v; - if (find_field(cmdline, regs[i], f, &v)) + if (find_field(cmdline, opt, len, reg, f, &v)) continue; /* @@ -233,16 +261,16 @@ static void __init match_options(const char *cmdline) * it by setting the value to the all-ones while * clearing the mask... Yes, this is fragile. 
*/ - if (regs[i]->fields[f].filter && - !regs[i]->fields[f].filter(v)) { - regs[i]->override->val |= mask; - regs[i]->override->mask &= ~mask; + filter = prel64_pointer(reg->fields[f].filter); + if (filter && !filter(v)) { + override->val |= mask; + override->mask &= ~mask; continue; } - regs[i]->override->val &= ~mask; - regs[i]->override->val |= (v << shift) & mask; - regs[i]->override->mask |= mask; + override->val &= ~mask; + override->val |= (v << shift) & mask; + override->mask |= mask; return; } @@ -258,23 +286,29 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) cmdline = skip_spaces(cmdline); - for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++); - if (!len) + /* terminate on "--" appearing on the command line by itself */ + if (cmdline[0] == '-' && cmdline[1] == '-' && isspace(cmdline[2])) return; - len = min(len, ARRAY_SIZE(buf) - 1); - memcpy(buf, cmdline, len); - buf[len] = '\0'; - - if (strcmp(buf, "--") == 0) + for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++) { + if (len >= sizeof(buf) - 1) + break; + if (cmdline[len] == '-') + buf[len] = '_'; + else + buf[len] = cmdline[len]; + } + if (!len) return; + buf[len] = 0; + cmdline += len; match_options(buf); for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++) - if (parameq(buf, aliases[i].alias)) + if (!memcmp(buf, aliases[i].alias, len + 1)) __parse_cmdline(aliases[i].feature, false); } while (1); } @@ -316,13 +350,16 @@ void init_feature_override(u64 boot_status); asmlinkage void __init init_feature_override(u64 boot_status) { + struct arm64_ftr_override *override; + const struct ftr_set_desc *reg; int i; for (i = 0; i < ARRAY_SIZE(regs); i++) { - if (regs[i]->override) { - regs[i]->override->val = 0; - regs[i]->override->mask = 0; - } + reg = prel64_pointer(regs[i].reg); + override = prel64_pointer(reg->override); + + override->val = 0; + override->mask = 0; } __boot_status = boot_status; @@ -330,9 +367,9 @@ asmlinkage void __init init_feature_override(u64 boot_status) parse_cmdline(); for (i = 0; i < ARRAY_SIZE(regs); i++) { - if (regs[i]->override) - dcache_clean_inval_poc((unsigned long)regs[i]->override, - (unsigned long)regs[i]->override + - sizeof(*regs[i]->override)); + reg = prel64_pointer(regs[i].reg); + override = prel64_pointer(reg->override); + dcache_clean_inval_poc((unsigned long)override, + (unsigned long)(override + 1)); } } diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 6ad5c6ef5329..85087e2df564 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <asm/daifflags.h> #include <asm/exception.h> +#include <asm/numa.h> #include <asm/softirq_stack.h> #include <asm/stacktrace.h> #include <asm/vmap_stack.h> @@ -47,17 +48,17 @@ static void init_irq_scs(void) for_each_possible_cpu(cpu) per_cpu(irq_shadow_call_stack_ptr, cpu) = - scs_alloc(cpu_to_node(cpu)); + scs_alloc(early_cpu_to_node(cpu)); } #ifdef CONFIG_VMAP_STACK -static void init_irq_stacks(void) +static void __init init_irq_stacks(void) { int cpu; unsigned long *p; for_each_possible_cpu(cpu) { - p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu)); + p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu)); per_cpu(irq_stack_ptr, cpu) = p; } } diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 94a269cd1f07..12c7f3c8ba76 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -36,3 +36,10 @@ void __init kaslr_init(void) pr_info("KASLR enabled\n"); 
__kaslr_is_enabled = true; } + +static int __init parse_nokaslr(char *unused) +{ + /* nokaslr param handling is done by early cpufeature code */ + return 0; +} +early_param("nokaslr", parse_nokaslr); diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile index 4c0ea3cd4ea4..c844a0546d7f 100644 --- a/arch/arm64/kernel/pi/Makefile +++ b/arch/arm64/kernel/pi/Makefile @@ -3,6 +3,7 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + $(DISABLE_LATENT_ENTROPY_PLUGIN) \ $(call cc-option,-mbranch-protection=none) \ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ -include $(srctree)/include/linux/hidden.h \ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index defbab84e9e5..4ced34f62dab 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -439,9 +439,8 @@ static void __init hyp_mode_check(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); - setup_system_features(); hyp_mode_check(); - apply_alternatives_all(); + setup_system_features(); setup_user_features(); mark_linear_text_alias_ro(); } @@ -454,14 +453,9 @@ void __init smp_prepare_boot_cpu(void) * freed shortly, so we must move over to the runtime per-cpu area. */ set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - cpuinfo_store_boot_cpu(); - /* - * We now know enough about the boot CPU to apply the - * alternatives that cannot wait until interrupt handling - * and/or scheduling is enabled. - */ - apply_boot_alternatives(); + cpuinfo_store_boot_cpu(); + setup_boot_cpu_features(); /* Conditionally switch to GIC PMR for interrupt masking */ if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 1f911a76c5af..2266fcdff78a 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -118,7 +118,7 @@ endif VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os # Build rules -targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw +targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso32.so.dbg vdso.so.raw c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso)) c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday)) asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) @@ -127,15 +127,15 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE $(call if_changed,vdsosym) # Strip rule for vdso.so $(obj)/vdso.so: OBJCOPYFLAGS := -S -$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE +$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE $(call if_changed,objcopy) -$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE +$(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE $(call if_changed,vdsomunge) # Link rule for the .so file, .lds has to be first diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index c336d2ffdec5..6a56d7cf309d 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -18,13 +18,6 @@ * x1 - src */ SYM_FUNC_START(__pi_copy_page) -alternative_if ARM64_HAS_NO_HW_PREFETCH - // Prefetch three cache lines ahead. 
- prfm pldl1strm, [x1, #128] - prfm pldl1strm, [x1, #256] - prfm pldl1strm, [x1, #384] -alternative_else_nop_endif - ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] @@ -39,10 +32,6 @@ alternative_else_nop_endif 1: tst x0, #(PAGE_SIZE - 1) -alternative_if ARM64_HAS_NO_HW_PREFETCH - prfm pldl1strm, [x1, #384] -alternative_else_nop_endif - stnp x2, x3, [x0, #-256] ldp x2, x3, [x1] stnp x4, x5, [x0, #16 - 256] diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 15f6347d23b6..1ac7467d34c9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -52,9 +52,6 @@ u64 vabits_actual __ro_after_init = VA_BITS_MIN; EXPORT_SYMBOL(vabits_actual); #endif -u64 kimage_vaddr __ro_after_init = (u64)&_text; -EXPORT_SYMBOL(kimage_vaddr); - u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -674,6 +671,9 @@ static int __init map_entry_trampoline(void) { int i; + if (!arm64_kernel_unmapped_at_el0()) + return 0; + pgprot_t prot = kernel_exec_prot(); phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index b98c38288a9d..1e07d74d7a6c 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -37,10 +37,10 @@ HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC HAS_HCX HAS_LDAPR +HAS_LPA2 HAS_LSE_ATOMICS HAS_MOPS HAS_NESTED_VIRT -HAS_NO_HW_PREFETCH HAS_PAN HAS_S1PIE HAS_RAS_EXTN diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index eaa31e567d1e..5b59d133b6af 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -144,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static int __init early_cpu_to_node(int cpu) +int __init early_cpu_to_node(int cpu) { return cpu_to_node_map[cpu]; } diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot index 2c489627a807..65ffd0b760b2 100644 --- a/drivers/firmware/efi/libstub/Makefile.zboot +++ b/drivers/firmware/efi/libstub/Makefile.zboot @@ -5,8 +5,8 @@ # EFI_ZBOOT_FORWARD_CFI quiet_cmd_copy_and_pad = PAD $@ - cmd_copy_and_pad = cp $< $@ && \ - truncate -s $(shell hexdump -s16 -n4 -e '"%u"' $<) $@ + cmd_copy_and_pad = cp $< $@; \ + truncate -s $$(hexdump -s16 -n4 -e '"%u"' $<) $@ # Pad the file to the size of the uncompressed image in memory, including BSS $(obj)/vmlinux.bin: $(obj)/$(EFI_ZBOOT_PAYLOAD) FORCE diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h index 1a3ad6d29833..c32e0cf23c90 100644 --- a/include/asm-generic/numa.h +++ b/include/asm-generic/numa.h @@ -35,6 +35,7 @@ int __init numa_add_memblk(int nodeid, u64 start, u64 end); void __init numa_set_distance(int from, int to, int distance); void __init numa_free_distance(void); void __init early_map_cpu_to_node(unsigned int cpu, int nid); +int __init early_cpu_to_node(int cpu); void numa_store_cpu_info(unsigned int cpu); void numa_add_cpu(unsigned int cpu); void numa_remove_cpu(unsigned int cpu); @@ -46,6 +47,7 @@ static inline void numa_add_cpu(unsigned int cpu) { } static inline void numa_remove_cpu(unsigned int cpu) { } static inline void arch_numa_init(void) { } static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { } +static inline int early_cpu_to_node(int cpu) { return 0; } #endif /* CONFIG_NUMA */ |
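
To make the reworked __flush_tlb_range_op() loop above concrete: each range TLBI covers (num + 1) * 2^(5*scale + 1) pages, scale now walks from 3 down to 0, and at most one page is left for a final non-range operation. The sketch below is not the kernel macro itself, only a user-space model of that decomposition, assuming a CPU with range TLBI support and ignoring the LPA2 64K-alignment pre-step; the 30-entry cap on num is taken from the comment in the patch.

#include <stdio.h>

/* model of the descending-scale decomposition described in tlbflush.h */
static void decompose(unsigned long pages)
{
	int scale = 3;

	printf("%lu pages:", pages);
	while (pages > 1 && scale >= 0) {
		/* pages one range op covers at this scale: (num + 1) * 2^(5*scale + 1) */
		unsigned long span = 1UL << (5 * scale + 1);
		long num = (long)(pages / span) - 1;

		if (num > 30)	/* the num field is limited to 30, per the patch comment */
			num = 30;
		if (num >= 0) {
			unsigned long covered = (unsigned long)(num + 1) * span;

			printf(" [scale=%d num=%ld covers %lu]", scale, num, covered);
			pages -= covered;
		}
		scale--;	/* the kernel loop moves to the next smaller scale either way */
	}
	if (pages)
		printf(" +%lu single-page op(s)", pages);
	printf("\n");
}

int main(void)
{
	decompose(7);		/* one scale-0 range op plus a trailing single-page op */
	decompose(512);		/* a single scale-1 range op */
	decompose(100000);	/* one op per scale, largest first */
	return 0;
}

Starting from the largest scale is what keeps the running start address 64K-aligned in the LPA2 case, as the updated comment in the patch explains.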
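
The fpsimd.c changes above are all internal to kernel_neon_begin()/kernel_neon_end(); the caller-visible contract stays the same. For context, a minimal hedged sketch of a kernel-mode NEON user (copy_block() is a hypothetical helper, not part of this series; only kernel_neon_begin(), kernel_neon_end() and may_use_simd() are the real interfaces touched here):

#include <linux/types.h>
#include <linux/string.h>
#include <asm/neon.h>
#include <asm/simd.h>

/* hypothetical caller: use NEON only when the current context allows it */
static void copy_block(u8 *dst, const u8 *src, size_t len)
{
	if (may_use_simd()) {
		/*
		 * After this series the section below may be preempted; the new
		 * TIF_KERNEL_FPSTATE path preserves the task's kernel-mode
		 * FPSIMD state across a context switch.
		 */
		kernel_neon_begin();
		memcpy(dst, src, len);	/* stand-in for a real NEON implementation */
		kernel_neon_end();
	} else {
		memcpy(dst, src, len);	/* hardirq/NMI etc.: scalar fallback */
	}
}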