From e176e2677cccd458f99c69d16d27f86adcdd02e4 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Thu, 20 May 2021 12:50:27 +0100
Subject: arm64: assembler: add set_this_cpu_offset

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: James Morse
Cc: Marc Zyngier
Cc: Suzuki Poulose
Cc: Will Deacon
Reviewed-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20210520115031.18509-3-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/assembler.h | 18 +++++++++++++-----
 arch/arm64/mm/proc.S               | 12 ++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f04..f0188903557f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -232,15 +232,23 @@ lr	.req	x30		// link register
  * @dst: destination register
  */
 #if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
-	.macro	this_cpu_offset, dst
+	.macro	get_this_cpu_offset, dst
 	mrs	\dst, tpidr_el2
 	.endm
 #else
-	.macro	this_cpu_offset, dst
+	.macro	get_this_cpu_offset, dst
 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	mrs	\dst, tpidr_el1
 alternative_else
 	mrs	\dst, tpidr_el2
+alternative_endif
+	.endm
+
+	.macro	set_this_cpu_offset, src
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+	msr	tpidr_el1, \src
+alternative_else
+	msr	tpidr_el2, \src
 alternative_endif
 	.endm
 #endif
@@ -253,7 +261,7 @@ alternative_endif
 	.macro adr_this_cpu, dst, sym, tmp
 	adrp	\tmp, \sym
 	add	\dst, \tmp, #:lo12:\sym
-	this_cpu_offset \tmp
+	get_this_cpu_offset \tmp
 	add	\dst, \dst, \tmp
 	.endm
@@ -264,7 +272,7 @@ alternative_endif
  */
 	.macro	ldr_this_cpu dst, sym, tmp
 	adr_l	\dst, \sym
-	this_cpu_offset \tmp
+	get_this_cpu_offset \tmp
 	ldr	\dst, [\dst, \tmp]
 	.endm
@@ -745,7 +753,7 @@ USER(\label, ic	ivau, \tmp2)		// invalidate I line PoU
 	cbz	\tmp, \lbl
 #endif
 	adr_l	\tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
-	this_cpu_offset \tmp2
+	get_this_cpu_offset \tmp2
 	ldr	w\tmp, [\tmp, \tmp2]
 	cbnz	w\tmp, \lbl	// yield on pending softirq in task context
 .Lnoyield_\@:
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 97d7bcd8d4f2..bc555cd5e6b1 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -83,11 +83,7 @@ SYM_FUNC_START(cpu_do_suspend)
 	mrs	x9, mdscr_el1
 	mrs	x10, oslsr_el1
 	mrs	x11, sctlr_el1
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x12, tpidr_el1
-alternative_else
-	mrs	x12, tpidr_el2
-alternative_endif
+	get_this_cpu_offset x12
 	mrs	x13, sp_el0
 	stp	x2, x3, [x0]
 	stp	x4, x5, [x0, #16]
@@ -145,11 +141,7 @@ SYM_FUNC_START(cpu_do_resume)
 	msr	mdscr_el1, x10
 	msr	sctlr_el1, x12
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	msr	tpidr_el1, x13
-alternative_else
-	msr	tpidr_el2, x13
-alternative_endif
+	set_this_cpu_offset x13
 	msr	sp_el0, x14
 	/*
 	 * Restore oslsr_el1 by writing oslar_el1
-- cgit

From 98c7a1666ee94af59a65f2787a887a05a546d163 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Thu, 20 May 2021 12:50:28 +0100
Subject: arm64: smp: remove pointless secondary_data maintenance

All reads and writes of secondary_data occur with the MMU on, using
coherent attributes, so there's no need to perform any cache maintenance
for this.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: James Morse
Cc: Marc Zyngier
Cc: Suzuki Poulose
Cc: Will Deacon
Reviewed-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20210520115031.18509-4-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b07..92e83e8bac94 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -122,7 +122,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	secondary_data.task = idle;
 	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
-	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	/* Now bring the CPU into our world */
 	ret = boot_secondary(cpu, idle);
@@ -143,7 +142,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 		pr_crit("CPU%u: failed to come online\n", cpu);
 		secondary_data.task = NULL;
 		secondary_data.stack = NULL;
-		__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 		status = READ_ONCE(secondary_data.status);
 		if (status == CPU_MMU_OFF)
 			status = READ_ONCE(__early_cpu_boot_status);
-- cgit

From 3305e7f74a14cdb19e61af4febb098ad62820d71 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Thu, 20 May 2021 12:50:29 +0100
Subject: arm64: smp: remove stack from secondary_data

When we boot a secondary CPU, we pass it a task and a stack to use. As
the stack is always the task's stack, which can be derived from the
task, let's have the secondary CPU derive this itself and avoid passing
redundant information.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: James Morse
Cc: Marc Zyngier
Cc: Suzuki Poulose
Cc: Will Deacon
Reviewed-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20210520115031.18509-5-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/smp.h    | 2 --
 arch/arm64/kernel/asm-offsets.c | 1 -
 arch/arm64/kernel/head.S        | 7 ++++---
 arch/arm64/kernel/smp.c         | 2 --
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 0e357757c0cc..fc55f5a57a06 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -73,12 +73,10 @@ asmlinkage void secondary_start_kernel(void);
 /*
  * Initial data for bringing up a secondary CPU.
- * @stack  - sp for the secondary CPU
  * @status - Result passed back from the secondary CPU to
  *           indicate failure.
  */
 struct secondary_data {
-	void *stack;
 	struct task_struct *task;
 	long status;
 };
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..4a5e204c33af 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -99,7 +99,6 @@ int main(void)
 	DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
 	DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
 	BLANK();
-	DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
 	DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
 	BLANK();
 	DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cc2d45d54838..9be95e11367d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -645,11 +645,12 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	isb
 	adr_l	x0, secondary_data
-	ldr	x1, [x0, #CPU_BOOT_STACK]	// get secondary_data.stack
-	cbz	x1, __secondary_too_slow
-	mov	sp, x1
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
+
+	ldr	x1, [x2, #TSK_STACK]
+	add	sp, x1, #THREAD_SIZE
+
 	msr	sp_el0, x2
 	scs_load x2, x3
 	setup_final_frame
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 92e83e8bac94..73625cc39574 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -120,7 +120,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * page tables.
 	 */
 	secondary_data.task = idle;
-	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
 	/* Now bring the CPU into our world */
 	ret = boot_secondary(cpu, idle);
@@ -141,7 +140,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 		pr_crit("CPU%u: failed to come online\n", cpu);
 		secondary_data.task = NULL;
-		secondary_data.stack = NULL;
 		status = READ_ONCE(secondary_data.status);
 		if (status == CPU_MMU_OFF)
 			status = READ_ONCE(__early_cpu_boot_status);
-- cgit

From 8e334d729bc4787f728e9e5abc91649f131124ff Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Thu, 20 May 2021 12:50:30 +0100
Subject: arm64: smp: unify task and sp setup

Once we enable the MMU, we have to initialize:

* SP_EL0 to point at the active task
* SP to point at the active task's stack
* SCS_SP to point at the active task's shadow stack

For all tasks (including init_task), this information can be derived
from the task's task_struct.

Let's unify __primary_switched and __secondary_switched to consistently
acquire this information from the relevant task_struct. At the same
time, let's fold this together with initializing a task's final frame.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: James Morse
Cc: Marc Zyngier
Cc: Suzuki Poulose
Cc: Will Deacon
Reviewed-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20210520115031.18509-6-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/head.S | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 9be95e11367d..e83b2899dce5 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -395,15 +395,24 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 SYM_FUNC_END(__create_page_tables)
 /*
+ * Initialize CPU registers with task-specific and cpu-specific context.
+ *
  * Create a final frame record at task_pt_regs(current)->stackframe, so
  * that the unwinder can identify the final frame record of any task by
  * its location in the task stack. We reserve the entire pt_regs space
  * for consistency with user tasks and kthreads.
  */
-	.macro setup_final_frame
+	.macro init_cpu_task tsk, tmp
+	msr	sp_el0, \tsk
+
+	ldr	\tmp, [\tsk, #TSK_STACK]
+	add	sp, \tmp, #THREAD_SIZE
 	sub	sp, sp, #PT_REGS_SIZE
+
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
 	add	x29, sp, #S_STACKFRAME
+
+	scs_load \tsk, \tmp
 	.endm
 /*
@@ -412,22 +421,16 @@ SYM_FUNC_END(__create_page_tables)
  * x0 = __PHYS_OFFSET
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
-	adrp	x4, init_thread_union
-	add	sp, x4, #THREAD_SIZE
-	adr_l	x5, init_task
-	msr	sp_el0, x5			// Save thread_info
+	adr_l	x4, init_task
+	init_cpu_task x4, x5
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
 	isb
-	stp	xzr, x30, [sp, #-16]!
+	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
-#ifdef CONFIG_SHADOW_CALL_STACK
-	adr_l	scs_sp, init_shadow_call_stack	// Set shadow call stack
-#endif
-
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 	ldr_l	x4, kimage_vaddr		// Save the offset between
@@ -459,8 +462,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 0:
 #endif
 	bl	switch_to_vhe			// Prefer VHE if possible
-	add	sp, sp, #16
-	setup_final_frame
+	ldp	x29, x30, [sp], #16
 	bl	start_kernel
 	ASM_BUG()
 SYM_FUNC_END(__primary_switched)
@@ -648,12 +650,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
-	ldr	x1, [x2, #TSK_STACK]
-	add	sp, x1, #THREAD_SIZE
-
-	msr	sp_el0, x2
-	scs_load x2, x3
-	setup_final_frame
+	init_cpu_task x2, x1
 #ifdef CONFIG_ARM64_PTR_AUTH
 	ptrauth_keys_init_cpu x2, x3, x4, x5
-- cgit

From 3d8c1a013d78f32ee266097496cbd89b734b5fcb Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Thu, 20 May 2021 12:50:31 +0100
Subject: arm64: smp: initialize cpu offset earlier

Now that we have a consistent place to initialize CPU context registers
early in the boot path, let's also initialize the per-cpu offset here.
This makes the primary and secondary boot paths more consistent, and
allows for the use of per-cpu operations earlier, which will be
necessary for instrumentation with KCSAN.

Note that smp_prepare_boot_cpu() still needs to re-initialize CPU0's
offset as immediately prior to this the per-cpu areas may be
reallocated, and hence the boot-time offset may be stale. A comment is
added to make this clear.

Signed-off-by: Mark Rutland
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: James Morse
Cc: Marc Zyngier
Cc: Suzuki Poulose
Cc: Will Deacon
Reviewed-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20210520115031.18509-7-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/asm-offsets.c |  1 +
 arch/arm64/kernel/head.S        | 17 +++++++++++------
 arch/arm64/kernel/setup.c       |  6 ------
 arch/arm64/kernel/smp.c         | 10 ++++++----
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 4a5e204c33af..bd0fc23d8719 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -27,6 +27,7 @@ int main(void)
 {
 	DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
+	DEFINE(TSK_CPU, offsetof(struct task_struct, cpu));
 	BLANK();
 	DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
 	DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e83b2899dce5..070ed53c049d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -402,17 +402,22 @@ SYM_FUNC_END(__create_page_tables)
  * its location in the task stack. We reserve the entire pt_regs space
  * for consistency with user tasks and kthreads.
  */
-	.macro init_cpu_task tsk, tmp
+	.macro init_cpu_task tsk, tmp1, tmp2
 	msr	sp_el0, \tsk
-	ldr	\tmp, [\tsk, #TSK_STACK]
-	add	sp, \tmp, #THREAD_SIZE
+	ldr	\tmp1, [\tsk, #TSK_STACK]
+	add	sp, \tmp1, #THREAD_SIZE
 	sub	sp, sp, #PT_REGS_SIZE
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
 	add	x29, sp, #S_STACKFRAME
-	scs_load \tsk, \tmp
+	scs_load \tsk, \tmp1
+
+	adr_l	\tmp1, __per_cpu_offset
+	ldr	w\tmp2, [\tsk, #TSK_CPU]
+	ldr	\tmp1, [\tmp1, \tmp2, lsl #3]
+	set_this_cpu_offset \tmp1
 	.endm
 /*
@@ -422,7 +427,7 @@ SYM_FUNC_END(__create_page_tables)
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
 	adr_l	x4, init_task
-	init_cpu_task x4, x5
+	init_cpu_task x4, x5, x6
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
@@ -650,7 +655,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
-	init_cpu_task x2, x1
+	init_cpu_task x2, x1, x3
 #ifdef CONFIG_ARM64_PTR_AUTH
 	ptrauth_keys_init_cpu x2, x3, x4, x5
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 61845c0821d9..b7a35a03e9b9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	set_cpu_logical_map(0, mpidr);
-	/*
-	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
-	 * using percpu variable early, for example, lockdep will
-	 * access percpu variable inside lock_release
-	 */
-	set_my_cpu_offset(0);
 	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
 		(unsigned long)mpidr, read_cpuid_id());
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 73625cc39574..2fe8fab886e2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -198,10 +198,7 @@ asmlinkage notrace void secondary_start_kernel(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	const struct cpu_operations *ops;
-	unsigned int cpu;
-
-	cpu = task_cpu(current);
-	set_my_cpu_offset(per_cpu_offset(cpu));
+	unsigned int cpu = smp_processor_id();
 	/*
 	 * All kernel threads share the same mm context; grab a
@@ -448,6 +445,11 @@ void __init smp_cpus_done(unsigned int max_cpus)
 void __init smp_prepare_boot_cpu(void)
 {
+	/*
+	 * The runtime per-cpu areas have been allocated by
+	 * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
+	 * freed shortly, so we must move over to the runtime per-cpu area.
+	 */
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 	cpuinfo_store_boot_cpu();
-- cgit

From 16c230b30de8b69ae75d2b98d04a77904da58d15 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 27 May 2021 11:55:29 +0100
Subject: arm64: scs: Drop unused 'tmp' argument to scs_{load, save} asm macros

The scs_load and scs_save asm macros don't make use of the mandatory
'tmp' register argument, so drop it and fix up the callers.

Cc: Sami Tolvanen
Cc: Mark Rutland
Acked-by: Mark Rutland
Reviewed-by: Sami Tolvanen
Link: https://lore.kernel.org/r/20210527105529.21967-1-will@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/scs.h | 8 ++++----
 arch/arm64/kernel/entry.S    | 8 ++++----
 arch/arm64/kernel/head.S     | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index eaa2cd92e4c1..8297bccf0784 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,18 +9,18 @@
 #ifdef CONFIG_SHADOW_CALL_STACK
 	scs_sp	.req	x18
-	.macro scs_load tsk, tmp
+	.macro scs_load tsk
 	ldr	scs_sp, [\tsk, #TSK_TI_SCS_SP]
 	.endm
-	.macro scs_save tsk, tmp
+	.macro scs_save tsk
 	str	scs_sp, [\tsk, #TSK_TI_SCS_SP]
 	.endm
 #else
-	.macro scs_load tsk, tmp
+	.macro scs_load tsk
 	.endm
-	.macro scs_save tsk, tmp
+	.macro scs_save tsk
 	.endm
 #endif /* CONFIG_SHADOW_CALL_STACK */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 294f24e16fee..3153f1448cdb 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -275,7 +275,7 @@ alternative_else_nop_endif
 	mte_set_kernel_gcr x22, x23
-	scs_load tsk, x20
+	scs_load tsk
 	.else
 	add	x21, sp, #PT_REGS_SIZE
 	get_current_task tsk
@@ -375,7 +375,7 @@ alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
-	scs_save tsk, x0
+	scs_save tsk
 #ifdef CONFIG_ARM64_PTR_AUTH
 alternative_if ARM64_HAS_ADDRESS_AUTH
@@ -979,8 +979,8 @@ SYM_FUNC_START(cpu_switch_to)
 	mov	sp, x9
 	msr	sp_el0, x1
 	ptrauth_keys_install_kernel x1, x8, x9, x10
-	scs_save x0, x8
-	scs_load x1, x8
+	scs_save x0
+	scs_load x1
 	ret
 SYM_FUNC_END(cpu_switch_to)
 NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 070ed53c049d..6a700526b117 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -412,7 +412,7 @@ SYM_FUNC_END(__create_page_tables)
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
 	add	x29, sp, #S_STACKFRAME
-	scs_load \tsk, \tmp1
+	scs_load \tsk
 	adr_l	\tmp1, __per_cpu_offset
 	ldr	w\tmp2, [\tsk, #TSK_CPU]
-- cgit
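
The net effect of the series is that init_cpu_task derives everything it needs from the task_struct alone: the stack pointer, the shadow call stack pointer, and the per-cpu offset, the last of which is loaded from __per_cpu_offset[task->cpu] and written to TPIDR_EL1 (or TPIDR_EL2 under VHE) via set_this_cpu_offset. The sketch below is a rough, user-space C illustration of that offset scheme only; it is not code from the series, the names (per_cpu_offset, this_cpu_offset, this_cpu_ptr) merely mirror the kernel's, and a plain variable stands in for the hardware register.

```c
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

/* One "per-cpu" slot per CPU; the kernel lays these out in per-cpu areas. */
static long counter[NR_CPUS];

/* Stand-in for the kernel's __per_cpu_offset[] table. */
static uintptr_t per_cpu_offset[NR_CPUS];

/*
 * Stand-in for TPIDR_EL1/TPIDR_EL2: the boot path (init_cpu_task ->
 * set_this_cpu_offset) caches the current CPU's offset in a register;
 * here an ordinary variable plays that role.
 */
static uintptr_t this_cpu_offset;

/* Per-cpu accesses add the cached offset to the variable's base address. */
static long *this_cpu_ptr(long *base)
{
	return (long *)((uintptr_t)base + this_cpu_offset);
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		per_cpu_offset[cpu] = cpu * sizeof(long);

	int cpu = 2;                            /* pretend we are booting CPU 2 */
	this_cpu_offset = per_cpu_offset[cpu];  /* what init_cpu_task now does  */

	(*this_cpu_ptr(&counter[0]))++;         /* a "per-cpu" increment        */
	printf("counter[%d] = %ld\n", cpu, counter[cpu]);
	return 0;
}
```

Caching the offset in a register is what lets per-cpu operations run without first asking which CPU is executing, which is why the series moves the initialization into init_cpu_task, before anything (such as KCSAN instrumentation) might need per-cpu data.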