From 7d7b720a4b8049446cffce870b1dd3ffa89d4b40 Mon Sep 17 00:00:00 2001
From: "Madhavan T. Venkataraman"
Date: Mon, 10 May 2021 12:00:26 +0100
Subject: arm64: Implement stack trace termination record

Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.

We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already setup upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().

Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.

Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.

External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.

Signed-off-by: Madhavan T. Venkataraman
Reviewed-by: Mark Brown
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/entry.S      |  2 +-
 arch/arm64/kernel/head.S       | 25 +++++++++++++++++++------
 arch/arm64/kernel/process.c    |  5 +++++
 arch/arm64/kernel/stacktrace.c | 16 +++++++---------
 4 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3513984a88bd..294f24e16fee 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -285,7 +285,7 @@ alternative_else_nop_endif
 	stp	lr, x21, [sp, #S_LR]
 
 	/*
-	 * For exceptions from EL0, create a terminal frame record.
+	 * For exceptions from EL0, create a final frame record.
 	 * For exceptions from EL1, create a synthetic frame record so the
 	 * interrupted code shows up in the backtrace.
 	 */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd..cc2d45d54838 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -393,6 +394,18 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	ret	x28
 SYM_FUNC_END(__create_page_tables)
 
+	/*
+	 * Create a final frame record at task_pt_regs(current)->stackframe, so
+	 * that the unwinder can identify the final frame record of any task by
+	 * its location in the task stack. We reserve the entire pt_regs space
+	 * for consistency with user tasks and kthreads.
+	 */
+	.macro setup_final_frame
+	sub	sp, sp, #PT_REGS_SIZE
+	stp	xzr, xzr, [sp, #S_STACKFRAME]
+	add	x29, sp, #S_STACKFRAME
+	.endm
+
 /*
  * The following fragment of code is executed with the MMU enabled.
  *
@@ -447,9 +460,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 #endif
 	bl	switch_to_vhe		// Prefer VHE if possible
 	add	sp, sp, #16
-	mov	x29, #0
-	mov	x30, #0
-	b	start_kernel
+	setup_final_frame
+	bl	start_kernel
+	ASM_BUG()
 SYM_FUNC_END(__primary_switched)
 
 	.pushsection ".rodata", "a"
@@ -639,14 +652,14 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	cbz	x2, __secondary_too_slow
 	msr	sp_el0, x2
 	scs_load x2, x3
-	mov	x29, #0
-	mov	x30, #0
+	setup_final_frame
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 	ptrauth_keys_init_cpu x2, x3, x4, x5
 #endif
 
-	b	secondary_start_kernel
+	bl	secondary_start_kernel
+	ASM_BUG()
 SYM_FUNC_END(__secondary_switched)
 
 SYM_FUNC_START_LOCAL(__secondary_too_slow)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2c..8928fba54e4b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -435,6 +435,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	}
 	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
 	p->thread.cpu_context.sp = (unsigned long)childregs;
+	/*
+	 * For the benefit of the unwinder, set up childregs->stackframe
+	 * as the final frame for the new task.
+	 */
+	p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
 
 	ptrace_hw_copy_thread(p);
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index de07147a7926..36cf05d5eb9e 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,12 +68,16 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	unsigned long fp = frame->fp;
 	struct stack_info info;
 
-	if (fp & 0xf)
-		return -EINVAL;
-
 	if (!tsk)
 		tsk = current;
 
+	/* Final frame; nothing to unwind */
+	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+		return -ENOENT;
+
+	if (fp & 0xf)
+		return -EINVAL;
+
 	if (!on_accessible_stack(tsk, fp, &info))
 		return -EINVAL;
 
@@ -128,12 +132,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 
 	frame->pc = ptrauth_strip_insn_pac(frame->pc);
 
-	/*
-	 * This is a terminal record, so we have finished unwinding.
-	 */
-	if (!frame->fp && !frame->pc)
-		return -ENOENT;
-
 	return 0;
 }
 NOKPROBE_SYMBOL(unwind_frame);
-- cgit
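
To make the termination scheme above concrete, here is a minimal,
self-contained C sketch of a frame-pointer walker that recognises a known
final frame record by its location, before reading anything from it, rather
than by looking for FP/PC == 0. This is not the kernel's unwind_frame();
struct frame, final_fp and walk_stack are names invented for the
illustration.

#include <stdio.h>

/* An AAPCS-style frame record: saved frame pointer (x29) and link register (x30). */
struct frame {
	struct frame *fp;	/* previous frame record in the chain */
	unsigned long lr;	/* return address */
};

/*
 * Walk a frame-pointer chain. The final record is recognised by its
 * address (final_fp) before it is dereferenced, so a chain that ends
 * without reaching it can be reported as unreliable.
 */
static int walk_stack(struct frame *fp, struct frame *final_fp)
{
	while (fp) {
		if (fp == final_fp)
			return 0;	/* cleanly terminated (-ENOENT in the kernel) */
		printf("  lr = %#lx\n", fp->lr);
		fp = fp->fp;
	}
	return -1;	/* ran off the chain: the trace is not reliable */
}

int main(void)
{
	struct frame final = { NULL, 0 };	/* stands in for task_pt_regs()->stackframe */
	struct frame f1 = { &final, 0x1234 };
	struct frame f0 = { &f1, 0x5678 };

	if (walk_stack(&f0, &final) == 0)
		printf("trace terminated at the final frame record\n");
	return 0;
}

As in the patch, the zeroed contents of the final record only matter to
external debuggers; the kernel-side check is purely positional.
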
From 76734d26b54192a31440039459eef2612da63ed4 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne
Date: Wed, 26 May 2021 10:49:25 -0700
Subject: arm64: Change the on_*stack functions to take a size argument

unwind_frame() was previously implicitly checking that the frame record
is in bounds of the stack by enforcing that FP is both aligned to 16 and
in bounds of the stack. Once the FP alignment requirement is relaxed to 8
this will not be sufficient because it does not account for the case
where FP points to 8 bytes before the end of the stack.

Make the check explicit by changing the on_*stack functions to take a
size argument and adjusting the callers to pass the appropriate sizes.

Signed-off-by: Peter Collingbourne
Link: https://linux-review.googlesource.com/id/Ib7a3eb3eea41b0687ffaba045ceb2012d077d8b4
Reviewed-by: Mark Rutland
Tested-by: Mark Rutland
Link: https://lore.kernel.org/r/20210526174927.2477847-1-pcc@google.com
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/processor.h  | 12 ++++++------
 arch/arm64/include/asm/sdei.h       |  7 ++++---
 arch/arm64/include/asm/stacktrace.h | 32 ++++++++++++++++----------------
 arch/arm64/kernel/ptrace.c          |  2 +-
 arch/arm64/kernel/sdei.c            | 16 +++++++++-------
 arch/arm64/kernel/stacktrace.c      |  2 +-
 6 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3feeee890..7a094aafec20 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -329,13 +329,13 @@ long get_tagged_addr_ctrl(struct task_struct *task);
  * of header definitions for the use of task_stack_page.
  */
 
-#define current_top_of_stack()	\
-({	\
-	struct stack_info _info;	\
-	BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info));	\
-	_info.high;	\
+#define current_top_of_stack()							\
+({										\
+	struct stack_info _info;						\
+	BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info));	\
+	_info.high;								\
 })
-#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, NULL))
+#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, 1, NULL))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 63e0b92a5fbb..8bc30a5c4569 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -42,8 +42,9 @@ unsigned long sdei_arch_get_entry_point(int conduit);
 
 struct stack_info;
 
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp,
+bool _on_sdei_stack(unsigned long sp, unsigned long size,
+		    struct stack_info *info);
+static inline bool on_sdei_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
 	if (!IS_ENABLED(CONFIG_VMAP_STACK))
@@ -51,7 +52,7 @@ static inline bool on_sdei_stack(unsigned long sp,
 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
 		return false;
 	if (in_nmi())
-		return _on_sdei_stack(sp, info);
+		return _on_sdei_stack(sp, size, info);
 
 	return false;
 }
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4b33ca620679..1801399204d7 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -69,14 +69,14 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 
 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
 
-static inline bool on_stack(unsigned long sp, unsigned long low,
-			    unsigned long high, enum stack_type type,
-			    struct stack_info *info)
+static inline bool on_stack(unsigned long sp, unsigned long size,
+			    unsigned long low, unsigned long high,
+			    enum stack_type type, struct stack_info *info)
 {
 	if (!low)
 		return false;
 
-	if (sp < low || sp >= high)
+	if (sp < low || sp + size < sp || sp + size > high)
 		return false;
 
 	if (info) {
@@ -87,38 +87,38 @@ static inline bool on_stack(unsigned long sp, unsigned long low,
 	return true;
 }
 
-static inline bool on_irq_stack(unsigned long sp,
+static inline bool on_irq_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
 	unsigned long high = low + IRQ_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_IRQ, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info);
 }
 
 static inline bool on_task_stack(const struct task_struct *tsk,
-				 unsigned long sp,
+				 unsigned long sp, unsigned long size,
 				 struct stack_info *info)
 {
 	unsigned long low = (unsigned long)task_stack_page(tsk);
 	unsigned long high = low + THREAD_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_TASK, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_TASK, info);
 }
 
 #ifdef CONFIG_VMAP_STACK
 DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
 
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
 	unsigned long high = low + OVERFLOW_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
 }
 #else
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
 			struct stack_info *info) { return false; }
 #endif
 
@@ -128,21 +128,21 @@ static inline bool on_overflow_stack(unsigned long sp,
  * context.
 */
 static inline bool on_accessible_stack(const struct task_struct *tsk,
-				       unsigned long sp,
+				       unsigned long sp, unsigned long size,
 				       struct stack_info *info)
 {
 	if (info)
 		info->type = STACK_TYPE_UNKNOWN;
 
-	if (on_task_stack(tsk, sp, info))
+	if (on_task_stack(tsk, sp, size, info))
 		return true;
 	if (tsk != current || preemptible())
 		return false;
-	if (on_irq_stack(sp, info))
+	if (on_irq_stack(sp, size, info))
 		return true;
-	if (on_overflow_stack(sp, info))
+	if (on_overflow_stack(sp, size, info))
 		return true;
-	if (on_sdei_stack(sp, info))
+	if (on_sdei_stack(sp, size, info))
 		return true;
 
 	return false;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index eb2f73939b7b..499b6b2f9757 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
 {
 	return ((addr & ~(THREAD_SIZE - 1)) ==
 		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
-		on_irq_stack(addr, NULL);
+		on_irq_stack(addr, sizeof(unsigned long), NULL);
 }
 
 /**
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 2c7ca449dd51..c524f96f97c4 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -162,31 +162,33 @@ static int init_sdei_scs(void)
 	return err;
 }
 
-static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_normal_stack(unsigned long sp, unsigned long size,
+				 struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
 	unsigned long high = low + SDEI_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info);
 }
 
-static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_critical_stack(unsigned long sp, unsigned long size,
+				   struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
 	unsigned long high = low + SDEI_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info);
 }
 
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
+bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info)
 {
 	if (!IS_ENABLED(CONFIG_VMAP_STACK))
 		return false;
 
-	if (on_sdei_critical_stack(sp, info))
+	if (on_sdei_critical_stack(sp, size, info))
 		return true;
 
-	if (on_sdei_normal_stack(sp, info))
+	if (on_sdei_normal_stack(sp, size, info))
 		return true;
 
 	return false;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 36cf05d5eb9e..5c70f247645b 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -78,7 +78,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (fp & 0xf)
 		return -EINVAL;
 
-	if (!on_accessible_stack(tsk, fp, &info))
+	if (!on_accessible_stack(tsk, fp, 16, &info))
 		return -EINVAL;
 
 	if (test_bit(info.type, frame->stacks_done))
-- cgit
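
The effect of the new size-aware check in on_stack() is easier to see in
isolation. The sketch below mirrors the sp/size/low/high comparison from the
hunk above, including the sp + size < sp wrap-around guard;
record_on_stack() and the example addresses are invented for the
demonstration.

#include <stdbool.h>
#include <stdio.h>

/* True if an object of 'size' bytes at 'sp' lies wholly within [low, high). */
static bool record_on_stack(unsigned long sp, unsigned long size,
			    unsigned long low, unsigned long high)
{
	if (!low)
		return false;

	/* 'sp + size < sp' catches wrap-around when sp is near ULONG_MAX. */
	if (sp < low || sp + size < sp || sp + size > high)
		return false;

	return true;
}

int main(void)
{
	unsigned long low = 0x1000, high = 0x5000;

	/* A 16-byte frame record starting 8 bytes before the end of the stack. */
	printf("%d\n", record_on_stack(high - 8, 16, low, high));	/* 0: rejected */

	/* The same record starting 16 bytes before the end still fits. */
	printf("%d\n", record_on_stack(high - 16, 16, low, high));	/* 1: accepted */
	return 0;
}

The old test, sp < low || sp >= high, would have accepted the first case,
which is exactly the situation the commit message describes once FP may be
only 8-byte aligned.
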
From 33c222aeda14596ca5b9a1a3002858c6c3565ddd Mon Sep 17 00:00:00 2001
From: Peter Collingbourne
Date: Wed, 26 May 2021 10:49:26 -0700
Subject: arm64: stacktrace: Relax frame record alignment requirement to 8 bytes

The AAPCS places no requirements on the alignment of the frame record.
In theory it could be placed anywhere, although it seems sensible to
require it to be aligned to 8 bytes. With an upcoming enhancement to
tag-based KASAN Clang will begin creating frame records located at an
address that is only aligned to 8 bytes. Accommodate such frame records
in the stack unwinding code.

As pointed out by Mark Rutland, the userspace stack unwinding code has
the same problem, so fix it there as well.

Signed-off-by: Peter Collingbourne
Link: https://linux-review.googlesource.com/id/Ia22c375230e67ca055e9e4bb639383567f7ad268
Acked-by: Andrey Konovalov
Reviewed-by: Mark Rutland
Tested-by: Mark Rutland
Link: https://lore.kernel.org/r/20210526174927.2477847-2-pcc@google.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/perf_callchain.c | 2 +-
 arch/arm64/kernel/stacktrace.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 88ff471b0bce..4a72c2727309 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -116,7 +116,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 		tail = (struct frame_tail __user *)regs->regs[29];
 
 		while (entry->nr < entry->max_stack &&
-		       tail && !((unsigned long)tail & 0xf))
+		       tail && !((unsigned long)tail & 0x7))
 			tail = user_backtrace(tail, entry);
 	} else {
 #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 5c70f247645b..b189de5ca6cb 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -75,7 +75,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
 		return -ENOENT;
 
-	if (fp & 0xf)
+	if (fp & 0x7)
 		return -EINVAL;
 
 	if (!on_accessible_stack(tsk, fp, 16, &info))
-- cgit
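
Since an AAPCS64 frame record is just two 64-bit words (the saved FP and
LR), 8-byte alignment is all a well-formed record needs. A small standalone
illustration of the relaxed mask, using arbitrary example addresses:

#include <stdio.h>

/* A frame record need only be 8-byte aligned now; 0xf was the old mask. */
static int plausible_fp(unsigned long fp)
{
	return (fp & 0x7) == 0;
}

int main(void)
{
	/* 8-byte aligned but not 16-byte aligned, as tag-based KASAN Clang may emit. */
	printf("%d\n", plausible_fp(0xffff800010003ff8UL));	/* 1: accepted */

	/* Not even 8-byte aligned: still rejected. */
	printf("%d\n", plausible_fp(0xffff800010003ffcUL));	/* 0: rejected */
	return 0;
}

Together with the size argument introduced in the previous patch, the
unwinder still guarantees that the whole 16-byte record lies within the
stack even when FP itself is only 8-byte aligned.
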