From e37e43a497d5a8b7c0cc1736d56986f432c394c9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Aug 2016 02:35:23 -0700 Subject: x86/mm/64: Enable vmapped stacks (CONFIG_HAVE_ARCH_VMAP_STACK=y) This allows x86_64 kernels to enable vmapped stacks by setting HAVE_ARCH_VMAP_STACK=y - which enables the CONFIG_VMAP_STACK=y high level Kconfig option. There are a couple of interesting bits: First, x86 lazily faults in top-level paging entries for the vmalloc area. This won't work if we get a page fault while trying to access the stack: the CPU will promote it to a double-fault and we'll die. To avoid this problem, probe the new stack when switching stacks and forcibly populate the pgd entry for the stack when switching mms. Second, once we have guard pages around the stack, we'll want to detect and handle stack overflow. I didn't enable it on x86_32. We'd need to rework the double-fault code a bit and I'm concerned about running out of vmalloc virtual addresses under some workloads. This patch, by itself, will behave somewhat erratically when the stack overflows while RSP is still more than a few tens of bytes above the bottom of the stack. Specifically, we'll get #PF and make it to no_context and them oops without reliably triggering a double-fault, and no_context doesn't know about stack overflows. The next patch will improve that case. Thank you to Nadav and Brian for helping me pay enough attention to the SDM to hopefully get this right. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c88f3e2920b18e6cc621d772a04a62c06869037e.1470907718.git.luto@kernel.org [ Minor edits. ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c580d8c33562..21a6d0ec5983 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -94,6 +94,7 @@ config X86 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_EBPF_JIT if X86_64 + select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL -- cgit From e4a744ef2fef5c803348b650a3a2d01da7797a9b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 19 Aug 2016 06:52:55 -0500 Subject: ftrace: Remove CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST from config Make HAVE_FUNCTION_GRAPH_FP_TEST a normal define, independent from kconfig. This removes some config file pollution and simplifies the checking for the fp test. Suggested-by: Steven Rostedt Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Byungchul Park Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Nilay Vaish Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2c4e5f05054d6d367f702fd153af7a0109dd5c81.1471607358.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/arm64/kernel/entry-ftrace.S | 2 +- arch/blackfin/kernel/ftrace-entry.S | 4 ++-- arch/sparc/Kconfig | 1 - arch/sparc/include/asm/ftrace.h | 4 ++++ arch/x86/Kconfig | 1 - arch/x86/include/asm/ftrace.h | 1 + kernel/trace/Kconfig | 5 ----- kernel/trace/trace_functions_graph.c | 2 +- 8 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 0f03a8fe2314..aef02d2af3b5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -219,7 +219,7 @@ ENDPROC(ftrace_graph_caller) * * Run ftrace_return_to_handler() before going back to parent. * @fp is checked against the value passed by ftrace_graph_caller() - * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. + * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) save_return_regs diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S index 28d059540424..3b8bdcbb7da3 100644 --- a/arch/blackfin/kernel/ftrace-entry.S +++ b/arch/blackfin/kernel/ftrace-entry.S @@ -169,7 +169,7 @@ ENTRY(_ftrace_graph_caller) r0 = sp; /* unsigned long *parent */ r1 = [sp]; /* unsigned long self_addr */ # endif -# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +# ifdef HAVE_FUNCTION_GRAPH_FP_TEST r2 = fp; /* unsigned long frame_pointer */ # endif r0 += 16; /* skip the 4 local regs on stack */ @@ -190,7 +190,7 @@ ENTRY(_return_to_handler) [--sp] = r1; /* get original return address */ -# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +# ifdef HAVE_FUNCTION_GRAPH_FP_TEST r0 = fp; /* Blackfin is sane, so omit this */ # endif call _ftrace_return_to_handler; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 59b09600dd32..f5d60f14a0bc 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -56,7 +56,6 @@ config SPARC64 def_bool 64BIT select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 3192a8e42fd6..62755a339a59 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -9,6 +9,10 @@ void _mcount(void); #endif +#endif /* CONFIG_MCOUNT */ + +#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY) +#define HAVE_FUNCTION_GRAPH_FP_TEST #endif #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 21a6d0ec5983..ce8860cccc34 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -111,7 +111,6 @@ config X86 select HAVE_EXIT_THREAD select HAVE_FENTRY if X86_64 select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a4820d4df617..37f67cbba1c6 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -6,6 +6,7 @@ # define MCOUNT_ADDR ((unsigned long)(__fentry__)) #else # define MCOUNT_ADDR ((unsigned long)(mcount)) +# define HAVE_FUNCTION_GRAPH_FP_TEST #endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f4b86e8ca1e7..ba3326785ca4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -24,11 +24,6 @@ config HAVE_FUNCTION_GRAPH_TRACER help See Documentation/trace/ftrace-design.txt -config HAVE_FUNCTION_GRAPH_FP_TEST - bool - help - See Documentation/trace/ftrace-design.txt - config HAVE_DYNAMIC_FTRACE bool help diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 7363ccf79512..fc173cd9fbfd 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -204,7 +204,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, return; } -#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST /* * The arch may choose to record the frame pointer used * and check it here to make sure that it is what we expect it -- cgit From 15f4eae70d365bba26854c90b6002aaabb18c8aa Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 13 Sep 2016 14:29:25 -0700 Subject: x86: Move thread_info into task_struct Now that most of the thread_info users have been cleaned up, this is straightforward. Most of this code was written by Linus. Originally-from: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jann Horn Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a50eab40abeaec9cb9a9e3cbdeafd32190206654.1473801993.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/entry/entry_64.S | 7 ++++-- arch/x86/include/asm/thread_info.h | 46 -------------------------------------- arch/x86/kernel/asm-offsets.c | 4 +--- arch/x86/kernel/irq_64.c | 3 +-- arch/x86/kernel/process.c | 6 ++--- 6 files changed, 10 insertions(+), 57 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4c3972847c2a..2a83bc8b24c6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -157,6 +157,7 @@ config X86 select SPARSE_IRQ select SRCU select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select X86_DEV_DMA_OPS if X86_64 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7fba58f4d9c..2b46384b4a4f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. */ - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + movq PER_CPU_VAR(current_task), %r11 + testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz entry_SYSCALL64_slow_path entry_SYSCALL_64_fastpath: @@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath: */ DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + movq PER_CPU_VAR(current_task), %r11 + testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) jnz 1f LOCKDEP_SYS_EXIT @@ -370,6 +372,7 @@ END(ptregs_\func) /* * %rdi: prev task * %rsi: next task + * rsi: task we're switching to */ ENTRY(__switch_to_asm) /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c9dcfe7c7e4b..2aaca53c0974 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,20 +52,6 @@ struct task_struct; #include #include -struct thread_info { - struct task_struct *task; /* main task structure */ - __u32 flags; /* low level flags */ - __u32 cpu; /* current CPU */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .flags = 0, \ - .cpu = 0, \ -} - -#define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ @@ -157,11 +143,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); -} - static inline unsigned long current_stack_pointer(void) { unsigned long sp; @@ -223,33 +204,6 @@ static inline int arch_within_stack_frames(const void * const stack, # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) #endif -/* - * ASM operand which evaluates to a 'thread_info' address of - * the current task, if it is known that "reg" is exactly "off" - * bytes below the top of the stack currently. - * - * ( The kernel stack's size is known at build time, it is usually - * 2 or 4 pages, and the bottom of the kernel stack contains - * the thread_info structure. So to access the thread_info very - * quickly from assembly code we can calculate down from the - * top of the kernel stack to the bottom, using constant, - * build-time calculations only. ) - * - * For example, to fetch the current thread_info->flags value into %eax - * on x86-64 defconfig kernels, in syscall entry code where RSP is - * currently at exactly SIZEOF_PTREGS bytes away from the top of the - * stack: - * - * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax - * - * will translate to: - * - * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax - * - * which is below the current RSP by almost 16K. - */ -#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) - #endif #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index add5f90b93d4..c62e015b126c 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -35,9 +35,7 @@ void common(void) { #endif BLANK(); - OFFSET(TI_flags, thread_info, flags); - - BLANK(); + OFFSET(TASK_TI_flags, task_struct, thread_info.flags); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a7903714065..9ebd0b0e73d9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode(regs)) return; - if (regs->sp >= curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + STACK_TOP_MARGIN && + if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c1fa790c81cd..0b9ed8ec5226 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -549,9 +549,7 @@ unsigned long get_wchan(struct task_struct *p) * PADDING * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING * stack - * ----------- bottom = start + sizeof(thread_info) - * thread_info - * ----------- start + * ----------- bottom = start * * The tasks stack pointer points at the location where the * framepointer is stored. The data on the stack is: @@ -562,7 +560,7 @@ unsigned long get_wchan(struct task_struct *p) */ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; top -= 2 * sizeof(unsigned long); - bottom = start + sizeof(struct thread_info); + bottom = start; sp = READ_ONCE(p->thread.sp); if (sp < bottom || sp > top) -- cgit