From 1966c5e5bd9802cf62d3744ef4d2d6d32e22604d Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Tue, 5 Jun 2018 18:39:09 +0200
Subject: x86/asm: Use CC_SET/CC_OUT in percpu_cmpxchg8b_double() to micro-optimize code generation

Use CC_SET(z)/CC_OUT(z) instead of an explicit SETZ instruction.

With these two defines, a compiler that supports condition-code (flag)
outputs from inline assembly generates e.g.:

  cmpxchg8b %fs:(%esi)
  jne 172255 <__kmalloc+0x65>

instead of:

  cmpxchg8b %fs:(%esi)
  sete %al
  test %al,%al
  je 172255 <__kmalloc+0x65>

Note that older compilers now generate:

  cmpxchg8b %fs:(%esi)
  sete %cl
  test %cl,%cl
  je 173a85 <__kmalloc+0x65>

since we have to mark that the cmpxchg8b instruction outputs to the %eax
register and thus clobbers the value in the register.

Signed-off-by: Uros Bizjak
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lore.kernel.org/lkml/20180605163910.13015-1-ubizjak@gmail.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/percpu.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a06b07399d17..e9202a0de8f0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -450,9 +450,10 @@ do {									\
 	bool __ret;							\
 	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
 	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
-	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
-		    : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
-		    : "b" (__n1), "c" (__n2), "a" (__o1));		\
+	asm volatile("cmpxchg8b "__percpu_arg(1)			\
+		     CC_SET(z)						\
+		     : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
+		     : "b" (__n1), "c" (__n2));				\
 	__ret;								\
 })
-- cgit

From 0797a8d0d79769574550caa5ca5d89c237723250 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Fri, 18 May 2018 08:47:08 +0200
Subject: x86/stacktrace: Do not unwind after user regs

Josh pointed out that there is no way a frame can come after user regs.
So remove the last unwind and the check.

Signed-off-by: Jiri Slaby
Acked-by: Josh Poimboeuf
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/lkml/20180518064713.26440-1-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/stacktrace.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 093f2ea5dd56..8948b7d9c064 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -113,15 +113,6 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 			if (!user_mode(regs))
 				return -EINVAL;
 
-			/*
-			 * The last frame contains the user mode syscall
-			 * pt_regs. Skip it and finish the unwind.
-			 */
-			unwind_next_frame(&state);
-			if (!unwind_done(&state)) {
-				STACKTRACE_DUMP_ONCE(task);
-				return -EINVAL;
-			}
 			break;
 		}
-- cgit

From 17426923b03f098da83b8c1e044934a34959f69b Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Fri, 18 May 2018 08:47:09 +0200
Subject: x86/stacktrace: Remove STACKTRACE_DUMP_ONCE

Stack unwinding can still sometimes fail, especially with generated
debug info, so do not yell at users -- live patching (the only user of
this interface) will inform the user about the failure gracefully.

And given this was the only user of the macro, remove the macro proper
too.
Signed-off-by: Jiri Slaby
Acked-by: Josh Poimboeuf
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/lkml/20180518064713.26440-2-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/stacktrace.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 8948b7d9c064..f9dacf6d4667 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -81,16 +81,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 #ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
 
-#define STACKTRACE_DUMP_ONCE(task) ({				\
-	static bool __section(.data.unlikely) __dumped;		\
-								\
-	if (!__dumped) {					\
-		__dumped = true;				\
-		WARN_ON(1);					\
-		show_stack(task, NULL);				\
-	}							\
-})
-
 static int __always_inline
 __save_stack_trace_reliable(struct stack_trace *trace,
 			    struct task_struct *task)
@@ -123,20 +113,16 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 		 * generated code which __kernel_text_address() doesn't know
 		 * about.
 		 */
-		if (!addr) {
-			STACKTRACE_DUMP_ONCE(task);
+		if (!addr)
 			return -EINVAL;
-		}
 
 		if (save_stack_address(trace, addr, false))
 			return -EINVAL;
 	}
 
 	/* Check for stack corruption */
-	if (unwind_error(&state)) {
-		STACKTRACE_DUMP_ONCE(task);
+	if (unwind_error(&state))
 		return -EINVAL;
-	}
 
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
-- cgit

From 441ccc3580f45340715fd8f5c4db795b06326404 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Fri, 18 May 2018 08:47:10 +0200
Subject: x86/stacktrace: Clarify the reliable success paths

Make it clear which path is for user tasks and which is for kthreads
and idle tasks. This will make it easier to plug in the ORC unwinder in
the next patches.

Note that a check for unwind errors was added at the top of the loop,
so that an error is also returned for user tasks (otherwise the
'goto success' would skip the check after the loop).

Signed-off-by: Jiri Slaby
Acked-by: Josh Poimboeuf
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/lkml/20180518064713.26440-3-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/stacktrace.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index f9dacf6d4667..6acf1d5ca832 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -89,21 +89,24 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 	struct pt_regs *regs;
 	unsigned long addr;
 
-	for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
+	for (unwind_start(&state, task, NULL, NULL);
+	     !unwind_done(&state) && !unwind_error(&state);
 	     unwind_next_frame(&state)) {
 
 		regs = unwind_get_entry_regs(&state, NULL);
 		if (regs) {
+			/* Success path for user tasks */
+			if (user_mode(regs))
+				goto success;
+
 			/*
 			 * Kernel mode registers on the stack indicate an
 			 * in-kernel interrupt or exception (e.g., preemption
 			 * or a page fault), which can make frame pointers
 			 * unreliable.
 			 */
-			if (!user_mode(regs))
-				return -EINVAL;
 
-			break;
+			return -EINVAL;
 		}
 
 		addr = unwind_get_return_address(&state);
@@ -124,6 +127,11 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 	if (unwind_error(&state))
 		return -EINVAL;
 
+	/* Success path for non-user tasks, i.e. kthreads and idle tasks */
+	if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
+		return -EINVAL;
+
+success:
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
-- cgit

From 0c414367c04eeb00c3ebfee0b74c9e7f3b95fd62 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Fri, 18 May 2018 08:47:11 +0200
Subject: x86/stacktrace: Do not fail for ORC with regs on stack

save_stack_trace_reliable() now returns "non reliable" when there are
kernel pt_regs on the stack. This means an interrupt or exception
happened somewhere along the way.

This is a problem for the frame pointer unwinder, because the frame
might not have been set up yet when the irq happened, so the unwinder
might fail to unwind from the interrupted function.

With ORC, this is not a problem, as ORC has out-of-band data: ORC data
can be found even for the IP in the interrupted function, so the
unwinder can always go one level up reliably.

So make the check apply only when CONFIG_FRAME_POINTER is enabled.

Signed-off-by: Jiri Slaby
Acked-by: Josh Poimboeuf
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/lkml/20180518064713.26440-4-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/stacktrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6acf1d5ca832..7627455047c2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -106,7 +106,8 @@ __save_stack_trace_reliable(struct stack_trace *trace,
 			 * unreliable.
 			 */
 
-			return -EINVAL;
+			if (IS_ENABLED(CONFIG_FRAME_POINTER))
+				return -EINVAL;
 		}
-- cgit

From d31a580266eeb1f355df90fde8a71f480e30ad70 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf
Date: Fri, 18 May 2018 08:47:12 +0200
Subject: x86/unwind/orc: Detect the end of the stack

The existing UNWIND_HINT_EMPTY annotations happen to be good indicators
of where entry code calls into C code for the first time. So also use
them to mark the end of the stack for the ORC unwinder.

Use that information to set unwind->error if the ORC unwinder doesn't
unwind all the way to the end.

This will be needed for enabling HAVE_RELIABLE_STACKTRACE for the ORC
unwinder, so we can use it with the livepatch consistency model.

Thanks to Jiri Slaby for teaching the ORCs about the unwind hints.

Signed-off-by: Josh Poimboeuf
Signed-off-by: Jiri Slaby
Acked-by: Josh Poimboeuf
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/lkml/20180518064713.26440-5-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/entry_64.S | 1 +
 arch/x86/include/asm/orc_types.h | 2 +
 arch/x86/include/asm/unwind_hints.h | 16 +++++---
 arch/x86/kernel/unwind_orc.c | 52 +++++++++++++++-----------
 tools/objtool/arch/x86/include/asm/orc_types.h | 2 +
 tools/objtool/check.c | 1 +
 tools/objtool/check.h | 2 +-
 tools/objtool/orc_dump.c | 3 +-
 tools/objtool/orc_gen.c | 2 +
 9 files changed, 52 insertions(+), 29 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73a522d53b53..c6f3677e6105 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -408,6 +408,7 @@ ENTRY(ret_from_fork)
 
 1:
 	/* kernel thread */
+	UNWIND_HINT_EMPTY
 	movq	%r12, %rdi
 	CALL_NOSPEC %rbx
 	/*
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7d..46f516dd80ce 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -88,6 +88,7 @@ struct orc_entry {
 	unsigned	sp_reg:4;
 	unsigned	bp_reg:4;
 	unsigned	type:2;
+	unsigned	end:1;
 } __packed;
 
 /*
@@ -101,6 +102,7 @@ struct unwind_hint {
 	s16		sp_offset;
 	u8		sp_reg;
 	u8		type;
+	u8		end;
 };
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index bae46fc6b9de..0bcdb1279361 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -26,7 +26,7 @@
  * the debuginfo as necessary.  It will also warn if it sees any
  * inconsistencies.
  */
-.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0
 #ifdef CONFIG_STACK_VALIDATION
 .Lunwind_hint_ip_\@:
 	.pushsection .discard.unwind_hints
@@ -35,12 +35,14 @@
 	.short \sp_offset
 	.byte \sp_reg
 	.byte \type
+	.byte \end
+	.balign 4
 	.popsection
 #endif
 .endm
 
 .macro UNWIND_HINT_EMPTY
-	UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+	UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1
 .endm
 
 .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
@@ -86,19 +88,21 @@
 
 #else /* !__ASSEMBLY__ */
 
-#define UNWIND_HINT(sp_reg, sp_offset, type)			\
+#define UNWIND_HINT(sp_reg, sp_offset, type, end)		\
 	"987: \n\t"						\
 	".pushsection .discard.unwind_hints\n\t"		\
 	/* struct unwind_hint */				\
 	".long 987b - .\n\t"					\
-	".short " __stringify(sp_offset) "\n\t"		\
+	".short " __stringify(sp_offset) "\n\t"			\
 	".byte " __stringify(sp_reg) "\n\t"			\
 	".byte " __stringify(type) "\n\t"			\
+	".byte " __stringify(end) "\n\t"			\
+	".balign 4 \n\t"					\
 	".popsection\n\t"
 
-#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
 
-#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index feb28fee6cea..26038eacf74a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -198,7 +198,7 @@ static int orc_sort_cmp(const void *_a, const void *_b)
 	 * whitelisted .o files which didn't get objtool generation.
 	 */
 	orc_a = cur_orc_table + (a - cur_orc_ip_table);
-	return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
+	return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1;
 }
 
 #ifdef CONFIG_MODULES
@@ -352,7 +352,7 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
 
 bool unwind_next_frame(struct unwind_state *state)
 {
-	unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
+	unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
 	enum stack_type prev_type = state->stack_info.type;
 	struct orc_entry *orc;
 	bool indirect = false;
@@ -363,9 +363,9 @@ bool unwind_next_frame(struct unwind_state *state)
 	/* Don't let modules unload while we're reading their ORC data. */
 	preempt_disable();
 
-	/* Have we reached the end? */
+	/* End-of-stack check for user tasks: */
 	if (state->regs && user_mode(state->regs))
-		goto done;
+		goto the_end;
 
 	/*
 	 * Find the orc_entry associated with the text address.
@@ -374,9 +374,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	 * calls and calls to noreturn functions.
 	 */
 	orc = orc_find(state->signal ? state->ip : state->ip - 1);
-	if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
-		goto done;
-	orig_ip = state->ip;
+	if (!orc)
+		goto err;
+
+	/* End-of-stack check for kernel threads: */
+	if (orc->sp_reg == ORC_REG_UNDEFINED) {
+		if (!orc->end)
+			goto err;
+
+		goto the_end;
+	}
 
 	/* Find the previous frame's stack: */
 	switch (orc->sp_reg) {
@@ -402,7 +409,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		if (!state->regs || !state->full_regs) {
 			orc_warn("missing regs for base reg R10 at ip %pB\n",
 				 (void *)state->ip);
-			goto done;
+			goto err;
 		}
 		sp = state->regs->r10;
 		break;
@@ -411,7 +418,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		if (!state->regs || !state->full_regs) {
 			orc_warn("missing regs for base reg R13 at ip %pB\n",
 				 (void *)state->ip);
-			goto done;
+			goto err;
 		}
 		sp = state->regs->r13;
 		break;
@@ -420,7 +427,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		if (!state->regs || !state->full_regs) {
 			orc_warn("missing regs for base reg DI at ip %pB\n",
 				 (void *)state->ip);
-			goto done;
+			goto err;
 		}
 		sp = state->regs->di;
 		break;
@@ -429,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		if (!state->regs || !state->full_regs) {
 			orc_warn("missing regs for base reg DX at ip %pB\n",
 				 (void *)state->ip);
-			goto done;
+			goto err;
 		}
 		sp = state->regs->dx;
 		break;
@@ -437,12 +444,12 @@ bool unwind_next_frame(struct unwind_state *state)
 	default:
 		orc_warn("unknown SP base reg %d for ip %pB\n",
 			 orc->sp_reg, (void *)state->ip);
-		goto done;
+		goto err;
 	}
 
 	if (indirect) {
 		if (!deref_stack_reg(state, sp, &sp))
-			goto done;
+			goto err;
 	}
 
 	/* Find IP, SP and possibly regs: */
@@ -451,7 +458,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		ip_p = sp - sizeof(long);
 
 		if (!deref_stack_reg(state, ip_p, &state->ip))
-			goto done;
+			goto err;
 
 		state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
 						  state->ip, (void *)ip_p);
@@ -465,7 +472,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
 			orc_warn("can't dereference registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
-			goto done;
+			goto err;
 		}
 
 		state->regs = (struct pt_regs *)sp;
@@ -477,7 +484,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
 			orc_warn("can't dereference iret registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
-			goto done;
+			goto err;
 		}
 
 		state->regs = (void *)sp - IRET_FRAME_OFFSET;
@@ -500,18 +507,18 @@ bool unwind_next_frame(struct unwind_state *state)
 	case ORC_REG_PREV_SP:
 		if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
-			goto done;
+			goto err;
 		break;
 
 	case ORC_REG_BP:
 		if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
-			goto done;
+			goto err;
 		break;
 
 	default:
 		orc_warn("unknown BP base reg %d for ip %pB\n",
 			 orc->bp_reg, (void *)orig_ip);
-		goto done;
+		goto err;
 	}
 
 	/* Prevent a recursive loop due to bad ORC data: */
@@ -520,13 +527,16 @@ bool unwind_next_frame(struct unwind_state *state)
 	    state->sp <= prev_sp) {
 		orc_warn("stack going in the wrong direction? ip=%pB\n",
 			 (void *)orig_ip);
-		goto done;
+		goto err;
 	}
 
 	preempt_enable();
 	return true;
 
-done:
+err:
+	state->error = true;
+
+the_end:
 	preempt_enable();
 	state->stack_info.type = STACK_TYPE_UNKNOWN;
 	return false;
diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7d..46f516dd80ce 100644
--- a/tools/objtool/arch/x86/include/asm/orc_types.h
+++ b/tools/objtool/arch/x86/include/asm/orc_types.h
@@ -88,6 +88,7 @@ struct orc_entry {
 	unsigned	sp_reg:4;
 	unsigned	bp_reg:4;
 	unsigned	type:2;
+	unsigned	end:1;
 } __packed;
 
 /*
@@ -101,6 +102,7 @@ struct unwind_hint {
 	s16		sp_offset;
 	u8		sp_reg;
 	u8		type;
+	u8		end;
 };
 #endif /* __ASSEMBLY__ */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 38047c6aa575..8491beb1a636 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1156,6 +1156,7 @@ static int read_unwind_hints(struct objtool_file *file)
 
 		cfa->offset = hint->sp_offset;
 		insn->state.type = hint->type;
+		insn->state.end = hint->end;
 	}
 
 	return 0;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index c6b68fcb926f..95700a2bcb7c 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,7 +31,7 @@ struct insn_state {
 	int stack_size;
 	unsigned char type;
 	bool bp_scratch;
-	bool drap;
+	bool drap, end;
 	int drap_reg, drap_offset;
 	struct cfi_reg vals[CFI_NUM_REGS];
 };
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index c3343820916a..faa444270ee3 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -203,7 +203,8 @@ int orc_dump(const char *_objname)
 
 		print_reg(orc[i].bp_reg, orc[i].bp_offset);
 
-		printf(" type:%s\n", orc_type_name(orc[i].type));
+		printf(" type:%s end:%d\n",
+		       orc_type_name(orc[i].type), orc[i].end);
 	}
 
 	elf_end(elf);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 18384d9be4e1..3f98dcfbc177 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -31,6 +31,8 @@ int create_orc(struct objtool_file *file)
 		struct cfi_reg *cfa = &insn->state.cfa;
 		struct cfi_reg *bp = &insn->state.regs[CFI_BP];
 
+		orc->end = insn->state.end;
+
 		if (cfa->base == CFI_UNDEFINED) {
 			orc->sp_reg = ORC_REG_UNDEFINED;
 			continue;
-- cgit

From 6415b38bae2641ab6e173ffa4ff6a453b60ba99b Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Fri, 18 May 2018 08:47:13 +0200
Subject: x86/stacktrace: Enable HAVE_RELIABLE_STACKTRACE for the ORC unwinder

At SUSE, we need a reliable stack unwinder for kernel live patching,
but we do not want to enable frame pointers for performance reasons.

So, after the previous patches making ORC reliable, mark ORC as a
reliable stack unwinder on x86.

Signed-off-by: Jiri Slaby
Acked-by: Josh Poimboeuf
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: https://lkml.kernel.org/lkml/20180518064713.26440-6-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1dbb4ee19d7..c4d64b19acff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -180,7 +180,7 @@ config X86
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_RELIABLE_STACKTRACE		if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
+	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
 	select HAVE_STACKPROTECTOR		if CC_HAS_SANE_STACKPROTECTOR
 	select HAVE_STACK_VALIDATION		if X86_64
 	select HAVE_RSEQ
-- cgit

From b5722a457b6e5eabd97a7f39c6c2b8e9a623ab15 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Tue, 3 Jul 2018 10:50:13 +0900
Subject: x86/build/vdso: Remove unused vdso-syms.lds

This file contains symbol values and was originally linked into
vmlinux, but I have no idea what it was actually used for. Since the
following commit:

  827880ec260b ("x86/um: thin archives build fix")

it is not even linked. Now it is completely orphaned, and no problem
has been reported. That is proof that this file was not needed in the
first place.

Signed-off-by: Masahiro Yamada
Acked-by: Richard Weinberger
Cc: Andy Lutomirski
Cc: Jeff Dike
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-um@lists.infradead.org
Link: http://lkml.kernel.org/r/1530582614-5173-2-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Ingo Molnar
---
 arch/x86/um/vdso/.gitignore | 1 -
 arch/x86/um/vdso/Makefile | 16 ----------------
 2 files changed, 17 deletions(-)

diff --git a/arch/x86/um/vdso/.gitignore b/arch/x86/um/vdso/.gitignore
index 9cac6d072199..f8b69d84238e 100644
--- a/arch/x86/um/vdso/.gitignore
+++ b/arch/x86/um/vdso/.gitignore
@@ -1,2 +1 @@
-vdso-syms.lds
 vdso.lds
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index b2d6967262b2..822ccdba93ad 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -53,22 +53,6 @@ $(vobjs): KBUILD_CFLAGS += $(CFL)
 CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
 CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
 
-targets += vdso-syms.lds
-extra-$(VDSO64-y) += vdso-syms.lds
-
-#
-# Match symbols in the DSO that look like VDSO*; produce a file of constants.
-#
-sed-vdsosym := -e 's/^00*/0/' \
-	-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
-quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
-	$(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
-endef
-
-$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
-	$(call if_changed,vdsosym)
-
 #
 # The DSO images are built using a special linker script.
 #
-- cgit

From c5fcdbf15523f6bbbaeb822e3be6003e60f9d3b7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Tue, 3 Jul 2018 10:50:14 +0900
Subject: x86/build/vdso: Simplify 'cmd_vdso2c'

There is no reason to use the 'define' directive here. Just use the =
operator.
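For a single-command recipe the two forms behave identically; a minimal
GNU Make sketch (hypothetical tool and variable names, not from the
kernel tree) of when each form is warranted:

  # One command: a plain '=' assignment is all that is needed.
  cmd_gen = ./gen-tool $< $@

  # 'define'/'endef' only earns its keep when the body spans several
  # lines, since a '=' assignment cannot contain embedded newlines:
  define cmd_gen_multi
  	./step-one $< $@.tmp
  	./step-two $@.tmp $@
  endef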
Signed-off-by: Masahiro Yamada
Cc: Andy Lutomirski
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1530582614-5173-3-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/vdso/Makefile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 261802b1cc50..b9ed1aa53a26 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -58,9 +58,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(src
 hostprogs-y += vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
-define cmd_vdso2c
-	$(obj)/vdso2c $< $(<:%.dbg=%) $@
-endef
+      cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
 
 $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
 	$(call if_changed,vdso2c)
-- cgit

From a7bea8308933aaeea76dad7d42a6e51000417626 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Mon, 2 Jul 2018 04:31:54 -0600
Subject: x86/asm/64: Use 32-bit XOR to zero registers

Some Intel CPUs don't recognize 64-bit XORs as zeroing idioms. Zeroing
idioms don't require execution bandwidth, as they're being taken care
of in the frontend (through register renaming). Use 32-bit XORs
instead.

Signed-off-by: Jan Beulich
Cc: Alok Kataria
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Juergen Gross
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: davem@davemloft.net
Cc: herbert@gondor.apana.org.au
Cc: pavel@ucw.cz
Cc: rjw@rjwysocki.net
Link: http://lkml.kernel.org/r/5B39FF1A02000078001CFB54@prv1-mh.provo.novell.com
Signed-off-by: Ingo Molnar
---
 arch/x86/crypto/aegis128-aesni-asm.S | 2 +-
 arch/x86/crypto/aegis128l-aesni-asm.S | 2 +-
 arch/x86/crypto/aegis256-aesni-asm.S | 2 +-
 arch/x86/crypto/aesni-intel_asm.S | 8 ++++----
 arch/x86/crypto/aesni-intel_avx-x86_64.S | 4 ++--
 arch/x86/crypto/morus1280-avx2-asm.S | 2 +-
 arch/x86/crypto/morus1280-sse2-asm.S | 2 +-
 arch/x86/crypto/morus640-sse2-asm.S | 2 +-
 arch/x86/crypto/sha1_ssse3_asm.S | 2 +-
 arch/x86/kernel/head_64.S | 2 +-
 arch/x86/kernel/paravirt_patch_64.c | 2 +-
 arch/x86/lib/memcpy_64.S | 2 +-
 arch/x86/power/hibernate_asm_64.S | 2 +-
 13 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 9254e0b6cc06..d5c5e2082ae7 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -75,7 +75,7 @@
  *   %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 
 	mov LEN, %r8
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 9263c344f2c7..0fbdf5f00bda 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -66,7 +66,7 @@
  *   %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG0, MSG0
 	pxor MSG1, MSG1
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 1d977d515bf9..a49f58e2a5dd 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -59,7 +59,7 @@
  *   %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 
 	mov LEN, %r8
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e762ef417562..9bd139569b41 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -258,7 +258,7 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 .macro GCM_INIT Iv SUBKEY AAD AADLEN
 	mov \AADLEN, %r11
 	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
-	xor %r11, %r11
+	xor %r11d, %r11d
 	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
 	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
 	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@@ -286,7 +286,7 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 	movdqu HashKey(%arg2), %xmm13
 
 	add %arg5, InLen(%arg2)
-	xor %r11, %r11 # initialise the data pointer offset as zero
+	xor %r11d, %r11d # initialise the data pointer offset as zero
 	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
 
 	sub %r11, %arg5		# sub partial block data used
@@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
 
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-	xor %rax,%rax
+	xor %eax, %eax
 
 	mov %rax, PBlockLen(%arg2)
 	jmp _dec_done_\@
@@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
 
 	# GHASH computation for the last <16 Byte block
 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
-	xor %rax,%rax
+	xor %eax, %eax
 
 	mov %rax, PBlockLen(%arg2)
 	jmp _encode_done_\@
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index faecb1518bf8..1985ea0b551b 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
 _get_AAD_done\@:
 
 	# initialize the data pointer offset as zero
-	xor %r11, %r11
+	xor %r11d, %r11d
 
 	# start AES for num_initial_blocks blocks
 	mov arg5, %rax	# rax = *Y0
@@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
 _get_AAD_done\@:
 
 	# initialize the data pointer offset as zero
-	xor %r11, %r11
+	xor %r11d, %r11d
 
 	# start AES for num_initial_blocks blocks
 	mov arg5, %rax	# rax = *Y0
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index 37d422e77931..c3f74913476c 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
  *   %r9
  */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	vpxor MSG, MSG, MSG
 
 	mov %rcx, %r8
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index 1fe637c7be9d..b3f4d103ba06 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
 *   %r9
 */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG_LO, MSG_LO
 	pxor MSG_HI, MSG_HI
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index 71c72a0a0862..d2958a47fccc 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
 *   %r9
 */
 __load_partial:
-	xor %r9, %r9
+	xor %r9d, %r9d
 	pxor MSG, MSG
 
 	mov %rcx, %r8
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 6204bd53528c..613d0bfc3d84 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -96,7 +96,7 @@
 	# cleanup workspace
 	mov	$8, %ecx
 	mov	%rsp, %rdi
-	xor	%rax, %rax
+	xor	%eax, %eax
 	rep stosq
 
 	mov	%rbp, %rsp		# deallocate workspace
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 8344dd2f310a..15ebc2fc166e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
 	 * address given in m16:64.
 	 */
 	pushq	$.Lafter_lret	# put return address on stack for unwinder
-	xorq	%rbp, %rbp	# clear frame pointer
+	xorl	%ebp, %ebp	# clear frame pointer
 	movq	initial_code(%rip), %rax
 	pushq	$__KERNEL_CS	# set correct cs
 	pushq	%rax		# target address in negative space
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9edadabf04f6..9cb98f7b07c9 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
+DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 298ef1479240..3b24dc05251c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
 
 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
-	xorq %rax, %rax
+	xorl %eax, %eax
 	ret
 ENDPROC(__memcpy_mcsafe)
 EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index ce8da3a0412c..fd369a6e9ff8 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -137,7 +137,7 @@ ENTRY(restore_registers)
 	/* Saved in save_processor_state. */
 	lgdt	saved_context_gdt_desc(%rax)
 
-	xorq	%rax, %rax
+	xorl	%eax, %eax
 
 	/* tell the hibernation core that we've just restored the memory */
 	movq	%rax, in_suspend(%rip)
-- cgit

From 6709812f094d96543b443645c68daaa32d3d3e77 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Mon, 2 Jul 2018 04:47:57 -0600
Subject: x86/entry/64: Add two more instruction suffixes

Sadly, contrary to what was claimed in:

  a368d7fd2a ("x86/entry/64: Add instruction suffix")

there are two more instances which need to be adjusted. As said there,
omitting suffixes from instructions in AT&T mode is bad practice when
the operand size cannot be determined by the assembler from register
operands, and is likely going to be warned about by upstream gas in the
future (mine does already). Add the other missing suffixes here as
well.

Signed-off-by: Jan Beulich
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/5B3A02DD02000078001CFB78@prv1-mh.provo.novell.com
Signed-off-by: Ingo Molnar
---
 arch/x86/entry/entry_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c6f3677e6105..65aa16d845f6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -92,7 +92,7 @@ END(native_usergs_sysret64)
 .endm
 
 .macro TRACE_IRQS_IRETQ_DEBUG
-	bt	$9, EFLAGS(%rsp)		/* interrupts off? */
+	btl	$9, EFLAGS(%rsp)		/* interrupts off? */
 	jnc	1f
 	TRACE_IRQS_ON_DEBUG
 1:
@@ -702,7 +702,7 @@ retint_kernel:
 #ifdef CONFIG_PREEMPT
 	/* Interrupts are off */
 	/* Check if we need preemption */
-	bt	$9, EFLAGS(%rsp)		/* were interrupts off? */
+	btl	$9, EFLAGS(%rsp)		/* were interrupts off? */
 	jnc	1f
 0:	cmpl	$0, PER_CPU_VAR(__preempt_count)
 	jnz	1f
-- cgit
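To see the ambiguity these suffixes resolve: with only an immediate and
a memory operand, nothing pins down the operand size, so the suffix has
to supply it; with a register operand the size is implied. A standalone
GAS sketch (hypothetical label, not kernel code):

	.text
	.globl	bt_demo
bt_demo:
	btl	$9, (%rdi)	/* memory + immediate: the 'l' suffix fixes the size at 32 bits */
	bt	$9, %eax	/* register operand already implies 32 bits; no suffix needed */
	ret
	/* A bare 'bt $9, (%rdi)' leaves the size ambiguous: older gas
	 * silently picks a default, and newer gas warns about it. */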