diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 19:53:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 19:53:15 -0700 |
commit | 685d98211273f60e38a6d361b62d7016c545297e (patch) | |
tree | 76c3be7af88578437c72325c322a52a12be3d05d /arch/x86/kernel | |
parent | fcc196579aa1fc167d6778948bff69fae6116737 (diff) | |
parent | 35ce64922c8263448e58a2b9e8d15a64e11e9b2d (diff) |
Merge tag 'x86-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core x86 updates from Ingo Molnar:
- The biggest change is the rework of the percpu code, to support the
'Named Address Spaces' GCC feature, by Uros Bizjak:
- This allows C code to access GS and FS segment relative memory
via variables declared with such attributes, which allows the
compiler to better optimize those accesses than the previous
inline assembly code.
- The series also includes a number of micro-optimizations for
various percpu access methods, plus a number of cleanups of %gs
accesses in assembly code.
- These changes have been exposed to linux-next testing for the
last ~5 months, with no known regressions in this area.
- Fix/clean up __switch_to()'s broken but accidentally working handling
of FPU switching - which also generates better code
- Propagate more RIP-relative addressing in assembly code, to generate
slightly better code
- Rework the CPU mitigations Kconfig space to be less idiosyncratic, to
make it easier for distros to follow & maintain these options
- Rework the x86 idle code to cure RCU violations and to clean up the
logic
- Clean up the vDSO Makefile logic
- Misc cleanups and fixes
* tag 'x86-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits)
x86/idle: Select idle routine only once
x86/idle: Let prefer_mwait_c1_over_halt() return bool
x86/idle: Cleanup idle_setup()
x86/idle: Clean up idle selection
x86/idle: Sanitize X86_BUG_AMD_E400 handling
sched/idle: Conditionally handle tick broadcast in default_idle_call()
x86: Increase brk randomness entropy for 64-bit systems
x86/vdso: Move vDSO to mmap region
x86/vdso/kbuild: Group non-standard build attributes and primary object file rules together
x86/vdso: Fix rethunk patching for vdso-image-{32,64}.o
x86/retpoline: Ensure default return thunk isn't used at runtime
x86/vdso: Use CONFIG_COMPAT_32 to specify vdso32
x86/vdso: Use $(addprefix ) instead of $(foreach )
x86/vdso: Simplify obj-y addition
x86/vdso: Consolidate targets and clean-files
x86/bugs: Rename CONFIG_RETHUNK => CONFIG_MITIGATION_RETHUNK
x86/bugs: Rename CONFIG_CPU_SRSO => CONFIG_MITIGATION_SRSO
x86/bugs: Rename CONFIG_CPU_IBRS_ENTRY => CONFIG_MITIGATION_IBRS_ENTRY
x86/bugs: Rename CONFIG_CPU_UNRET_ENTRY => CONFIG_MITIGATION_UNRET_ENTRY
x86/bugs: Rename CONFIG_SLS => CONFIG_MITIGATION_SLS
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/wakeup_64.S | 24 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/callthunks.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 48 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/head_32.S | 4 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 4 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/opt.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 99 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/smp.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/static_call.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/sys_x86_64.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 11 |
21 files changed, 150 insertions, 156 deletions
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index d5d8a352eafa..94ff83f3d3fe 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -17,7 +17,7 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ SYM_FUNC_START(wakeup_long64) - movq saved_magic, %rax + movq saved_magic(%rip), %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax je 2f @@ -33,14 +33,14 @@ SYM_FUNC_START(wakeup_long64) movw %ax, %es movw %ax, %fs movw %ax, %gs - movq saved_rsp, %rsp + movq saved_rsp(%rip), %rsp - movq saved_rbx, %rbx - movq saved_rdi, %rdi - movq saved_rsi, %rsi - movq saved_rbp, %rbp + movq saved_rbx(%rip), %rbx + movq saved_rdi(%rip), %rdi + movq saved_rsi(%rip), %rsi + movq saved_rbp(%rip), %rbp - movq saved_rip, %rax + movq saved_rip(%rip), %rax ANNOTATE_RETPOLINE_SAFE jmp *%rax SYM_FUNC_END(wakeup_long64) @@ -72,11 +72,11 @@ SYM_FUNC_START(do_suspend_lowlevel) movq $.Lresume_point, saved_rip(%rip) - movq %rsp, saved_rsp - movq %rbp, saved_rbp - movq %rbx, saved_rbx - movq %rdi, saved_rdi - movq %rsi, saved_rsi + movq %rsp, saved_rsp(%rip) + movq %rbp, saved_rbp(%rip) + movq %rbx, saved_rbx(%rip) + movq %rdi, saved_rdi(%rip) + movq %rsi, saved_rsi(%rip) addq $8, %rsp movl $3, %edi diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1d85cb7071cb..ff6e32ec8259 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(alternatives_patched); #define DA_ENDBR 0x08 #define DA_SMP 0x10 -static unsigned int __initdata_or_module debug_alternative; +static unsigned int debug_alternative; static int __init debug_alt(char *str) { @@ -133,7 +133,7 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and * *jump* over instead of executing long and daft NOPs. */ -static void __init_or_module add_nop(u8 *instr, unsigned int len) +static void add_nop(u8 *instr, unsigned int len) { u8 *target = instr + len; @@ -206,7 +206,7 @@ static int skip_nops(u8 *instr, int offset, int len) * Optimize a sequence of NOPs, possibly preceded by an unconditional jump * to the end of the NOP sequence into a single NOP. */ -static bool __init_or_module +static bool __optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target) { int i = *next - insn->length; @@ -335,8 +335,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len) return (target < src || target > src + src_len); } -static void __init_or_module noinline -apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) { int prev, target = 0; @@ -545,7 +544,7 @@ static inline bool is_jcc32(struct insn *insn) return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80; } -#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL) +#if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_OBJTOOL) /* * CALL/JMP *%\reg @@ -709,8 +708,8 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) /* * The compiler is supposed to EMIT an INT3 after every unconditional * JMP instruction due to AMD BTC. However, if the compiler is too old - * or SLS isn't enabled, we still need an INT3 after indirect JMPs - * even on Intel. + * or MITIGATION_SLS isn't enabled, we still need an INT3 after + * indirect JMPs even on Intel. */ if (op == JMP32_INSN_OPCODE && i < insn->length) bytes[i++] = INT3_INSN_OPCODE; @@ -770,7 +769,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK /* * Rewrite the compiler generated return thunk tail-calls. @@ -843,14 +842,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) } #else void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_MITIGATION_RETHUNK */ -#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ +#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } -#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */ +#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */ #ifdef CONFIG_X86_KERNEL_IBT diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 6913b372ccf7..a98020bf31bb 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -109,7 +109,7 @@ static void __used common(void) OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); OFFSET(X86_current_task, pcpu_hot, current_task); -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING OFFSET(X86_call_depth, pcpu_hot, call_depth); #endif #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index aeccea2da91b..30335182b6b0 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -24,6 +24,8 @@ static int __initdata_or_module debug_callthunks; +#define MAX_PATCH_LEN (255-1) + #define prdbg(fmt, args...) \ do { \ if (debug_callthunks) \ @@ -179,10 +181,15 @@ static const u8 nops[] = { static void *patch_dest(void *dest, bool direct) { unsigned int tsize = SKL_TMPL_SIZE; + u8 insn_buff[MAX_PATCH_LEN]; u8 *pad = dest - tsize; + memcpy(insn_buff, skl_call_thunk_template, tsize); + apply_relocation(insn_buff, tsize, pad, + skl_call_thunk_template, tsize); + /* Already patched? */ - if (!bcmp(pad, skl_call_thunk_template, tsize)) + if (!bcmp(pad, insn_buff, tsize)) return pad; /* Ensure there are nops */ @@ -192,9 +199,9 @@ static void *patch_dest(void *dest, bool direct) } if (direct) - memcpy(pad, skl_call_thunk_template, tsize); + memcpy(pad, insn_buff, tsize); else - text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true); + text_poke_copy_locked(pad, insn_buff, tsize, true); return pad; } @@ -290,20 +297,27 @@ void *callthunks_translate_call_dest(void *dest) static bool is_callthunk(void *addr) { unsigned int tmpl_size = SKL_TMPL_SIZE; - void *tmpl = skl_call_thunk_template; + u8 insn_buff[MAX_PATCH_LEN]; unsigned long dest; + u8 *pad; dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT); if (!thunks_initialized || skip_addr((void *)dest)) return false; - return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); + pad = (void *)(dest - tmpl_size); + + memcpy(insn_buff, skl_call_thunk_template, tmpl_size); + apply_relocation(insn_buff, tmpl_size, pad, + skl_call_thunk_template, tmpl_size); + + return !bcmp(pad, insn_buff, tmpl_size); } int x86_call_depth_emit_accounting(u8 **pprog, void *func) { unsigned int tmpl_size = SKL_TMPL_SIZE; - void *tmpl = skl_call_thunk_template; + u8 insn_buff[MAX_PATCH_LEN]; if (!thunks_initialized) return 0; @@ -312,7 +326,11 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) if (func && is_callthunk(func)) return 0; - memcpy(*pprog, tmpl, tmpl_size); + memcpy(insn_buff, skl_call_thunk_template, tmpl_size); + apply_relocation(insn_buff, tmpl_size, *pprog, + skl_call_thunk_template, tmpl_size); + + memcpy(*pprog, insn_buff, tmpl_size); *pprog += tmpl_size; return tmpl_size; } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f8ae10222fd0..3282a747b645 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -817,7 +817,7 @@ static void fix_erratum_1386(struct cpuinfo_x86 *c) void init_spectral_chicken(struct cpuinfo_x86 *c) { -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY u64 value; /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0767ab63ca63..4dd00066c12a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -668,7 +668,7 @@ enum gds_mitigations { GDS_MITIGATION_HYPERVISOR, }; -#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION) +#if IS_ENABLED(CONFIG_MITIGATION_GDS_FORCE) static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE; #else static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL; @@ -979,10 +979,10 @@ static void __init retbleed_select_mitigation(void) return; case RETBLEED_CMD_UNRET: - if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) { retbleed_mitigation = RETBLEED_MITIGATION_UNRET; } else { - pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_UNRET_ENTRY.\n"); goto do_cmd_auto; } break; @@ -991,24 +991,24 @@ static void __init retbleed_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_IBPB)) { pr_err("WARNING: CPU does not support IBPB.\n"); goto do_cmd_auto; - } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + } else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } else { - pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); goto do_cmd_auto; } break; case RETBLEED_CMD_STUFF: - if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) && + if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) && spectre_v2_enabled == SPECTRE_V2_RETPOLINE) { retbleed_mitigation = RETBLEED_MITIGATION_STUFF; } else { - if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING)) + if (IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)) pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); else - pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_CALL_DEPTH_TRACKING.\n"); goto do_cmd_auto; } @@ -1018,9 +1018,10 @@ do_cmd_auto: case RETBLEED_CMD_AUTO: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + if (IS_ENABLED(CONFIG_MITIGATION_UNRET_ENTRY)) retbleed_mitigation = RETBLEED_MITIGATION_UNRET; - else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + else if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY) && + boot_cpu_has(X86_FEATURE_IBPB)) retbleed_mitigation = RETBLEED_MITIGATION_IBPB; } @@ -1099,7 +1100,7 @@ static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE static bool spectre_v2_bad_module; bool retpoline_module_ok(bool has_retpoline) @@ -1412,7 +1413,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && - !IS_ENABLED(CONFIG_RETPOLINE)) { + !IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) { pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; @@ -1435,7 +1436,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY)) { pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; @@ -1466,7 +1467,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) { - if (!IS_ENABLED(CONFIG_RETPOLINE)) { + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)) { pr_err("Kernel not compiled with retpoline; no mitigation available!"); return SPECTRE_V2_NONE; } @@ -1561,7 +1562,7 @@ static void __init spectre_v2_select_mitigation(void) break; } - if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + if (IS_ENABLED(CONFIG_MITIGATION_IBRS_ENTRY) && boot_cpu_has_bug(X86_BUG_RETBLEED) && retbleed_cmd != RETBLEED_CMD_OFF && retbleed_cmd != RETBLEED_CMD_STUFF && @@ -2454,7 +2455,7 @@ static void __init srso_select_mitigation(void) break; case SRSO_CMD_SAFE_RET: - if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { /* * Enable the return thunk for generated code * like ftrace, static_call, etc. @@ -2474,29 +2475,29 @@ static void __init srso_select_mitigation(void) else srso_mitigation = SRSO_MITIGATION_SAFE_RET_UCODE_NEEDED; } else { - pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); } break; case SRSO_CMD_IBPB: - if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); srso_mitigation = SRSO_MITIGATION_IBPB; } } else { - pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); } break; case SRSO_CMD_IBPB_ON_VMEXIT: - if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; } } else { - pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); } break; } @@ -2846,3 +2847,8 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu return cpu_show_common(dev, attr, buf, X86_BUG_GDS); } #endif + +void __warn_thunk(void) +{ + WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n"); +} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6756025b35ca..39792077f65b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1856,8 +1856,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); - select_idle_routine(c); - #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif @@ -1967,6 +1965,7 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { .top_of_stack = TOP_OF_INIT_STACK, }; EXPORT_PER_CPU_SYMBOL(pcpu_hot); +EXPORT_PER_CPU_SYMBOL(const_pcpu_hot); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, @@ -2278,6 +2277,8 @@ void __init arch_cpu_finalize_init(void) { identify_boot_cpu(); + select_idle_routine(); + /* * identify_boot_cpu() initialized SMT support information, let the * core code know. diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f18ca44c904b..44a91ef5a23b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -410,7 +410,7 @@ static void __die_header(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", - IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); } NOKPROBE_SYMBOL(__die_header); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 12df54ff0e81..70139d9d2e01 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -307,7 +307,8 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) +#define RET_SIZE \ + (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_MITIGATION_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 487ac57e2c81..b50f3641c4d6 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -414,7 +414,7 @@ __REFDATA .align 4 SYM_DATA(initial_code, .long i386_start_kernel) -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #define PGD_ALIGN (2 * PAGE_SIZE) #define PTI_USER_PGD_FILL 1024 #else @@ -474,7 +474,7 @@ SYM_DATA_START(initial_page_table) # endif .align PAGE_SIZE /* needs to be page-sized too */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * PTI needs another page so sync_initial_pagetable() works correctly * and does not scribble over the data which is placed behind the diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c38e43589046..3dbd05f93859 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -478,7 +478,7 @@ SYM_CODE_START(soft_restart_cpu) UNWIND_HINT_END_OF_STACK /* Find the idle task stack */ - movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx + movq PER_CPU_VAR(pcpu_hot + X86_current_task), %rcx movq TASK_threadsp(%rcx), %rsp jmp .Ljump_to_C_code @@ -623,7 +623,7 @@ SYM_CODE_END(vc_no_ghcb) #define SYM_DATA_START_PAGE_ALIGNED(name) \ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not * ever go out to userspace with these, so we do not diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 517821b48391..36d6809c6c9e 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -324,7 +324,7 @@ static int can_optimize(unsigned long paddr) * However, the kernel built with retpolines or IBT has jump * tables disabled so the check can be skipped altogether. */ - if (!IS_ENABLED(CONFIG_RETPOLINE) && + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && !IS_ENABLED(CONFIG_X86_KERNEL_IBT) && insn_is_indirect_jump(&insn)) return 0; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 7a814b41402d..0f19ef355f5f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -184,7 +184,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) return new_ldt; } -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION static void do_sanity_check(struct mm_struct *mm, bool had_kernel_mapping, @@ -377,7 +377,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); } -#else /* !CONFIG_PAGE_TABLE_ISOLATION */ +#else /* !CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static int map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) @@ -388,11 +388,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) { } -#endif /* CONFIG_PAGE_TABLE_ISOLATION */ +#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static void free_ldt_pgtables(struct mm_struct *mm) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION struct mmu_gather tlb; unsigned long start = LDT_BASE_ADDR; unsigned long end = LDT_END_ADDR; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6121c2b42ecf..b8441147eb5e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -846,31 +846,6 @@ void __noreturn stop_this_cpu(void *dummy) } /* - * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power - * states (local apic timer and TSC stop). - * - * XXX this function is completely buggered vs RCU and tracing. - */ -static void amd_e400_idle(void) -{ - /* - * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E - * gets set after static_cpu_has() places have been converted via - * alternatives. - */ - if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { - default_idle(); - return; - } - - tick_broadcast_enter(); - - default_idle(); - - tick_broadcast_exit(); -} - -/* * Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf * exists and whenever MONITOR/MWAIT extensions are present there is at * least one C1 substate. @@ -878,21 +853,22 @@ static void amd_e400_idle(void) * Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait * is passed to kernel commandline parameter. */ -static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) +static __init bool prefer_mwait_c1_over_halt(void) { + const struct cpuinfo_x86 *c = &boot_cpu_data; u32 eax, ebx, ecx, edx; - /* User has disallowed the use of MWAIT. Fallback to HALT */ - if (boot_option_idle_override == IDLE_NOMWAIT) - return 0; + /* If override is enforced on the command line, fall back to HALT. */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) + return false; /* MWAIT is not supported on this platform. Fallback to HALT */ if (!cpu_has(c, X86_FEATURE_MWAIT)) - return 0; + return false; - /* Monitor has a bug. Fallback to HALT */ - if (boot_cpu_has_bug(X86_BUG_MONITOR)) - return 0; + /* Monitor has a bug or APIC stops in C1E. Fallback to HALT */ + if (boot_cpu_has_bug(X86_BUG_MONITOR) || boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) + return false; cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); @@ -901,13 +877,13 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) * with EAX=0, ECX=0. */ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) - return 1; + return true; /* * If MWAIT extensions are available, there should be at least one * MWAIT C1 substate present. */ - return (edx & MWAIT_C1_SUBSTATE_MASK); + return !!(edx & MWAIT_C1_SUBSTATE_MASK); } /* @@ -933,26 +909,27 @@ static __cpuidle void mwait_idle(void) __current_clr_polling(); } -void select_idle_routine(const struct cpuinfo_x86 *c) +void __init select_idle_routine(void) { -#ifdef CONFIG_SMP - if (boot_option_idle_override == IDLE_POLL && __max_threads_per_core > 1) - pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); -#endif - if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) + if (boot_option_idle_override == IDLE_POLL) { + if (IS_ENABLED(CONFIG_SMP) && __max_threads_per_core > 1) + pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); + return; + } + + /* Required to guard against xen_set_default_idle() */ + if (x86_idle_set()) return; - if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { - pr_info("using AMD E400 aware idle routine\n"); - static_call_update(x86_idle, amd_e400_idle); - } else if (prefer_mwait_c1_over_halt(c)) { + if (prefer_mwait_c1_over_halt()) { pr_info("using mwait in idle threads\n"); static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); static_call_update(x86_idle, tdx_safe_halt); - } else + } else { static_call_update(x86_idle, default_idle); + } } void amd_e400_c1e_apic_setup(void) @@ -985,7 +962,10 @@ void __init arch_post_acpi_subsys_init(void) if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); - pr_info("System has AMD C1E enabled\n"); + + if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE)) + static_branch_enable(&arch_needs_tick_broadcast); + pr_info("System has AMD C1E erratum E400. Workaround enabled.\n"); } static int __init idle_setup(char *str) @@ -998,24 +978,14 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; cpu_idle_poll_ctrl(true); } else if (!strcmp(str, "halt")) { - /* - * When the boot option of idle=halt is added, halt is - * forced to be used for CPU idle. In such case CPU C2/C3 - * won't be used again. - * To continue to load the CPU idle driver, don't touch - * the boot_option_idle_override. - */ - static_call_update(x86_idle, default_idle); + /* 'idle=halt' HALT for idle. C-states are disabled. */ boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { - /* - * If the boot option of "idle=nomwait" is added, - * it means that mwait will be disabled for CPU C1/C2/C3 - * states. - */ + /* 'idle=nomwait' disables MWAIT for idle */ boot_option_idle_override = IDLE_NOMWAIT; - } else - return -1; + } else { + return -EINVAL; + } return 0; } @@ -1030,7 +1000,10 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_page(mm->brk, 0x02000000); + if (mmap_is_ia32()) + return randomize_page(mm->brk, SZ_32M); + + return randomize_page(mm->brk, SZ_1G); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 708c87b88cc1..0917c7f25720 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -156,13 +156,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_p, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -209,7 +208,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) raw_cpu_write(pcpu_hot.current_task, next_p); - switch_fpu_finish(); + switch_fpu_finish(next_p); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c075591b7b46..7062b84dd467 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -611,14 +611,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(pcpu_hot.hardirq_stack_inuse)); - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_p, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -672,7 +671,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) raw_cpu_write(pcpu_hot.current_task, next_p); raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(); + switch_fpu_finish(next_p); /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 96a771f9f930..2908e063d7d8 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -148,14 +148,16 @@ static int register_stop_handler(void) static void native_stop_other_cpus(int wait) { - unsigned int cpu = smp_processor_id(); + unsigned int old_cpu, this_cpu; unsigned long flags, timeout; if (reboot_force) return; /* Only proceed if this is the first CPU to reach this code */ - if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) + old_cpu = -1; + this_cpu = smp_processor_id(); + if (!atomic_try_cmpxchg(&stopping_cpu, &old_cpu, this_cpu)) return; /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ @@ -186,7 +188,7 @@ static void native_stop_other_cpus(int wait) * NMIs. */ cpumask_copy(&cpus_stop_mask, cpu_online_mask); - cpumask_clear_cpu(cpu, &cpus_stop_mask); + cpumask_clear_cpu(this_cpu, &cpus_stop_mask); if (!cpumask_empty(&cpus_stop_mask)) { apic_send_IPI_allbutself(REBOOT_VECTOR); @@ -210,6 +212,8 @@ static void native_stop_other_cpus(int wait) * CPUs to stop. */ if (!smp_no_nmi_ipi && !register_stop_handler()) { + unsigned int cpu; + pr_emerg("Shutting down cpus with NMI\n"); for_each_cpu(cpu, &cpus_stop_mask) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 77a9316da435..4eefaac64c6c 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -172,7 +172,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) } EXPORT_SYMBOL_GPL(arch_static_call_transform); -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK /* * This is called by apply_returns() to fix up static call trampolines, * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index c783aeb37dce..cb9fa1d5c66f 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -52,13 +52,6 @@ static unsigned long get_align_bits(void) return va_align.bits & get_align_mask(); } -unsigned long align_vdso_addr(unsigned long addr) -{ - unsigned long align_mask = get_align_mask(); - addr = (addr + align_mask) & ~align_mask; - return addr | get_align_bits(); -} - static int __init control_va_addr_alignment(char *str) { /* guard against enabling this on other CPU families */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6cb31df3d5ff..4fa0b17e5043 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -774,7 +774,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) */ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1; + struct pt_regs *regs = (struct pt_regs *)current_top_of_stack() - 1; if (regs != eregs) *regs = *eregs; return regs; @@ -792,7 +792,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * trust it and switch to the current kernel stack */ if (ip_within_syscall_gap(regs)) { - sp = this_cpu_read(pcpu_hot.top_of_stack); + sp = current_top_of_stack(); goto sync; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a349dbfc6d5a..56451fd2099e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -46,6 +46,7 @@ ENTRY(phys_startup_64) #endif jiffies = jiffies_64; +const_pcpu_hot = pcpu_hot; #if defined(CONFIG_X86_64) /* @@ -132,7 +133,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT SOFTIRQENTRY_TEXT -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE *(.text..__x86.indirect_thunk) *(.text..__x86.return_thunk) #endif @@ -142,7 +143,7 @@ SECTIONS *(.text..__x86.rethunk_untrain) ENTRY_TEXT -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO /* * See the comment above srso_alias_untrain_ret()'s * definition. @@ -267,7 +268,7 @@ SECTIONS } #endif -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * List of instructions that call/jmp/jcc to retpoline thunks * __x86_indirect_thunk_*(). These instructions can be patched along @@ -504,11 +505,11 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); #endif -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); /* * GNU ld cannot do XOR until 2.41. |