diff options
Diffstat (limited to 'arch/x86/kernel')
| -rw-r--r-- | arch/x86/kernel/Makefile | 12 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/Makefile | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/amd.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/microcode/amd.c | 16 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/resctrl/core.c | 8 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/topology.c | 16 | ||||
| -rw-r--r-- | arch/x86/kernel/dumpstack.c | 6 | ||||
| -rw-r--r-- | arch/x86/kernel/fpu/init.c | 8 | ||||
| -rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 42 | ||||
| -rw-r--r-- | arch/x86/kernel/ftrace_64.S | 34 | ||||
| -rw-r--r-- | arch/x86/kernel/kprobes/core.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/module.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/process.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/smpboot.c | 4 | ||||
| -rw-r--r-- | arch/x86/kernel/tboot.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/unwind_frame.c | 11 | ||||
| -rw-r--r-- | arch/x86/kernel/unwind_orc.c | 2 |
19 files changed, 96 insertions, 76 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1286a73ebdbc..f901658d9f7c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -3,10 +3,6 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o -extra-y += head$(BITS).o -extra-y += ebda.o -extra-y += platform-quirks.o extra-y += vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) @@ -33,6 +29,8 @@ KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. KCSAN_SANITIZE := n +KMSAN_SANITIZE_head$(BITS).o := n +KMSAN_SANITIZE_nmi.o := n # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, @@ -42,7 +40,11 @@ KCOV_INSTRUMENT := n CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace -obj-y := process_$(BITS).o signal.o +obj-y += head_$(BITS).o +obj-y += head$(BITS).o +obj-y += ebda.o +obj-y += platform-quirks.o +obj-y += process_$(BITS).o signal.o obj-$(CONFIG_COMPAT) += signal_compat.o obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o dumpstack.o nmi.o diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 9661e3e802be..f10a921ee756 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -12,6 +12,7 @@ endif # If these files are instrumented, boot hangs during the first second. KCOV_INSTRUMENT_common.o := n KCOV_INSTRUMENT_perf_event.o := n +KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. KCSAN_SANITIZE_common.o := n diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 48276c0e479d..860b60273df3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -503,7 +503,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; /* A random value per boot for bit slice [12:upper_bit) */ - va_align.bits = get_random_int() & va_align.mask; + va_align.bits = get_random_u32() & va_align.mask; } if (cpu_has(c, X86_FEATURE_MWAITX)) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index e7410e98fc1f..3a35dec3ec55 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -440,7 +440,13 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p return ret; native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev >= mc->hdr.patch_id) + + /* + * Allow application of the same revision to pick up SMT-specific + * changes even if the revision of the other SMT thread is already + * up-to-date. + */ + if (rev > mc->hdr.patch_id) return ret; if (!__apply_microcode_amd(mc)) { @@ -528,8 +534,12 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* Check whether we have saved a new patch already: */ - if (*new_rev && rev < mc->hdr.patch_id) { + /* + * Check whether a new patch has been saved already. Also, allow application of + * the same revision in order to pick up SMT-thread-specific configuration even + * if the sibling SMT thread already has an up-to-date revision. + */ + if (*new_rev && rev <= mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { *new_rev = mc->hdr.patch_id; return; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index de62b0b87ced..3266ea36667c 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -66,9 +66,6 @@ struct rdt_hw_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L3, .name = "L3", .cache_level = 3, - .cache = { - .min_cbm_bits = 1, - }, .domains = domain_init(RDT_RESOURCE_L3), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", @@ -83,9 +80,6 @@ struct rdt_hw_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L2, .name = "L2", .cache_level = 2, - .cache = { - .min_cbm_bits = 1, - }, .domains = domain_init(RDT_RESOURCE_L2), .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", @@ -836,6 +830,7 @@ static __init void rdt_init_res_defs_intel(void) r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; r->cache.arch_has_per_cpu_cfg = false; + r->cache.min_cbm_bits = 1; } else if (r->rid == RDT_RESOURCE_MBA) { hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE; hw_res->msr_update = mba_wrmsr_intel; @@ -856,6 +851,7 @@ static __init void rdt_init_res_defs_amd(void) r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; r->cache.arch_has_per_cpu_cfg = true; + r->cache.min_cbm_bits = 0; } else if (r->rid == RDT_RESOURCE_MBA) { hw_res->msr_base = MSR_IA32_MBA_BW_BASE; hw_res->msr_update = mba_wrmsr_amd; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index fd44b54c90d5..fc01f81f6e2a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 132a2de44d2f..5e868b62a7c4 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; unsigned int die_select_mask, die_level_siblings; + unsigned int pkg_mask_width; bool die_level_present = false; int leaf; @@ -111,10 +112,10 @@ int detect_extended_topology(struct cpuinfo_x86 *c) core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; - do { + while (true) { cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); /* @@ -132,10 +133,15 @@ int detect_extended_topology(struct cpuinfo_x86 *c) die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } + if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE) + pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + else + break; + sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + } - core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width; die_select_mask = (~(-1 << die_plus_mask_width)) >> core_plus_mask_width; @@ -148,7 +154,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) } c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, - die_plus_mask_width); + pkg_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b3dba35f466e..0bf6779187dd 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -177,6 +177,12 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, } } +/* + * This function reads pointers from the stack and dereferences them. The + * pointers may not have their KMSAN shadow set up properly, which may result + * in false positive reports. Disable instrumentation to avoid those. + */ +__no_kmsan_checks static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, const char *log_lvl) { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 621f4b6cac4a..8946f89761cc 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -210,13 +210,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) fpstate_reset(¤t->thread.fpu); } -static void __init fpu__init_init_fpstate(void) -{ - /* Bring init_fpstate size and features up to date */ - init_fpstate.size = fpu_kernel_cfg.max_size; - init_fpstate.xfeatures = fpu_kernel_cfg.max_features; -} - /* * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: @@ -236,5 +229,4 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(fpu_kernel_cfg.max_size); fpu__init_task_struct_size(); - fpu__init_init_fpstate(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c8340156bfd2..59e543b95a3c 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -360,7 +360,7 @@ static void __init setup_init_fpu_buf(void) print_xstate_features(); - xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features); + xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures); /* * Init all the features state with header.xfeatures being 0x0 @@ -678,20 +678,6 @@ static unsigned int __init get_xsave_size_user(void) return ebx; } -/* - * Will the runtime-enumerated 'xstate_size' fit in the init - * task's statically-allocated buffer? - */ -static bool __init is_supported_xstate_size(unsigned int test_xstate_size) -{ - if (test_xstate_size <= sizeof(init_fpstate.regs)) - return true; - - pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n", - sizeof(init_fpstate.regs), test_xstate_size); - return false; -} - static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ @@ -717,10 +703,6 @@ static int __init init_xstate_size(void) kernel_default_size = xstate_calculate_size(fpu_kernel_cfg.default_features, compacted); - /* Ensure we have the space to store all default enabled features. */ - if (!is_supported_xstate_size(kernel_default_size)) - return -EINVAL; - if (!paranoid_xstate_size_valid(kernel_size)) return -EINVAL; @@ -875,6 +857,19 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) update_regset_xstate_info(fpu_user_cfg.max_size, fpu_user_cfg.max_features); + /* + * init_fpstate excludes dynamic states as they are large but init + * state is zero. + */ + init_fpstate.size = fpu_kernel_cfg.default_size; + init_fpstate.xfeatures = fpu_kernel_cfg.default_features; + + if (init_fpstate.size > sizeof(init_fpstate.regs)) { + pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n", + sizeof(init_fpstate.regs), init_fpstate.size); + goto out_disable; + } + setup_init_fpu_buf(); /* @@ -1130,6 +1125,15 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, */ mask = fpstate->user_xfeatures; + /* + * Dynamic features are not present in init_fpstate. When they are + * in an all zeros init state, remove those from 'mask' to zero + * those features in the user buffer instead of retrieving them + * from init_fpstate. + */ + if (fpu_state_size_dynamic()) + mask &= (header.xfeatures | xinit->header.xcomp_bv); + for_each_extended_xfeature(i, mask) { /* * If there was a feature or alignment gap, zero the space diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index dfeb227de561..2a4be92fd144 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -4,6 +4,7 @@ */ #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/ptrace.h> #include <asm/ftrace.h> #include <asm/export.h> @@ -129,6 +130,14 @@ .endm +SYM_TYPED_FUNC_START(ftrace_stub) + RET +SYM_FUNC_END(ftrace_stub) + +SYM_TYPED_FUNC_START(ftrace_stub_graph) + RET +SYM_FUNC_END(ftrace_stub_graph) + #ifdef CONFIG_DYNAMIC_FTRACE SYM_FUNC_START(__fentry__) @@ -172,21 +181,10 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) */ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR - - jmp ftrace_epilogue + RET SYM_FUNC_END(ftrace_caller); STACK_FRAME_NON_STANDARD_FP(ftrace_caller) -SYM_FUNC_START(ftrace_epilogue) -/* - * This is weak to keep gas from relaxing the jumps. - */ -SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) - UNWIND_HINT_FUNC - ENDBR - RET -SYM_FUNC_END(ftrace_epilogue) - SYM_FUNC_START(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ pushfq @@ -262,14 +260,11 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) popfq /* - * As this jmp to ftrace_epilogue can be a short jump - * it must not be copied into the trampoline. - * The trampoline will add the code to jump - * to the return. + * The trampoline will add the return. */ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR - jmp ftrace_epilogue + RET /* Swap the flags with orig_rax */ 1: movq MCOUNT_REG_SIZE(%rsp), %rdi @@ -280,7 +275,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) /* Restore flags */ popfq UNWIND_HINT_FUNC - jmp ftrace_epilogue + RET SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) @@ -291,9 +286,6 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) SYM_FUNC_START(__fentry__) cmpq $ftrace_stub, ftrace_trace_function jnz trace - -SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) - ENDBR RET trace: diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4c3c27b6aea3..eb8bc82846b9 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#define stack_addr(regs) ((unsigned long *)regs->sp) - #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b1abf663417c..c032edcd3d95 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -53,7 +53,7 @@ static unsigned long int get_module_load_offset(void) */ if (module_load_offset == 0) module_load_offset = - (get_random_int() % 1024 + 1) * PAGE_SIZE; + (prandom_u32_max(1024) + 1) * PAGE_SIZE; mutex_unlock(&module_kaslr_mutex); } return module_load_offset; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58a6ea472db9..c21b7347a26d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -965,7 +965,7 @@ early_param("idle", idle_setup); unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; + sp -= prandom_u32_max(8192); return sp & ~0xf; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 1962008fe743..6b3418bff326 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -553,6 +553,7 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32) * Kprobes not supported here. Set the probe on schedule instead. * Function graph tracer not supported too. */ +__no_kmsan_checks __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f24227bc3220..3f3ea0287f69 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1316,7 +1316,7 @@ static void __init smp_sanity_check(void) nr++; } - nr_cpu_ids = 8; + set_nr_cpu_ids(8); } #endif @@ -1569,7 +1569,7 @@ __init void prefill_possible_map(void) possible = i; } - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); pr_info("Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 3bacd935f840..4c1bcb6053fc 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -95,7 +95,7 @@ void __init tboot_probe(void) static pgd_t *tboot_pg_dir; static struct mm_struct tboot_mm = { - .mm_rb = RB_ROOT, + .mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock), .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 8e1c50c86e5d..d8ba93778ae3 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -183,6 +183,16 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) } #endif +/* + * While walking the stack, KMSAN may stomp on stale locals from other + * functions that were marked as uninitialized upon function exit, and + * now hold the call frame information for the current function (e.g. the frame + * pointer). Because KMSAN does not specifically mark call frames as + * initialized, false positive reports are possible. To prevent such reports, + * we mark the functions scanning the stack (here and below) with + * __no_kmsan_checks. + */ +__no_kmsan_checks static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) { @@ -250,6 +260,7 @@ static bool update_stack_state(struct unwind_state *state, return true; } +__no_kmsan_checks bool unwind_next_frame(struct unwind_state *state) { struct pt_regs *regs; diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 0ea57da92940..c059820dfaea 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -713,7 +713,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Otherwise, skip ahead to the user-specified starting frame: */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->sp < (unsigned long)first_frame)) + state->sp <= (unsigned long)first_frame)) unwind_next_frame(state); return; |