 arch/x86/include/asm/entry-common.h | 13
 arch/x86/include/asm/fred.h         | 23
 arch/x86/include/asm/msr.h          | 25
 arch/x86/include/asm/processor.h    |  3
 arch/x86/include/asm/switch_to.h    |  6
 arch/x86/kernel/cpu/common.c        | 22
 arch/x86/kernel/cpu/cpuid-deps.c    |  1
 arch/x86/kernel/fred.c              | 45
 arch/x86/kernel/setup.c             |  7
 arch/x86/kernel/smpboot.c           |  2
 arch/x86/kernel/traps.c             | 28
 11 files changed, 111 insertions(+), 64 deletions(-)
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index fb2809b20b0a..77d20555e04d 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -8,6 +8,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/fpu/api.h>
+#include <asm/fred.h>
/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
}
#define arch_enter_from_user_mode arch_enter_from_user_mode
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
- unsigned long ti_work)
+static inline void arch_exit_work(unsigned long ti_work)
{
if (ti_work & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
fpregs_assert_state_consistent();
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
+}
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+ arch_exit_work(ti_work);
+
+ fred_update_rsp0();
#ifdef CONFIG_COMPAT
/*
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index e86c7ba32435..25ca00bd70e8 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -36,6 +36,7 @@
#ifdef CONFIG_X86_FRED
#include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
#include <asm/ptrace.h>
@@ -84,13 +85,33 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
}
void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
void fred_complete_exception_setup(void);
+DECLARE_PER_CPU(unsigned long, fred_rsp0);
+
+static __always_inline void fred_sync_rsp0(unsigned long rsp0)
+{
+ __this_cpu_write(fred_rsp0, rsp0);
+}
+
+static __always_inline void fred_update_rsp0(void)
+{
+ unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
+
+ if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
+ wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+ __this_cpu_write(fred_rsp0, rsp0);
+ }
+}
#else /* CONFIG_X86_FRED */
static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
-static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_sync_rsp0(unsigned long rsp0) { }
+static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */
#endif /* !__ASSEMBLY__ */
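
The per-CPU fred_rsp0 variable introduced above acts as a software shadow of MSR_IA32_FRED_RSP0: fred_update_rsp0() only issues a WRMSRNS when the cached value no longer matches the top of the current task's stack, and fred_sync_rsp0() refreshes the shadow when the MSR is written elsewhere. A minimal user-space sketch of that shadow-register, write-avoidance pattern follows; msr_write() and shadow_rsp0 are illustrative stand-ins, not kernel interfaces.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for an expensive register write (e.g. a WRMSR). */
static void msr_write(uint64_t val)
{
	printf("expensive write: %#llx\n", (unsigned long long)val);
}

/* Software shadow of the last value actually written. */
static uint64_t shadow_rsp0;

/* Only perform the expensive write when the value really changed. */
static void update_rsp0(uint64_t rsp0)
{
	if (shadow_rsp0 != rsp0) {
		msr_write(rsp0);
		shadow_rsp0 = rsp0;
	}
}

int main(void)
{
	update_rsp0(0x1000);	/* first write goes through      */
	update_rsp0(0x1000);	/* same value: write elided      */
	update_rsp0(0x2000);	/* new value: write goes through */
	return 0;
}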
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index d642037f9ed5..001853541f1e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
-/*
- * WRMSRNS behaves exactly like WRMSR with the only difference being
- * that it is not a serializing instruction by default.
- */
-static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
-{
- /* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
- asm volatile("1: .byte 0x0f,0x01,0xc6\n"
- "2:\n"
- _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
- : : "c" (msr), "a"(low), "d" (high));
-}
-
#define native_rdmsr(msr, val1, val2) \
do { \
u64 __val = __rdmsr((msr)); \
@@ -312,9 +299,19 @@ do { \
#endif /* !CONFIG_PARAVIRT_XXL */
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+
+/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
static __always_inline void wrmsrns(u32 msr, u64 val)
{
- __wrmsrns(msr, val, val >> 32);
+ /*
+ * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
+ * DS prefix to avoid a trailing NOP.
+ */
+ asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
+ "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
}
/*
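
For reference on the padding trick in wrmsrns() above: plain WRMSR encodes as 0f 30 (two bytes) while WRMSRNS is 0f 01 c6 (three bytes), so prefixing WRMSR with a redundant DS segment override (3e) makes both alternatives the same length and the patch site needs no trailing NOP. A small sketch checking those lengths (the byte tables are illustrative, not taken from kernel headers):

#include <assert.h>
#include <stdio.h>

/* WRMSR (0f 30) padded with a redundant DS prefix (3e): 3 bytes. */
static const unsigned char ds_wrmsr[] = { 0x3e, 0x0f, 0x30 };

/* WRMSRNS opcode bytes, as emitted via _ASM_BYTES() in the hunk above. */
static const unsigned char wrmsrns[]  = { 0x0f, 0x01, 0xc6 };

int main(void)
{
	/* Equal lengths mean the alternative needs no NOP padding. */
	assert(sizeof(ds_wrmsr) == sizeof(wrmsrns));
	printf("both encodings are %zu bytes\n", sizeof(wrmsrns));
	return 0;
}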
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 775acbdea1a9..4a686f0e5dbf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -582,7 +582,8 @@ extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void cpu_init(void);
-extern void cpu_init_exception_handling(void);
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
extern void cr4_init(void);
extern void set_task_blockstep(struct task_struct *task, bool on);
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index c3bd0c0758c9..75248546403d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -70,13 +70,9 @@ static inline void update_task_stack(struct task_struct *task)
#ifdef CONFIG_X86_32
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- if (cpu_feature_enabled(X86_FEATURE_FRED)) {
- /* WRMSRNS is a baseline feature for FRED. */
- wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
- } else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
/* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
- }
#endif
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index be307c9ef263..07a34d723505 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
+ /* Minimize the window between detecting FRED and disabling it when not requested. */
+ arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
+ if (arglen != 2 || strncmp(arg, "on", 2))
+ setup_clear_cpu_cap(X86_FEATURE_FRED);
+
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
if (arglen <= 0)
return;
@@ -2171,7 +2176,7 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss)
* Setup everything needed to handle exceptions from the IDT, including the IST
* exceptions which use paranoid_entry().
*/
-void cpu_init_exception_handling(void)
+void cpu_init_exception_handling(bool boot_cpu)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
int cpu = raw_smp_processor_id();
@@ -2190,10 +2195,23 @@ void cpu_init_exception_handling(void)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+ /* The boot CPU has enabled FRED during early boot */
+ if (!boot_cpu)
+ cpu_init_fred_exceptions();
+
+ cpu_init_fred_rsps();
+ } else {
+ load_current_idt();
+ }
+}
+
+void __init cpu_init_replace_early_idt(void)
+{
if (cpu_feature_enabled(X86_FEATURE_FRED))
cpu_init_fred_exceptions();
else
- load_current_idt();
+ idt_setup_early_pf();
}
/*
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index b7d9f530ae16..8bd84114c2d9 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -83,7 +83,6 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
- { X86_FEATURE_FRED, X86_FEATURE_WRMSRNS },
{}
};
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index 4bcd8791ad96..8d32c3f48abc 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -21,17 +21,53 @@
#define FRED_STKLVL(vector, lvl) ((lvl) << (2 * (vector)))
+DEFINE_PER_CPU(unsigned long, fred_rsp0);
+EXPORT_PER_CPU_SYMBOL(fred_rsp0);
+
void cpu_init_fred_exceptions(void)
{
/* When FRED is enabled by default, remove this log message */
pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
+ /*
+ * If a kernel event is delivered before a CPU goes to user level for
+ * the first time, its SS is NULL thus NULL is pushed into the SS field
+ * of the FRED stack frame. But before ERETS is executed, the CPU may
+ * context switch to another task and go to user level. Then when the
+ * CPU comes back to kernel mode, SS is changed to __KERNEL_DS. Later
+ * when ERETS is executed to return from the kernel event handler, a #GP
+ * fault is generated because SS doesn't match the SS saved in the FRED
+ * stack frame.
+ *
+ * Initialize SS to __KERNEL_DS when enabling FRED to avoid such #GPs.
+ */
+ loadsegment(ss, __KERNEL_DS);
+
wrmsrl(MSR_IA32_FRED_CONFIG,
/* Reserve for CALL emulation */
FRED_CONFIG_REDZONE |
FRED_CONFIG_INT_STKLVL(0) |
FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
+ wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
+ wrmsrl(MSR_IA32_FRED_RSP0, 0);
+ wrmsrl(MSR_IA32_FRED_RSP1, 0);
+ wrmsrl(MSR_IA32_FRED_RSP2, 0);
+ wrmsrl(MSR_IA32_FRED_RSP3, 0);
+
+ /* Enable FRED */
+ cr4_set_bits(X86_CR4_FRED);
+ /* Any further IDT use is a bug */
+ idt_invalidate();
+
+ /* Use int $0x80 for 32-bit system calls in FRED mode */
+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+}
+
+/* Must be called after setup_cpu_entry_areas() */
+void cpu_init_fred_rsps(void)
+{
/*
* The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
* (remember that user space faults are always taken on stack level 0)
@@ -47,13 +83,4 @@ void cpu_init_fred_exceptions(void)
wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
-
- /* Enable FRED */
- cr4_set_bits(X86_CR4_FRED);
- /* Any further IDT use is a bug */
- idt_invalidate();
-
- /* Use int $0x80 for 32-bit system calls in FRED mode */
- setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
- setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
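
With cpu_init_fred_exceptions() now only programming the entry point and zeroing the RSP levels, and cpu_init_fred_rsps() pointing RSP1-RSP3 at the per-CPU exception stacks, the resulting boot ordering looks roughly like the sketch below (an illustrative summary of the call sites changed in setup.c, smpboot.c and traps.c later in this patch, not code from the tree):

/*
 * Boot CPU:
 *   setup_arch()
 *     init_mem_mapping()              // still served by the early IDT #PF handler
 *     cpu_init_replace_early_idt()    // FRED on:  cpu_init_fred_exceptions()
 *                                     // FRED off: idt_setup_early_pf()
 *   trap_init()
 *     setup_cpu_entry_areas()         // exception stacks become available
 *     cpu_init_exception_handling(true)
 *                                     // FRED on:  cpu_init_fred_rsps() only
 *
 * Secondary CPUs:
 *   start_secondary()
 *     cpu_init_exception_handling(false)
 *                                     // FRED on:  cpu_init_fred_exceptions()
 *                                     //           + cpu_init_fred_rsps()
 */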
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6129dc2ba784..f1fea506e20f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1039,7 +1039,12 @@ void __init setup_arch(char **cmdline_p)
init_mem_mapping();
- idt_setup_early_pf();
+ /*
+ * init_mem_mapping() relies on the early IDT page fault handling.
+ * Now either enable FRED or install the real page fault handler
+ * for 64-bit in the IDT.
+ */
+ cpu_init_replace_early_idt();
/*
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0c35207320cb..dc4fff8fccce 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -246,7 +246,7 @@ static void notrace start_secondary(void *unused)
__flush_tlb_all();
}
- cpu_init_exception_handling();
+ cpu_init_exception_handling(false);
/*
* Load the microcode before reaching the AP alive synchronization
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 415881607c5d..d05392db5d0f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1451,34 +1451,8 @@ DEFINE_IDTENTRY_SW(iret_error)
}
#endif
-/* Do not enable FRED by default yet. */
-static bool enable_fred __ro_after_init = false;
-
-#ifdef CONFIG_X86_FRED
-static int __init fred_setup(char *str)
-{
- if (!str)
- return -EINVAL;
-
- if (!cpu_feature_enabled(X86_FEATURE_FRED))
- return 0;
-
- if (!strcmp(str, "on"))
- enable_fred = true;
- else if (!strcmp(str, "off"))
- enable_fred = false;
- else
- pr_warn("invalid FRED option: 'fred=%s'\n", str);
- return 0;
-}
-early_param("fred", fred_setup);
-#endif
-
void __init trap_init(void)
{
- if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
- setup_clear_cpu_cap(X86_FEATURE_FRED);
-
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
@@ -1486,7 +1460,7 @@ void __init trap_init(void)
sev_es_init_vc_handling();
/* Initialize TSS before setting up traps so ISTs work */
- cpu_init_exception_handling();
+ cpu_init_exception_handling(true);
/* Setup traps as cpu_init() might #GP */
if (!cpu_feature_enabled(X86_FEATURE_FRED))