Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S      |   6
-rw-r--r--  arch/x86/kernel/alternative.c         |  59
-rw-r--r--  arch/x86/kernel/apic/msi.c            |  11
-rw-r--r--  arch/x86/kernel/apic/vector.c         |   4
-rw-r--r--  arch/x86/kernel/asm-offsets.c         |   3
-rw-r--r--  arch/x86/kernel/cc_platform.c         |   8
-rw-r--r--  arch/x86/kernel/cpu/bugs.c            | 204
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c         |   2
-rw-r--r--  arch/x86/kernel/cpu/mce/intel.c       |   1
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c        |  15
-rw-r--r--  arch/x86/kernel/cpu/sgx/encl.c        |   2
-rw-r--r--  arch/x86/kernel/cpu/sgx/encls.h       |  36
-rw-r--r--  arch/x86/kernel/cpu/sgx/main.c        |  10
-rw-r--r--  arch/x86/kernel/dumpstack.c           |   4
-rw-r--r--  arch/x86/kernel/early-quirks.c        |  10
-rw-r--r--  arch/x86/kernel/fpu/core.c            |  99
-rw-r--r--  arch/x86/kernel/fpu/legacy.h          |   6
-rw-r--r--  arch/x86/kernel/fpu/regset.c          |   9
-rw-r--r--  arch/x86/kernel/fpu/xstate.c          | 152
-rw-r--r--  arch/x86/kernel/fpu/xstate.h          |  25
-rw-r--r--  arch/x86/kernel/ftrace.c              |   2
-rw-r--r--  arch/x86/kernel/ftrace_32.S           |   6
-rw-r--r--  arch/x86/kernel/ftrace_64.S           |   8
-rw-r--r--  arch/x86/kernel/head_32.S             |   2
-rw-r--r--  arch/x86/kernel/hpet.c                |   8
-rw-r--r--  arch/x86/kernel/irqflags.S            |   2
-rw-r--r--  arch/x86/kernel/kprobes/core.c        |   2
-rw-r--r--  arch/x86/kernel/kvm.c                 |  19
-rw-r--r--  arch/x86/kernel/kvmclock.c            |   5
-rw-r--r--  arch/x86/kernel/module.c              |   7
-rw-r--r--  arch/x86/kernel/paravirt.c            |   4
-rw-r--r--  arch/x86/kernel/process.c             |   2
-rw-r--r--  arch/x86/kernel/ptrace.c              |   4
-rw-r--r--  arch/x86/kernel/relocate_kernel_32.S  |  10
-rw-r--r--  arch/x86/kernel/relocate_kernel_64.S  |  10
-rw-r--r--  arch/x86/kernel/setup_percpu.c        |  66
-rw-r--r--  arch/x86/kernel/sev_verify_cbit.S     |   2
-rw-r--r--  arch/x86/kernel/static_call.c         |   5
-rw-r--r--  arch/x86/kernel/verify_cpu.S          |   4
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S         |   1
-rw-r--r--  arch/x86/kernel/x86_init.c            |  12
41 files changed, 531 insertions, 316 deletions
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index daf88f8143c5..cf69081073b5 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -60,7 +60,7 @@ save_registers:
popl saved_context_eflags
movl $ret_point, saved_eip
- ret
+ RET
restore_registers:
@@ -70,7 +70,7 @@ restore_registers:
movl saved_context_edi, %edi
pushl saved_context_eflags
popfl
- ret
+ RET
SYM_CODE_START(do_suspend_lowlevel)
call save_processor_state
@@ -86,7 +86,7 @@ SYM_CODE_START(do_suspend_lowlevel)
ret_point:
call restore_registers
call restore_processor_state
- ret
+ RET
SYM_CODE_END(do_suspend_lowlevel)
.data
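[The ret -> RET conversions throughout this series come from the straight-line-speculation (SLS) mitigation work. A minimal sketch of the macros involved, assuming the <asm/linkage.h> definitions introduced by that series:]

/*
 * With CONFIG_SLS=y every return is followed by an INT3, so the CPU
 * cannot speculatively execute the bytes that follow a RET.
 */
#ifdef CONFIG_SLS
#define RET		ret; int3		/* for .S files */
#define ASM_RET		"ret; int3\n\t"		/* for inline asm in C */
#else
#define RET		ret
#define ASM_RET		"ret\n\t"
#endif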
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 23fb4d51a5da..b4470eabf151 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -389,7 +389,7 @@ static int emit_indirect(int op, int reg, u8 *bytes)
*
* CALL *%\reg
*
- * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ * It also tries to inline spectre_v2=retpoline,lfence when size permits.
*/
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
@@ -407,7 +407,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4);
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
- !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
+ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
return -1;
op = insn->opcode.bytes[0];
@@ -438,9 +438,9 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
}
/*
- * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
+ * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
*/
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
bytes[i++] = 0x0f;
bytes[i++] = 0xae;
bytes[i++] = 0xe8; /* LFENCE */
@@ -714,7 +714,7 @@ asm (
" .type int3_magic, @function\n"
"int3_magic:\n"
" movl $1, (%" _ASM_ARG1 ")\n"
-" ret\n"
+ ASM_RET
" .size int3_magic, .-int3_magic\n"
" .popsection\n"
);
@@ -1113,10 +1113,13 @@ void text_poke_sync(void)
}
struct text_poke_loc {
- s32 rel_addr; /* addr := _stext + rel_addr */
- s32 rel32;
+ /* addr := _stext + rel_addr */
+ s32 rel_addr;
+ s32 disp;
+ u8 len;
u8 opcode;
const u8 text[POKE_MAX_OPCODE_SIZE];
+ /* see text_poke_bp_batch() */
u8 old;
};
@@ -1131,7 +1134,8 @@ static struct bp_patching_desc *bp_desc;
static __always_inline
struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
{
- struct bp_patching_desc *desc = __READ_ONCE(*descp); /* rcu_dereference */
+ /* rcu_dereference */
+ struct bp_patching_desc *desc = __READ_ONCE(*descp);
if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
return NULL;
@@ -1165,7 +1169,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
{
struct bp_patching_desc *desc;
struct text_poke_loc *tp;
- int len, ret = 0;
+ int ret = 0;
void *ip;
if (user_mode(regs))
@@ -1205,8 +1209,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
goto out_put;
}
- len = text_opcode_size(tp->opcode);
- ip += len;
+ ip += tp->len;
switch (tp->opcode) {
case INT3_INSN_OPCODE:
@@ -1221,12 +1224,12 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
break;
case CALL_INSN_OPCODE:
- int3_emulate_call(regs, (long)ip + tp->rel32);
+ int3_emulate_call(regs, (long)ip + tp->disp);
break;
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
- int3_emulate_jmp(regs, (long)ip + tp->rel32);
+ int3_emulate_jmp(regs, (long)ip + tp->disp);
break;
default:
@@ -1301,7 +1304,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
*/
for (do_sync = 0, i = 0; i < nr_entries; i++) {
u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
- int len = text_opcode_size(tp[i].opcode);
+ int len = tp[i].len;
if (len - INT3_INSN_SIZE > 0) {
memcpy(old + INT3_INSN_SIZE,
@@ -1378,21 +1381,37 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate)
{
struct insn insn;
- int ret;
+ int ret, i;
memcpy((void *)tp->text, opcode, len);
if (!emulate)
emulate = opcode;
ret = insn_decode_kernel(&insn, emulate);
-
BUG_ON(ret < 0);
- BUG_ON(len != insn.length);
tp->rel_addr = addr - (void *)_stext;
+ tp->len = len;
tp->opcode = insn.opcode.bytes[0];
switch (tp->opcode) {
+ case RET_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ /*
+ * Control flow instructions without implied execution of the
+ * next instruction can be padded with INT3.
+ */
+ for (i = insn.length; i < len; i++)
+ BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
+ break;
+
+ default:
+ BUG_ON(len != insn.length);
+ }
+
+ switch (tp->opcode) {
case INT3_INSN_OPCODE:
case RET_INSN_OPCODE:
break;
@@ -1400,7 +1419,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
case CALL_INSN_OPCODE:
case JMP32_INSN_OPCODE:
case JMP8_INSN_OPCODE:
- tp->rel32 = insn.immediate.value;
+ tp->disp = insn.immediate.value;
break;
default: /* assume NOP */
@@ -1408,13 +1427,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
case 2: /* NOP2 -- emulate as JMP8+0 */
BUG_ON(memcmp(emulate, x86_nops[len], len));
tp->opcode = JMP8_INSN_OPCODE;
- tp->rel32 = 0;
+ tp->disp = 0;
break;
case 5: /* NOP5 -- emulate as JMP32+0 */
BUG_ON(memcmp(emulate, x86_nops[len], len));
tp->opcode = JMP32_INSN_OPCODE;
- tp->rel32 = 0;
+ tp->disp = 0;
break;
default: /* unknown instruction */
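[Storing an explicit tp->len instead of deriving it from the opcode lets a patch site be longer than the decoded instruction, provided the tail is INT3 padding. A stand-alone mirror of the new padding check, with hypothetical values: for a 1-byte RET written into a 5-byte slot the poke text is "c3 cc cc cc cc", insn.length == 1, len == 5, and the old BUG_ON(len != insn.length) would have rejected it.]

/* Sketch: mirrors the loop in text_poke_loc_init() above. */
static bool tail_is_int3(const u8 *text, int insn_len, int len)
{
	int i;

	for (i = insn_len; i < len; i++) {
		if (text[i] != INT3_INSN_OPCODE)
			return false;
	}
	return true;
}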
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index dbacb9ec8843..7517eb05bdc1 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -19,6 +19,7 @@
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
+#include <asm/xen/hypervisor.h>
struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
@@ -159,11 +160,8 @@ static struct irq_chip pci_msi_controller = {
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct msi_desc *desc = first_pci_msi_entry(pdev);
-
init_irq_alloc_info(arg, NULL);
- if (desc->msi_attrib.is_msix) {
+ if (to_pci_dev(dev)->msix_enabled) {
arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
} else {
arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
@@ -345,3 +343,8 @@ void dmar_free_hwirq(int irq)
irq_domain_free_irqs(irq, 1);
}
#endif
+
+bool arch_restore_msi_irqs(struct pci_dev *dev)
+{
+ return xen_initdom_restore_msi(dev);
+}
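[With x86_msi_ops gone (see the x86_init.c hunk at the end of this diff), arch_restore_msi_irqs() becomes a plain function with a bool return. As I read the series, the return value tells the PCI/MSI core whether it still has to write the cached MSI message itself; the generic weak default is roughly the following. The polarity is my reading, not quoted from the core patch:]

/*
 * Assumption: true means "not restored here, core writes the cached
 * message"; the Xen initial-domain path restores via hypercall and
 * returns false.
 */
bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	return true;
}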
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index c132daabe615..3e6f6b448f6a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -760,9 +760,9 @@ void __init lapic_update_legacy_vectors(void)
void __init lapic_assign_system_vectors(void)
{
- unsigned int i, vector = 0;
+ unsigned int i, vector;
- for_each_set_bit_from(vector, system_vectors, NR_VECTORS)
+ for_each_set_bit(vector, system_vectors, NR_VECTORS)
irq_matrix_assign_system(vector_matrix, vector, false);
if (nr_legacy_irqs() > 1)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index ecd3fd6993d1..9fb0a2f8b62a 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,9 +38,6 @@ static void __used common(void)
#endif
BLANK();
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
-
- BLANK();
OFFSET(pbe_address, pbe, address);
OFFSET(pbe_orig_address, pbe, orig_address);
OFFSET(pbe_next, pbe, next);
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c
index 8a25b1c0d480..6a6ffcd978f6 100644
--- a/arch/x86/kernel/cc_platform.c
+++ b/arch/x86/kernel/cc_platform.c
@@ -11,6 +11,7 @@
#include <linux/cc_platform.h>
#include <linux/mem_encrypt.h>
+#include <asm/mshyperv.h>
#include <asm/processor.h>
static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
@@ -66,12 +67,19 @@ static bool amd_cc_platform_has(enum cc_attr attr)
#endif
}
+static bool hyperv_cc_platform_has(enum cc_attr attr)
+{
+ return attr == CC_ATTR_GUEST_MEM_ENCRYPT;
+}
bool cc_platform_has(enum cc_attr attr)
{
if (sme_me_mask)
return amd_cc_platform_has(attr);
+ if (hv_is_isolation_supported())
+ return hyperv_cc_platform_has(attr);
+
return false;
}
EXPORT_SYMBOL_GPL(cc_platform_has);
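[cc_platform_has() keeps callers vendor-agnostic: code asks for an attribute instead of checking for SEV or Hyper-V explicitly, so an isolation VM transparently takes the same paths as an SEV guest. A usage sketch; need_bounce_buffers() is a hypothetical caller:]

#include <linux/cc_platform.h>

static bool need_bounce_buffers(void)
{
	/* true on SEV guests and, after this change, Hyper-V isolation VMs */
	return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
}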
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1c1f218a701d..6296e1ebed1d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -16,6 +16,7 @@
#include <linux/prctl.h>
#include <linux/sched/smt.h>
#include <linux/pgtable.h>
+#include <linux/bpf.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -650,6 +651,32 @@ static inline const char *spectre_v2_module_string(void)
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif
+#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
+#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+
+#ifdef CONFIG_BPF_SYSCALL
+void unpriv_ebpf_notify(int new_state)
+{
+ if (new_state)
+ return;
+
+ /* Unprivileged eBPF is enabled */
+
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_EIBRS:
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+ break;
+ case SPECTRE_V2_EIBRS_LFENCE:
+ if (sched_smt_active())
+ pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+ break;
+ default:
+ break;
+ }
+}
+#endif
+
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt);
@@ -664,7 +691,10 @@ enum spectre_v2_mitigation_cmd {
SPECTRE_V2_CMD_FORCE,
SPECTRE_V2_CMD_RETPOLINE,
SPECTRE_V2_CMD_RETPOLINE_GENERIC,
- SPECTRE_V2_CMD_RETPOLINE_AMD,
+ SPECTRE_V2_CMD_RETPOLINE_LFENCE,
+ SPECTRE_V2_CMD_EIBRS,
+ SPECTRE_V2_CMD_EIBRS_RETPOLINE,
+ SPECTRE_V2_CMD_EIBRS_LFENCE,
};
enum spectre_v2_user_cmd {
@@ -737,6 +767,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
return SPECTRE_V2_USER_CMD_AUTO;
}
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+{
+ return (mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE);
+}
+
static void __init
spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
{
@@ -804,7 +841,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return;
/*
@@ -824,9 +861,11 @@ set_mode:
static const char * const spectre_v2_strings[] = {
[SPECTRE_V2_NONE] = "Vulnerable",
- [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
- [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
- [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines",
+ [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE",
+ [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
+ [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
+ [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
};
static const struct {
@@ -837,8 +876,12 @@ static const struct {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
- { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
+ { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false },
{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "eibrs", SPECTRE_V2_CMD_EIBRS, false },
+ { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
+ { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
};
@@ -875,10 +918,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
}
if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
- cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
!IS_ENABLED(CONFIG_RETPOLINE)) {
- pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_EIBRS ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+ pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE ||
+ cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) &&
+ !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n",
+ mitigation_options[i].option);
return SPECTRE_V2_CMD_AUTO;
}
@@ -887,6 +950,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return cmd;
}
+static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
+{
+ if (!IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("Kernel not compiled with retpoline; no mitigation available!");
+ return SPECTRE_V2_NONE;
+ }
+
+ return SPECTRE_V2_RETPOLINE;
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -907,49 +980,64 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
- mode = SPECTRE_V2_IBRS_ENHANCED;
- /* Force it so VMEXIT will restore correctly */
- x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- goto specv2_set_mode;
+ mode = SPECTRE_V2_EIBRS;
+ break;
}
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_auto;
+
+ mode = spectre_v2_select_retpoline();
break;
- case SPECTRE_V2_CMD_RETPOLINE_AMD:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_amd;
+
+ case SPECTRE_V2_CMD_RETPOLINE_LFENCE:
+ pr_err(SPECTRE_V2_LFENCE_MSG);
+ mode = SPECTRE_V2_LFENCE;
break;
+
case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_generic;
+ mode = SPECTRE_V2_RETPOLINE;
break;
+
case SPECTRE_V2_CMD_RETPOLINE:
- if (IS_ENABLED(CONFIG_RETPOLINE))
- goto retpoline_auto;
+ mode = spectre_v2_select_retpoline();
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS:
+ mode = SPECTRE_V2_EIBRS;
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS_LFENCE:
+ mode = SPECTRE_V2_EIBRS_LFENCE;
+ break;
+
+ case SPECTRE_V2_CMD_EIBRS_RETPOLINE:
+ mode = SPECTRE_V2_EIBRS_RETPOLINE;
break;
}
- pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
- return;
-retpoline_auto:
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- retpoline_amd:
- if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
- pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
- goto retpoline_generic;
- }
- mode = SPECTRE_V2_RETPOLINE_AMD;
- setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
- setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
- } else {
- retpoline_generic:
- mode = SPECTRE_V2_RETPOLINE_GENERIC;
+ if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
+
+ if (spectre_v2_in_eibrs_mode(mode)) {
+ /* Force it so VMEXIT will restore correctly */
+ x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ }
+
+ switch (mode) {
+ case SPECTRE_V2_NONE:
+ case SPECTRE_V2_EIBRS:
+ break;
+
+ case SPECTRE_V2_LFENCE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
+ fallthrough;
+
+ case SPECTRE_V2_RETPOLINE:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ break;
}
-specv2_set_mode:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -975,7 +1063,7 @@ specv2_set_mode:
* the CPU supports Enhanced IBRS, the kernel might unintentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
+ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
@@ -1045,6 +1133,10 @@ void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
@@ -1684,7 +1776,7 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
@@ -1714,6 +1806,27 @@ static char *ibpb_state(void)
return "";
}
+static ssize_t spectre_v2_show_state(char *buf)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
+ return sprintf(buf, "Vulnerable: LFENCE\n");
+
+ if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n");
+
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
+
+ return sprintf(buf, "%s%s%s%s%s%s\n",
+ spectre_v2_strings[spectre_v2_enabled],
+ ibpb_state(),
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ stibp_state(),
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ spectre_v2_module_string());
+}
+
static ssize_t srbds_show_state(char *buf)
{
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
@@ -1739,12 +1852,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
- return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
- spectre_v2_module_string());
+ return spectre_v2_show_state(buf);
case X86_BUG_SPEC_STORE_BYPASS:
return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
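[unpriv_ebpf_notify() is the arch-side half of a hook invoked from the kernel.unprivileged_bpf_disabled sysctl handler, so the warnings also fire when unprivileged eBPF is re-enabled at runtime, not only at boot. The BPF side looks roughly like this, paraphrased from the matching core patches:]

/* <linux/bpf.h> */
static inline bool unprivileged_ebpf_enabled(void)
{
	return !sysctl_unprivileged_bpf_disabled;
}

/* kernel/sysctl.c: default no-op, overridden by the x86 version above */
void __weak unpriv_ebpf_notify(int new_state)
{
}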
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index a1e2f41796dc..9f4b508886dd 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr)
u32 hi, lo;
/* sysfs write might race against an offline operation */
- if (this_cpu_read(threshold_banks))
+ if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
return;
rdmsr(tr->b->address, lo, hi);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index bb9a46a804bf..baafbb37be67 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 2a0f83678911..5a99f993e639 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -18,6 +18,7 @@
#include <linux/kexec.h>
#include <linux/i8253.h>
#include <linux/random.h>
+#include <linux/swiotlb.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -329,8 +330,20 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
- if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
+ if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
static_branch_enable(&isolation_type_snp);
+#ifdef CONFIG_SWIOTLB
+ swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
+#endif
+ }
+
+#ifdef CONFIG_SWIOTLB
+ /*
+ * Enable swiotlb force mode in Isolation VMs so that DMA
+ * transactions go through the swiotlb bounce buffer.
+ */
+ swiotlb_force = SWIOTLB_FORCE;
+#endif
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 001808e3901c..48afe96ae0f0 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -410,6 +410,8 @@ void sgx_encl_release(struct kref *ref)
}
kfree(entry);
+ /* Invoke scheduler to prevent soft lockups. */
+ cond_resched();
}
xa_destroy(&encl->page_array);
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 9b204843b78d..fa04a73daf9c 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -11,26 +11,8 @@
#include <asm/traps.h>
#include "sgx.h"
-/**
- * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
- *
- * ENCLS has its own (positive value) error codes and also generates
- * ENCLS specific #GP and #PF faults. And the ENCLS values get munged
- * with system error codes as everything percolates back up the stack.
- * Unfortunately (for us), we need to precisely identify each unique
- * error code, e.g. the action taken if EWB fails varies based on the
- * type of fault and on the exact SGX error code, i.e. we can't simply
- * convert all faults to -EFAULT.
- *
- * To make all three error types coexist, we set bit 30 to identify an
- * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate
- * between positive (faults and SGX error codes) and negative (system
- * error codes) values.
- */
-#define ENCLS_FAULT_FLAG 0x40000000
-
/* Retrieve the encoded trapnr from the specified return code. */
-#define ENCLS_TRAPNR(r) ((r) & ~ENCLS_FAULT_FLAG)
+#define ENCLS_TRAPNR(r) ((r) & ~SGX_ENCLS_FAULT_FLAG)
/* Issue a WARN() about an ENCLS function. */
#define ENCLS_WARN(r, name) { \
@@ -50,7 +32,7 @@
*/
static inline bool encls_faulted(int ret)
{
- return ret & ENCLS_FAULT_FLAG;
+ return ret & SGX_ENCLS_FAULT_FLAG;
}
/**
@@ -88,11 +70,7 @@ static inline bool encls_failed(int ret)
asm volatile( \
"1: .byte 0x0f, 0x01, 0xcf;\n\t" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: orl $"__stringify(ENCLS_FAULT_FLAG)",%%eax\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_FAULT(1b, 3b) \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \
: "=a"(ret) \
: "a"(rax), inputs \
: "memory", "cc"); \
@@ -127,7 +105,7 @@ static inline bool encls_failed(int ret)
*
* Return:
* 0 on success,
- * trapnr with ENCLS_FAULT_FLAG set on fault
+ * trapnr with SGX_ENCLS_FAULT_FLAG set on fault
*/
#define __encls_N(rax, rbx_out, inputs...) \
({ \
@@ -136,11 +114,7 @@ static inline bool encls_failed(int ret)
"1: .byte 0x0f, 0x01, 0xcf;\n\t" \
" xor %%eax,%%eax;\n" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: orl $"__stringify(ENCLS_FAULT_FLAG)",%%eax\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_FAULT(1b, 3b) \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_SGX) \
: "=a"(ret), "=b"(rbx_out) \
: "a"(rax), inputs \
: "memory"); \
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 4b41efc9e367..8e4bc6453d26 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void)
{
struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
struct sgx_backing backing[SGX_NR_TO_SCAN];
- struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
- struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
@@ -418,13 +416,7 @@ skip:
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
- section = &sgx_epc_sections[epc_page->section];
- node = section->node;
-
- spin_lock(&node->lock);
- list_add_tail(&epc_page->list, &node->free_page_list);
- spin_unlock(&node->lock);
- atomic_long_inc(&sgx_nr_free_pages);
+ sgx_free_epc_page(epc_page);
}
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ea4fe192189d..53de044e5654 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -351,7 +351,7 @@ unsigned long oops_begin(void)
}
NOKPROBE_SYMBOL(oops_begin);
-void __noreturn rewind_stack_do_exit(int signr);
+void __noreturn rewind_stack_and_make_dead(int signr);
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
@@ -386,7 +386,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
* reuse the task stack and that existing poisons are invalid.
*/
kasan_unpoison_task_stack(current);
- rewind_stack_do_exit(signr);
+ rewind_stack_and_make_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index fd2d3ab38ebb..dc7da08bc700 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = {
.stolen_size = gen9_stolen_size,
};
+/* Intel integrated GPUs for which we need to reserve "stolen memory" */
static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_I830_IDS(&i830_early_ops),
INTEL_I845G_IDS(&i845_early_ops),
@@ -592,6 +593,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func)
u16 device;
int i;
+ /*
+ * Reserve "stolen memory" for an integrated GPU. If we've already
+ * found one, there's nothing to do for other (discrete) GPUs.
+ */
+ if (resource_size(&intel_graphics_stolen_res))
+ return;
+
device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
@@ -704,7 +712,7 @@ static struct chipset early_qrk[] __initdata = {
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
- QFLAG_APPLY_ONCE, intel_graphics_quirks },
+ 0, intel_graphics_quirks },
/*
* HPET on the current version of the Baytrail platform has accuracy
* problems: it will halt in deep idle state - so we disable it.
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index dd3777ac0443..8dea01ffc5c1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,7 +206,27 @@ void fpu_reset_from_exception_fixup(void)
}
#if IS_ENABLED(CONFIG_KVM)
-static void __fpstate_reset(struct fpstate *fpstate);
+static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
+
+static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
+{
+ struct fpu_state_perm *fpuperm;
+ u64 perm;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ spin_lock_irq(&current->sighand->siglock);
+ fpuperm = &current->group_leader->thread.fpu.guest_perm;
+ perm = fpuperm->__state_perm;
+
+ /* First fpstate allocation locks down permissions. */
+ WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
+
+ spin_unlock_irq(&current->sighand->siglock);
+
+ gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
+}
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
{
@@ -218,12 +238,18 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
if (!fpstate)
return false;
- __fpstate_reset(fpstate);
+ /* Leave xfd at 0 (the reset value defined by the spec) */
+ __fpstate_reset(fpstate, 0);
fpstate_init_user(fpstate);
fpstate->is_valloc = true;
fpstate->is_guest = true;
- gfpu->fpstate = fpstate;
+ gfpu->fpstate = fpstate;
+ gfpu->xfeatures = fpu_user_cfg.default_features;
+ gfpu->perm = fpu_user_cfg.default_features;
+ gfpu->uabi_size = fpu_user_cfg.default_size;
+ fpu_init_guest_permissions(gfpu);
+
return true;
}
EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
@@ -243,6 +269,64 @@ void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
}
EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
+/*
+ * fpu_enable_guest_xfd_features - Check xfeatures against guest perm and enable
+ * @guest_fpu: Pointer to the guest FPU container
+ * @xfeatures: Features requested by guest CPUID
+ *
+ * Enable all dynamic xfeatures according to guest perm and requested CPUID.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures)
+{
+ lockdep_assert_preemption_enabled();
+
+ /* Nothing to do if all requested features are already enabled. */
+ xfeatures &= ~guest_fpu->xfeatures;
+ if (!xfeatures)
+ return 0;
+
+ return __xfd_enable_feature(xfeatures, guest_fpu);
+}
+EXPORT_SYMBOL_GPL(fpu_enable_guest_xfd_features);
+
+#ifdef CONFIG_X86_64
+void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd)
+{
+ fpregs_lock();
+ guest_fpu->fpstate->xfd = xfd;
+ if (guest_fpu->fpstate->in_use)
+ xfd_update_state(guest_fpu->fpstate);
+ fpregs_unlock();
+}
+EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
+
+/**
+ * fpu_sync_guest_vmexit_xfd_state - Synchronize XFD MSR and software state
+ *
+ * Must be invoked from KVM after a VMEXIT before enabling interrupts when
+ * XFD write emulation is disabled. This is required because the guest can
+ * freely modify XFD and the state at VMEXIT is not guaranteed to be the
+ * same as the state on VMENTER. So the software state has to be updated before
+ * any operation which depends on it can take place.
+ *
+ * Note: It can be invoked unconditionally even when write emulation is
+ * enabled, at the cost of a (then pointless) MSR read.
+ */
+void fpu_sync_guest_vmexit_xfd_state(void)
+{
+ struct fpstate *fps = current->thread.fpu.fpstate;
+
+ lockdep_assert_irqs_disabled();
+ if (fpu_state_size_dynamic()) {
+ rdmsrl(MSR_IA32_XFD, fps->xfd);
+ __this_cpu_write(xfd_state, fps->xfd);
+ }
+}
+EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
+#endif /* CONFIG_X86_64 */
+
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
@@ -437,26 +521,28 @@ void fpstate_init_user(struct fpstate *fpstate)
fpstate_init_fstate(fpstate);
}
-static void __fpstate_reset(struct fpstate *fpstate)
+static void __fpstate_reset(struct fpstate *fpstate, u64 xfd)
{
/* Initialize sizes and feature masks */
fpstate->size = fpu_kernel_cfg.default_size;
fpstate->user_size = fpu_user_cfg.default_size;
fpstate->xfeatures = fpu_kernel_cfg.default_features;
fpstate->user_xfeatures = fpu_user_cfg.default_features;
- fpstate->xfd = init_fpstate.xfd;
+ fpstate->xfd = xfd;
}
void fpstate_reset(struct fpu *fpu)
{
/* Set the fpstate pointer to the default fpstate */
fpu->fpstate = &fpu->__fpstate;
- __fpstate_reset(fpu->fpstate);
+ __fpstate_reset(fpu->fpstate, init_fpstate.xfd);
/* Initialize the permission related info in fpu */
fpu->perm.__state_perm = fpu_kernel_cfg.default_features;
fpu->perm.__state_size = fpu_kernel_cfg.default_size;
fpu->perm.__user_state_size = fpu_user_cfg.default_size;
+ /* Same defaults for guests */
+ fpu->guest_perm = fpu->perm;
}
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
@@ -467,6 +553,7 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
spin_lock_irq(&current->sighand->siglock);
/* Fork also inherits the permissions of the parent */
dst_fpu->perm = src_fpu->perm;
+ dst_fpu->guest_perm = src_fpu->guest_perm;
spin_unlock_irq(&current->sighand->siglock);
}
}
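[The intended KVM-side call sequence, stated as an assumption from this API rather than a quote from the KVM patches: allocating the first guest fpstate locks the group leader's guest permissions, after which dynamic features requested via guest CPUID are enabled against that locked set.]

/* Hypothetical vCPU-creation sketch. */
static int vcpu_fpu_setup_sketch(struct fpu_guest *gfpu, u64 cpuid_xfeatures)
{
	if (!fpu_alloc_guest_fpstate(gfpu))
		return -ENOMEM;

	/* e.g. AMX guests request XFEATURE_MASK_XTILE_DATA via CPUID */
	return fpu_enable_guest_xfd_features(gfpu, cpuid_xfeatures);
}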
diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h
index 17c26b164c63..098f367bb8a7 100644
--- a/arch/x86/kernel/fpu/legacy.h
+++ b/arch/x86/kernel/fpu/legacy.h
@@ -35,11 +35,7 @@ static inline void ldmxcsr(u32 mxcsr)
int err; \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err), output \
: "0"(0), input); \
err; \
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 437d7c930c0b..75ffaef8c299 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
struct fpu *fpu = &target->thread.fpu;
- struct user32_fxsr_struct newstate;
+ struct fxregs_state newstate;
int ret;
- BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state));
-
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
/* Copy the state */
memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate));
- /* Clear xmm8..15 */
+ /* Clear xmm8..15 for 32-bit callers */
BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16);
- memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16);
+ if (in_ia32_syscall())
+ memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d28829403ed0..7c7824ae7862 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1500,35 +1500,13 @@ void fpstate_free(struct fpu *fpu)
}
/**
- * fpu_install_fpstate - Update the active fpstate in the FPU
- *
- * @fpu: A struct fpu * pointer
- * @newfps: A struct fpstate * pointer
- *
- * Returns: A null pointer if the last active fpstate is the embedded
- * one or the new fpstate is already installed;
- * otherwise, a pointer to the old fpstate which has to
- * be freed by the caller.
- */
-static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
- struct fpstate *newfps)
-{
- struct fpstate *oldfps = fpu->fpstate;
-
- if (fpu->fpstate == newfps)
- return NULL;
-
- fpu->fpstate = newfps;
- return oldfps != &fpu->__fpstate ? oldfps : NULL;
-}
-
-/**
* fpstate_realloc - Reallocate struct fpstate for the requested new features
*
* @xfeatures: A bitmap of xstate features which extend the enabled features
* of that task
* @ksize: The required size for the kernel buffer
* @usize: The required size for user space buffers
+ * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations
*
* Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
* terminates quickly, vfree()-induced IPIs may be a concern, but tasks
@@ -1537,13 +1515,13 @@ static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
* Returns: 0 on success, -ENOMEM on allocation error.
*/
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
- unsigned int usize)
+ unsigned int usize, struct fpu_guest *guest_fpu)
{
struct fpu *fpu = &current->thread.fpu;
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
+ bool in_use;
- curfps = fpu->fpstate;
fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
newfps = vzalloc(fpsize);
@@ -1553,28 +1531,59 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
newfps->user_size = usize;
newfps->is_valloc = true;
+ /*
+ * When a guest FPU is supplied, use @guest_fpu->fpstate
+ * as the reference, independently of whether it is in use.
+ */
+ curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;
+
+ /* Determine whether @curfps is the active fpstate */
+ in_use = fpu->fpstate == curfps;
+
+ if (guest_fpu) {
+ newfps->is_guest = true;
+ newfps->is_confidential = curfps->is_confidential;
+ newfps->in_use = curfps->in_use;
+ guest_fpu->xfeatures |= xfeatures;
+ guest_fpu->uabi_size = usize;
+ }
+
fpregs_lock();
/*
- * Ensure that the current state is in the registers before
- * swapping fpstate as that might invalidate it due to layout
- * changes.
+ * If @curfps is in use, ensure that the current state is in the
+ * registers before swapping fpstate as that might invalidate it
+ * due to layout changes.
*/
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
- newfps->xfd = curfps->xfd & ~xfeatures;
- curfps = fpu_install_fpstate(fpu, newfps);
+ if (!guest_fpu)
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
+ newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
- xfd_update_state(newfps);
+ if (guest_fpu) {
+ guest_fpu->fpstate = newfps;
+ /* If curfps is active, update the FPU fpstate pointer */
+ if (in_use)
+ fpu->fpstate = newfps;
+ } else {
+ fpu->fpstate = newfps;
+ }
+
+ if (in_use)
+ xfd_update_state(fpu->fpstate);
fpregs_unlock();
- vfree(curfps);
+ /* Only free valloc'ed state */
+ if (curfps && curfps->is_valloc)
+ vfree(curfps);
+
return 0;
}
@@ -1595,7 +1604,7 @@ static int validate_sigaltstack(unsigned int usize)
return 0;
}
-static int __xstate_request_perm(u64 permitted, u64 requested)
+static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
/*
* This deliberately does not exclude !XSAVES as we still might
@@ -1605,9 +1614,10 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
*/
bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
- int ret;
+ int ret = 0;
/* Check whether fully enabled */
if ((permitted & requested) == requested)
@@ -1621,15 +1631,18 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
mask &= XFEATURE_MASK_USER_SUPPORTED;
usize = xstate_calculate_size(mask, false);
- ret = validate_sigaltstack(usize);
- if (ret)
- return ret;
+ if (!guest) {
+ ret = validate_sigaltstack(usize);
+ if (ret)
+ return ret;
+ }
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
- WRITE_ONCE(fpu->perm.__state_perm, requested);
+ WRITE_ONCE(perm->__state_perm, requested);
/* Protected by sighand lock */
- fpu->perm.__state_size = ksize;
- fpu->perm.__user_state_size = usize;
+ perm->__state_size = ksize;
+ perm->__user_state_size = usize;
return ret;
}
@@ -1640,7 +1653,7 @@ static const u64 xstate_prctl_req[XFEATURE_MAX] = {
[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
-static int xstate_request_perm(unsigned long idx)
+static int xstate_request_perm(unsigned long idx, bool guest)
{
u64 permitted, requested;
int ret;
@@ -1661,26 +1674,33 @@ static int xstate_request_perm(unsigned long idx)
return -EOPNOTSUPP;
/* Lockless quick check */
- permitted = xstate_get_host_group_perm();
+ permitted = xstate_get_group_perm(guest);
if ((permitted & requested) == requested)
return 0;
/* Protect against concurrent modifications */
spin_lock_irq(&current->sighand->siglock);
- permitted = xstate_get_host_group_perm();
- ret = __xstate_request_perm(permitted, requested);
+ permitted = xstate_get_group_perm(guest);
+
+ /* First vCPU allocation locks the permissions. */
+ if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
+ ret = -EBUSY;
+ else
+ ret = __xstate_request_perm(permitted, requested, guest);
spin_unlock_irq(&current->sighand->siglock);
return ret;
}
-int xfd_enable_feature(u64 xfd_err)
+int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
+ struct fpu_state_perm *perm;
unsigned int ksize, usize;
struct fpu *fpu;
if (!xfd_event) {
- pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
+ if (!guest_fpu)
+ pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
return 0;
}
@@ -1688,14 +1708,16 @@ int xfd_enable_feature(u64 xfd_err)
spin_lock_irq(&current->sighand->siglock);
/* If not permitted let it die */
- if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) {
+ if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
spin_unlock_irq(&current->sighand->siglock);
return -EPERM;
}
fpu = &current->group_leader->thread.fpu;
- ksize = fpu->perm.__state_size;
- usize = fpu->perm.__user_state_size;
+ perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
+ ksize = perm->__state_size;
+ usize = perm->__user_state_size;
+
/*
* The feature is permitted. State size is sufficient. Dropping
* the lock is safe here even if more features are added from
@@ -1708,17 +1730,29 @@ int xfd_enable_feature(u64 xfd_err)
* Try to allocate a new fpstate. If that fails there is no way
* out.
*/
- if (fpstate_realloc(xfd_event, ksize, usize))
+ if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
return -EFAULT;
return 0;
}
+
+int xfd_enable_feature(u64 xfd_err)
+{
+ return __xfd_enable_feature(xfd_err, NULL);
+}
+
#else /* CONFIG_X86_64 */
-static inline int xstate_request_perm(unsigned long idx)
+static inline int xstate_request_perm(unsigned long idx, bool guest)
{
return -EPERM;
}
#endif /* !CONFIG_X86_64 */
+u64 xstate_get_guest_group_perm(void)
+{
+ return xstate_get_group_perm(true);
+}
+EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
+
/**
* fpu_xstate_prctl - xstate permission operations
* @tsk: Redundant pointer to current
@@ -1742,6 +1776,7 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
u64 __user *uptr = (u64 __user *)arg2;
u64 permitted, supported;
unsigned long idx = arg2;
+ bool guest = false;
if (tsk != current)
return -EPERM;
@@ -1760,11 +1795,20 @@ long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
permitted &= XFEATURE_MASK_USER_SUPPORTED;
return put_user(permitted, uptr);
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ permitted = xstate_get_guest_group_perm();
+ permitted &= XFEATURE_MASK_USER_SUPPORTED;
+ return put_user(permitted, uptr);
+
+ case ARCH_REQ_XCOMP_GUEST_PERM:
+ guest = true;
+ fallthrough;
+
case ARCH_REQ_XCOMP_PERM:
if (!IS_ENABLED(CONFIG_X86_64))
return -EOPNOTSUPP;
- return xstate_request_perm(idx);
+ return xstate_request_perm(idx, guest);
default:
return -EINVAL;
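[From userspace, a VMM requests the dynamic features for its guests before the first vCPU fpstate is allocated; after that the permissions are locked and the request fails with -EBUSY. A hypothetical usage sketch: ARCH_REQ_XCOMP_GUEST_PERM comes from the asm/prctl.h of this series, and 18 is the XTILE_DATA xfeature number as defined in the kernel headers:]

#include <sys/syscall.h>
#include <unistd.h>
#include <asm/prctl.h>			/* ARCH_REQ_XCOMP_GUEST_PERM */

#define XFEATURE_XTILE_DATA	18	/* xfeature index, kernel headers */

static int request_guest_amx(void)
{
	return syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
		       XFEATURE_XTILE_DATA);
}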
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 86ea7c0fa2f6..d22ace092ca2 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -20,10 +20,19 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
}
-static inline u64 xstate_get_host_group_perm(void)
+static inline u64 xstate_get_group_perm(bool guest)
{
+ struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu_state_perm *perm;
+
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
- return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm);
+ perm = guest ? &fpu->guest_perm : &fpu->perm;
+ return READ_ONCE(perm->__state_perm);
+}
+
+static inline u64 xstate_get_host_group_perm(void)
+{
+ return xstate_get_group_perm(false);
}
enum xstate_copy_mode {
@@ -108,11 +117,7 @@ static inline u64 xfeatures_mask_independent(void)
"\n" \
"xor %[err], %[err]\n" \
"3:\n" \
- ".pushsection .fixup,\"ax\"\n" \
- "4: movl $-2, %[err]\n" \
- "jmp 3b\n" \
- ".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
+ _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err) \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -149,8 +154,14 @@ static inline void xfd_update_state(struct fpstate *fpstate)
}
}
}
+
+extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
#else
static inline void xfd_update_state(struct fpstate *fpstate) { }
+
+static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu)
+{
+ return -EPERM;
+}
#endif
/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c39f906cdc4e..7cc540e6de0c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -303,7 +303,7 @@ union ftrace_op_code_union {
} __attribute__((packed));
};
-#define RET_SIZE 1
+#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index e405fe1a8bf4..a0ed0e4a2c0c 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -19,7 +19,7 @@
#endif
SYM_FUNC_START(__fentry__)
- ret
+ RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
@@ -84,7 +84,7 @@ ftrace_graph_call:
/* This is weak to keep gas from relaxing the jumps */
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
- ret
+ RET
SYM_CODE_END(ftrace_caller)
SYM_CODE_START(ftrace_regs_caller)
@@ -177,7 +177,7 @@ SYM_CODE_START(ftrace_graph_caller)
popl %edx
popl %ecx
popl %eax
- ret
+ RET
SYM_CODE_END(ftrace_graph_caller)
.globl return_to_handler
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 7a879901f103..11ac028e30e4 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -132,7 +132,7 @@
#ifdef CONFIG_DYNAMIC_FTRACE
SYM_FUNC_START(__fentry__)
- retq
+ RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
@@ -176,11 +176,11 @@ SYM_FUNC_END(ftrace_caller);
SYM_FUNC_START(ftrace_epilogue)
/*
* This is weak to keep gas from relaxing the jumps.
- * It is also used to copy the retq for trampolines.
+ * It is also used to copy the RET for trampolines.
*/
SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
UNWIND_HINT_FUNC
- retq
+ RET
SYM_FUNC_END(ftrace_epilogue)
SYM_FUNC_START(ftrace_regs_caller)
@@ -284,7 +284,7 @@ SYM_FUNC_START(__fentry__)
jnz trace
SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
- retq
+ RET
trace:
/* save_mcount_regs fills in first two parameters */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d8c64dab0efe..eb8656bac99b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -340,7 +340,7 @@ SYM_FUNC_END(startup_32_smp)
__INIT
setup_once:
andl $0,setup_once_ref /* Once is enough, thanks */
- ret
+ RET
SYM_FUNC_START(early_idt_handler_array)
# 36(%esp) %eflags
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 882213df3713..71f336425e58 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1435,8 +1435,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_rtc_timer_reinit();
memset(&curr_time, 0, sizeof(struct rtc_time));
- if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- mc146818_get_time(&curr_time);
+ if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
+ if (unlikely(mc146818_get_time(&curr_time) < 0)) {
+ pr_err_ratelimited("unable to read current time from RTC\n");
+ return IRQ_HANDLED;
+ }
+ }
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index 760e1f293093..aaf9e776f323 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -11,7 +11,7 @@
SYM_FUNC_START(native_save_fl)
pushf
pop %_ASM_AX
- ret
+ RET
SYM_FUNC_END(native_save_fl)
.popsection
EXPORT_SYMBOL(native_save_fl)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index fce99e249d61..6290712cb36d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1051,7 +1051,7 @@ asm(
" addl $4, %esp\n"
" popfl\n"
#endif
- " ret\n"
+ ASM_RET
".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"
);
NOKPROBE_SYMBOL(__kretprobe_trampoline);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 59abbdad7729..d77481ecb0d5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -313,7 +313,7 @@ static void kvm_register_steal_time(void)
return;
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
- pr_info("stealtime: cpu %d, msr %llx\n", cpu,
+ pr_debug("stealtime: cpu %d, msr %llx\n", cpu,
(unsigned long long) slow_virt_to_phys(st));
}
@@ -350,7 +350,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
- pr_info("setup async PF for cpu %d\n", smp_processor_id());
+ pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -376,7 +376,7 @@ static void kvm_pv_disable_apf(void)
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
__this_cpu_write(apf_reason.enabled, 0);
- pr_info("disable async PF for cpu %d\n", smp_processor_id());
+ pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}
static void kvm_disable_steal_time(void)
@@ -462,19 +462,24 @@ static bool pv_tlb_flush_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ !boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (num_possible_cpus() != 1));
}
static bool pv_ipi_supported(void)
{
- return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
+ (num_possible_cpus() != 1));
}
static bool pv_sched_yield_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ !boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (num_possible_cpus() != 1));
}
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
@@ -619,7 +624,7 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
/* Make sure other vCPUs get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
- if (vcpu_is_preempted(cpu)) {
+ if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
break;
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 462dd8e9b03d..c5caa7311bd8 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -174,7 +174,7 @@ static void kvm_register_clock(char *txt)
pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
wrmsrl(msr_kvm_system_time, pa);
- pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
+ pr_debug("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
}
static void kvm_save_sched_clock_state(void)
@@ -239,6 +239,9 @@ static void __init kvmclock_init_mem(void)
static int __init kvm_setup_vsyscall_timeinfo(void)
{
+ if (!kvm_para_available() || !kvmclock)
+ return 0;
+
kvmclock_init_mem();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 169fb6f4cd2e..95fa745e310a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void)
void *module_alloc(unsigned long size)
{
+ gfp_t gfp_mask = GFP_KERNEL;
void *p;
if (PAGE_ALIGN(size) > MODULES_LEN)
@@ -74,10 +75,10 @@ void *module_alloc(unsigned long size)
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
- MODULES_END, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
+ MODULES_END, gfp_mask,
+ PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
- if (p && (kasan_module_alloc(p, size) < 0)) {
+ if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7f7636aac620..4420499f7bb4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -41,7 +41,7 @@ extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
".global _paravirt_nop\n"
"_paravirt_nop:\n\t"
- "ret\n\t"
+ ASM_RET
".size _paravirt_nop, . - _paravirt_nop\n\t"
".type _paravirt_nop, @function\n\t"
".popsection");
@@ -51,7 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n"
".global paravirt_ret0\n"
"paravirt_ret0:\n\t"
"xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
- "ret\n\t"
+ ASM_RET
".size paravirt_ret0, . - paravirt_ret0\n\t"
".type paravirt_ret0, @function\n\t"
".popsection");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5d481038fe0b..81d8ef036637 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -993,6 +993,8 @@ long do_arch_prctl_common(struct task_struct *task, int option,
case ARCH_GET_XCOMP_SUPP:
case ARCH_GET_XCOMP_PERM:
case ARCH_REQ_XCOMP_PERM:
+ case ARCH_GET_XCOMP_GUEST_PERM:
+ case ARCH_REQ_XCOMP_GUEST_PERM:
return fpu_xstate_prctl(task, option, arg2);
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6d2244c94799..8d2f2f995539 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
- .n = sizeof(struct user_i387_struct) / sizeof(long),
+ .n = sizeof(struct fxregs_state) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
@@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = {
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
- .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
+ .n = sizeof(struct fxregs_state) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index f469153eca8a..fcc8a7699103 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -91,7 +91,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
movl %edi, %eax
addl $(identity_mapped - relocate_kernel), %eax
pushl %eax
- ret
+ RET
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
@@ -159,7 +159,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %edx, %edx
xorl %esi, %esi
xorl %ebp, %ebp
- ret
+ RET
1:
popl %edx
movl CP_PA_SWAP_PAGE(%edi), %esp
@@ -190,7 +190,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
movl %edi, %eax
addl $(virtual_mapped - relocate_kernel), %eax
pushl %eax
- ret
+ RET
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
@@ -208,7 +208,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popl %edi
popl %esi
popl %ebx
- ret
+ RET
SYM_CODE_END(virtual_mapped)
/* Do the copies */
@@ -271,7 +271,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
popl %edi
popl %ebx
popl %ebp
- ret
+ RET
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index c8fe74a28143..399f075ccdc4 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -104,7 +104,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
/* jump to identity mapped page */
addq $(identity_mapped - relocate_kernel), %r8
pushq %r8
- ret
+ RET
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
@@ -191,7 +191,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %r14d, %r14d
xorl %r15d, %r15d
- ret
+ RET
1:
popq %rdx
@@ -210,7 +210,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
call swap_pages
movq $virtual_mapped, %rax
pushq %rax
- ret
+ RET
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
@@ -231,7 +231,7 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popq %r12
popq %rbp
popq %rbx
- ret
+ RET
SYM_CODE_END(virtual_mapped)
/* Do the copies */
@@ -288,7 +288,7 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
lea PAGE_SIZE(%rax), %rsi
jmp 0b
3:
- ret
+ RET
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7b65275544b2..49325caa7307 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -84,60 +84,6 @@ static bool __init pcpu_need_numa(void)
}
#endif
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
- unsigned long align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE,
- node);
-
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
- cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-/*
- * Helpers for first chunk memory allocation
- */
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
- return pcpu_alloc_bootmem(cpu, size, align);
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
#ifdef CONFIG_NUMA
@@ -150,7 +96,12 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
#endif
}
-static void __init pcpup_populate_pte(unsigned long addr)
+static int __init pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
@@ -205,15 +156,14 @@ void __init setup_per_cpu_areas(void)
rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
dyn_size, atom_size,
pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ pcpu_cpu_to_node);
if (rc < 0)
pr_warn("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- pcpu_fc_alloc, pcpu_fc_free,
- pcpup_populate_pte);
+ pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S
index ee04941a6546..3355e27c69eb 100644
--- a/arch/x86/kernel/sev_verify_cbit.S
+++ b/arch/x86/kernel/sev_verify_cbit.S
@@ -85,5 +85,5 @@ SYM_FUNC_START(sev_verify_cbit)
#endif
/* Return page-table pointer */
movq %rdi, %rax
- ret
+ RET
SYM_FUNC_END(sev_verify_cbit)
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 9c407a33a774..531fb4cbb63f 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -17,6 +17,8 @@ enum insn_type {
*/
static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
+static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
+
static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
{
const void *emulate = NULL;
@@ -42,8 +44,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
break;
case RET:
- code = text_gen_insn(RET_INSN_OPCODE, insn, func);
- size = RET_INSN_SIZE;
+ code = &retinsn;
break;
}
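[This is a consumer of the INT3-padding support added to text_poke_loc_init() earlier in this diff: a static_call slot is CALL_INSN_SIZE (5) bytes, and patching in a bare 1-byte RET would leave four stale bytes for straight-line speculation to run into. Sketch of the patched slot, equivalent to the retinsn array above:]

static const u8 patched_slot[5] = {
	RET_INSN_OPCODE,			/* 0xc3 */
	INT3_INSN_OPCODE, INT3_INSN_OPCODE,	/* 0xcc: SLS stopper and */
	INT3_INSN_OPCODE, INT3_INSN_OPCODE,	/* slot fill, poked in one go */
};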
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 641f0fe1e5b4..1258a5872d12 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -132,9 +132,9 @@ SYM_FUNC_START_LOCAL(verify_cpu)
.Lverify_cpu_no_longmode:
popf # Restore caller passed flags
movl $1,%eax
- ret
+ RET
.Lverify_cpu_sse_ok:
popf # Restore caller passed flags
xorl %eax, %eax
- ret
+ RET
SYM_FUNC_END(verify_cpu)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f..27f830345b6f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -137,7 +137,6 @@ SECTIONS
ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
STATIC_CALL_TEXT
- *(.fixup)
*(.gnu.warning)
#ifdef CONFIG_RETPOLINE
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 8b395821cb8d..7d20c1d34a3c 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -145,18 +145,6 @@ struct x86_platform_ops x86_platform __ro_after_init = {
EXPORT_SYMBOL_GPL(x86_platform);
-#if defined(CONFIG_PCI_MSI)
-struct x86_msi_ops x86_msi __ro_after_init = {
- .restore_msi_irqs = default_restore_msi_irqs,
-};
-
-/* MSI arch specific hooks */
-void arch_restore_msi_irqs(struct pci_dev *dev)
-{
- x86_msi.restore_msi_irqs(dev);
-}
-#endif
-
struct x86_apic_ops x86_apic_ops __ro_after_init = {
.io_apic_read = native_io_apic_read,
.restore = native_restore_boot_irq_mode,