aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/amd.c6
-rw-r--r--arch/x86/kernel/cpu/bugs.c36
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/cpu/hygon.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c63
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c33
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c8
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c16
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c6
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c12
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c3
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h10
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c4
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c35
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c9
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c8
-rw-r--r--arch/x86/kernel/cpu/topology.c16
-rw-r--r--arch/x86/kernel/cpu/tsx.c38
20 files changed, 191 insertions, 120 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 860b60273df3..c75d75b9f11a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -770,8 +770,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}
-#define MSR_AMD64_DE_CFG 0xC0011029
-
static void init_amd_ln(struct cpuinfo_x86 *c)
{
/*
@@ -965,8 +963,8 @@ static void init_amd(struct cpuinfo_x86 *c)
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
*/
- msr_set_bit(MSR_F10H_DECFG,
- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ msr_set_bit(MSR_AMD64_DE_CFG,
+ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
/* A serializing LFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index da7c361f47e0..6daf84229548 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
static DEFINE_MUTEX(spec_ctrl_mutex);
+/* Update SPEC_CTRL MSR and its cached copy unconditionally */
+static void update_spec_ctrl(u64 val)
+{
+ this_cpu_write(x86_spec_ctrl_current, val);
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
/*
* Keep track of the SPEC_CTRL MSR value for the current task, which may differ
* from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-void write_spec_ctrl_current(u64 val, bool force)
+void update_spec_ctrl_cond(u64 val)
{
if (this_cpu_read(x86_spec_ctrl_current) == val)
return;
@@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force)
* When KERNEL_IBRS this MSR is written on return-to-user, unless
* forced the update can be delayed until that time.
*/
- if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
@@ -196,22 +203,15 @@ void __init check_bugs(void)
}
/*
- * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
*/
void
-x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+ u64 guestval, hostval;
struct thread_info *ti = current_thread_info();
- if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- if (hostval != guestval) {
- msrval = setguest ? guestval : hostval;
- wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
- }
- }
-
/*
* If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
* MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
@@ -1335,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
if (ia32_cap & ARCH_CAP_RRSBA) {
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -1457,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void)
if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
switch (mode) {
@@ -1571,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void)
static void update_stibp_msr(void * __unused)
{
u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
- write_spec_ctrl_current(val, true);
+ update_spec_ctrl(val);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1804,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -2055,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3e508f239098..3f66dd03c091 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -22,9 +22,9 @@
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <linux/pgtable.h>
+#include <linux/stackprotector.h>
#include <asm/cmdline.h>
-#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/doublefault.h>
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index c881bcafba7d..d95221117129 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -75,6 +75,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
+ { X86_FEATURE_SGX_EDECCSSA, X86_FEATURE_SGX1 },
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 21fd425088fe..c393b8773ace 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -326,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
*/
- msr_set_bit(MSR_F10H_DECFG,
- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+ msr_set_bit(MSR_AMD64_DE_CFG,
+ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT);
/* A serializing LFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 2d7ea5480ec3..427899650483 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1034,8 +1034,32 @@ static const struct {
static struct ratelimit_state bld_ratelimit;
+static unsigned int sysctl_sld_mitigate = 1;
static DEFINE_SEMAPHORE(buslock_sem);
+#ifdef CONFIG_PROC_SYSCTL
+static struct ctl_table sld_sysctls[] = {
+ {
+ .procname = "split_lock_mitigate",
+ .data = &sysctl_sld_mitigate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {}
+};
+
+static int __init sld_mitigate_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sld_sysctls);
+ return 0;
+}
+
+late_initcall(sld_mitigate_sysctl_init);
+#endif
+
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
int len = strlen(opt), ratelimit;
@@ -1146,12 +1170,20 @@ static void split_lock_init(void)
split_lock_verify_msr(sld_state != sld_off);
}
-static void __split_lock_reenable(struct work_struct *work)
+static void __split_lock_reenable_unlock(struct work_struct *work)
{
sld_update_msr(true);
up(&buslock_sem);
}
+static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
+
+static void __split_lock_reenable(struct work_struct *work)
+{
+ sld_update_msr(true);
+}
+static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
* detection then the delayed work will be executed on some other CPU. That
@@ -1169,10 +1201,9 @@ static int splitlock_cpu_offline(unsigned int cpu)
return 0;
}
-static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
-
static void split_lock_warn(unsigned long ip)
{
+ struct delayed_work *work;
int cpu;
if (!current->reported_split_lock)
@@ -1180,14 +1211,26 @@ static void split_lock_warn(unsigned long ip)
current->comm, current->pid, ip);
current->reported_split_lock = 1;
- /* misery factor #1, sleep 10ms before trying to execute split lock */
- if (msleep_interruptible(10) > 0)
- return;
- /* Misery factor #2, only allow one buslocked disabled core at a time */
- if (down_interruptible(&buslock_sem) == -EINTR)
- return;
+ if (sysctl_sld_mitigate) {
+ /*
+ * misery factor #1:
+ * sleep 10ms before trying to execute split lock.
+ */
+ if (msleep_interruptible(10) > 0)
+ return;
+ /*
+ * Misery factor #2:
+ * only allow one buslocked disabled core at a time.
+ */
+ if (down_interruptible(&buslock_sem) == -EINTR)
+ return;
+ work = &sl_reenable_unlock;
+ } else {
+ work = &sl_reenable;
+ }
+
cpu = get_cpu();
- schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
+ schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */
sld_update_msr(false);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1c87501e0fa3..10fb5b5c9efa 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -788,6 +788,24 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
return status & MCI_STATUS_DEFERRED;
}
+static bool _log_error_deferred(unsigned int bank, u32 misc)
+{
+ if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
+ mca_msr_reg(bank, MCA_ADDR), misc))
+ return false;
+
+ /*
+ * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
+ * Return true here to avoid accessing these registers.
+ */
+ if (!mce_flags.smca)
+ return true;
+
+ /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ return true;
+}
+
/*
* We have three scenarios for checking for Deferred errors:
*
@@ -799,19 +817,8 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
*/
static void log_error_deferred(unsigned int bank)
{
- bool defrd;
-
- defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
- mca_msr_reg(bank, MCA_ADDR), 0);
-
- if (!mce_flags.smca)
- return;
-
- /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
- if (defrd) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ if (_log_error_deferred(bank, 0))
return;
- }
/*
* Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
@@ -832,7 +839,7 @@ static void amd_deferred_error_interrupt(void)
static void log_error_thresholding(unsigned int bank, u64 misc)
{
- _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc);
+ _log_error_deferred(bank, misc);
}
static void log_and_reset_block(struct threshold_block *block)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 00483d1c27e4..c4477162c07d 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -203,6 +203,11 @@ static struct severity {
BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
),
MCESEV(
+ PANIC, "Uncorrected in kernel",
+ BITSET(MCI_STATUS_UC),
+ KERNEL
+ ),
+ MCESEV(
UC, "Uncorrected",
BITSET(MCI_STATUS_UC)
),
@@ -391,9 +396,6 @@ static noinstr int mce_severity_intel(struct mce *m, struct pt_regs *regs, char
*msg = s->msg;
s->covered = 1;
- if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL)
- return MCE_PANIC_SEVERITY;
-
return s->sev;
}
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index e7410e98fc1f..3a35dec3ec55 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -440,7 +440,13 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p
return ret;
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (rev >= mc->hdr.patch_id)
+
+ /*
+ * Allow application of the same revision to pick up SMT-specific
+ * changes even if the revision of the other SMT thread is already
+ * up-to-date.
+ */
+ if (rev > mc->hdr.patch_id)
return ret;
if (!__apply_microcode_amd(mc)) {
@@ -528,8 +534,12 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- /* Check whether we have saved a new patch already: */
- if (*new_rev && rev < mc->hdr.patch_id) {
+ /*
+ * Check whether a new patch has been saved already. Also, allow application of
+ * the same revision in order to pick up SMT-thread-specific configuration even
+ * if the sibling SMT thread already has an up-to-date revision.
+ */
+ if (*new_rev && rev <= mc->hdr.patch_id) {
if (!__apply_microcode_amd(mc)) {
*new_rev = mc->hdr.patch_id;
return;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 831613959a92..46668e255421 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -475,6 +475,12 @@ static bool __init ms_hyperv_x2apic_available(void)
* (logically) generates MSIs directly to the system APIC irq domain.
* There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the
* pci-hyperv host bridge.
+ *
+ * Note: for a Hyper-V root partition, this will always return false.
+ * The hypervisor doesn't expose these HYPERV_CPUID_VIRT_STACK_* cpuids by
+ * default, they are implemented as intercepts by the Windows Hyper-V stack.
+ * Even a nested root partition (L2 root) will not get them because the
+ * nested (L1) hypervisor filters them out.
*/
static bool __init ms_hyperv_msi_ext_dest_id(void)
{
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index de62b0b87ced..c98e52ff5f20 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -66,9 +66,6 @@ struct rdt_hw_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
.cache_level = 3,
- .cache = {
- .min_cbm_bits = 1,
- },
.domains = domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -83,9 +80,6 @@ struct rdt_hw_resource rdt_resources_all[] = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
.cache_level = 2,
- .cache = {
- .min_cbm_bits = 1,
- },
.domains = domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -581,7 +575,7 @@ static void clear_closid_rmid(int cpu)
state->default_rmid = 0;
state->cur_closid = 0;
state->cur_rmid = 0;
- wrmsr(IA32_PQR_ASSOC, 0, 0);
+ wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
}
static int resctrl_online_cpu(unsigned int cpu)
@@ -834,8 +828,8 @@ static __init void rdt_init_res_defs_intel(void)
if (r->rid == RDT_RESOURCE_L3 ||
r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = false;
- r->cache.arch_has_empty_bitmaps = false;
r->cache.arch_has_per_cpu_cfg = false;
+ r->cache.min_cbm_bits = 1;
} else if (r->rid == RDT_RESOURCE_MBA) {
hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
hw_res->msr_update = mba_wrmsr_intel;
@@ -854,8 +848,8 @@ static __init void rdt_init_res_defs_amd(void)
if (r->rid == RDT_RESOURCE_L3 ||
r->rid == RDT_RESOURCE_L2) {
r->cache.arch_has_sparse_bitmaps = true;
- r->cache.arch_has_empty_bitmaps = true;
r->cache.arch_has_per_cpu_cfg = true;
+ r->cache.min_cbm_bits = 0;
} else if (r->rid == RDT_RESOURCE_MBA) {
hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
hw_res->msr_update = mba_wrmsr_amd;
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 1dafbdc5ac31..1df0e3262bca 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -105,8 +105,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return false;
}
- if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
- val > r->default_ctrl) {
+ if ((r->cache.min_cbm_bits > 0 && val == 0) || val > r->default_ctrl) {
rdt_last_cmd_puts("Mask out of range\n");
return false;
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5f7128686cfd..5ebd28e6aa0c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -8,16 +8,6 @@
#include <linux/fs_context.h>
#include <linux/jump_label.h>
-#define MSR_IA32_L3_QOS_CFG 0xc81
-#define MSR_IA32_L2_QOS_CFG 0xc82
-#define MSR_IA32_L3_CBM_BASE 0xc90
-#define MSR_IA32_L2_CBM_BASE 0xd10
-#define MSR_IA32_MBA_THRTL_BASE 0xd50
-#define MSR_IA32_MBA_BW_BASE 0xc0000200
-
-#define MSR_IA32_QM_CTR 0x0c8e
-#define MSR_IA32_QM_EVTSEL 0x0c8d
-
#define L3_QOS_CDP_ENABLE 0x01ULL
#define L2_QOS_CDP_ENABLE 0x01ULL
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d961ae3ed96e..ba8d0763b36b 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -477,7 +477,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
* pseudo-locked followed by reading of kernel memory to load it
* into the cache.
*/
- __wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
+ __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
/*
* Cache was flushed earlier. Now access kernel memory to read it
* into cache region associated with just activated plr->closid.
@@ -513,7 +513,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
* Critical section end: restore closid with capacity bitmask that
* does not overlap with pseudo-locked region.
*/
- __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);
+ __wrmsr(MSR_IA32_PQR_ASSOC, rmid_p, closid_p);
/* Re-enable the hardware prefetcher(s) */
wrmsrl(MSR_MISC_FEATURE_CONTROL, saved_msr);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index fc01f81f6e2a..f53944fb8f7f 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
{ X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
{ X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
+ { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 1ec20807de1e..68f8b18d2278 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -160,8 +160,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
return ret;
pginfo.addr = encl_page->desc & PAGE_MASK;
- pginfo.contents = (unsigned long)kmap_atomic(b.contents);
- pcmd_page = kmap_atomic(b.pcmd);
+ pginfo.contents = (unsigned long)kmap_local_page(b.contents);
+ pcmd_page = kmap_local_page(b.pcmd);
pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
if (secs_page)
@@ -187,8 +187,8 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
*/
pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
- kunmap_atomic(pcmd_page);
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local(pcmd_page);
+ kunmap_local((void *)(unsigned long)pginfo.contents);
get_page(b.pcmd);
sgx_encl_put_backing(&b);
@@ -197,10 +197,10 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page,
if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) {
sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
- pcmd_page = kmap_atomic(b.pcmd);
+ pcmd_page = kmap_local_page(b.pcmd);
if (memchr_inv(pcmd_page, 0, PAGE_SIZE))
pr_warn("PCMD page not empty after truncate.\n");
- kunmap_atomic(pcmd_page);
+ kunmap_local(pcmd_page);
}
put_page(b.pcmd);
@@ -680,11 +680,15 @@ const struct vm_operations_struct sgx_vm_ops = {
void sgx_encl_release(struct kref *ref)
{
struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
+ unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1);
struct sgx_va_page *va_page;
struct sgx_encl_page *entry;
- unsigned long index;
+ unsigned long count = 0;
+
+ XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base));
- xa_for_each(&encl->page_array, index, entry) {
+ xas_lock(&xas);
+ xas_for_each(&xas, entry, max_page_index) {
if (entry->epc_page) {
/*
* The page and its radix tree entry cannot be freed
@@ -699,9 +703,20 @@ void sgx_encl_release(struct kref *ref)
}
kfree(entry);
- /* Invoke scheduler to prevent soft lockups. */
- cond_resched();
+ /*
+ * Invoke scheduler on every XA_CHECK_SCHED iteration
+ * to prevent soft lockups.
+ */
+ if (!(++count % XA_CHECK_SCHED)) {
+ xas_pause(&xas);
+ xas_unlock(&xas);
+
+ cond_resched();
+
+ xas_lock(&xas);
+ }
}
+ xas_unlock(&xas);
xa_destroy(&encl->page_array);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ebe79d60619f..21ca0a831b70 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -111,7 +111,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
encl->base = secs->base;
encl->size = secs->size;
encl->attributes = secs->attributes;
- encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
+ encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
/* Set only after completion, as encl->lock has not been taken. */
set_bit(SGX_ENCL_CREATED, &encl->flags);
@@ -221,11 +221,11 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
pginfo.addr = encl_page->desc & PAGE_MASK;
pginfo.metadata = (unsigned long)secinfo;
- pginfo.contents = (unsigned long)kmap_atomic(src_page);
+ pginfo.contents = (unsigned long)kmap_local_page(src_page);
ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));
- kunmap_atomic((void *)pginfo.contents);
+ kunmap_local((void *)pginfo.contents);
put_page(src_page);
return ret ? -EIO : 0;
@@ -356,6 +356,9 @@ static int sgx_validate_offset_length(struct sgx_encl *encl,
if (!length || !IS_ALIGNED(length, PAGE_SIZE))
return -EINVAL;
+ if (offset + length < offset)
+ return -EINVAL;
+
if (offset + length - PAGE_SIZE >= encl->size)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 0aad028f04d4..e5a37b6e9aa5 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -165,17 +165,17 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot,
pginfo.addr = 0;
pginfo.secs = 0;
- pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
- pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
+ pginfo.contents = (unsigned long)kmap_local_page(backing->contents);
+ pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) +
backing->pcmd_offset;
ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot);
set_page_dirty(backing->pcmd);
set_page_dirty(backing->contents);
- kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
+ kunmap_local((void *)(unsigned long)(pginfo.metadata -
backing->pcmd_offset));
- kunmap_atomic((void *)(unsigned long)pginfo.contents);
+ kunmap_local((void *)(unsigned long)pginfo.contents);
return ret;
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 132a2de44d2f..5e868b62a7c4 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
unsigned int die_select_mask, die_level_siblings;
+ unsigned int pkg_mask_width;
bool die_level_present = false;
int leaf;
@@ -111,10 +112,10 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
- die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
- do {
+ while (true) {
cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
/*
@@ -132,10 +133,15 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
}
+ if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE)
+ pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ else
+ break;
+
sub_index++;
- } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
+ }
- core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
+ core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width;
die_select_mask = (~(-1 << die_plus_mask_width)) >>
core_plus_mask_width;
@@ -148,7 +154,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
}
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
- die_plus_mask_width);
+ pkg_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index ec7bbac3a9f2..8009c8346d8f 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -58,24 +58,6 @@ static void tsx_enable(void)
wrmsrl(MSR_IA32_TSX_CTRL, tsx);
}
-static bool tsx_ctrl_is_supported(void)
-{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
- /*
- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
- *
- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
- * tsx= cmdline requests will do nothing on CPUs without
- * MSR_IA32_TSX_CTRL support.
- */
- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
-}
-
static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
{
if (boot_cpu_has_bug(X86_BUG_TAA))
@@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void)
rdmsrl(MSR_TSX_FORCE_ABORT, msr);
msr |= MSR_TFA_TSX_CPUID_CLEAR;
wrmsrl(MSR_TSX_FORCE_ABORT, msr);
- } else if (tsx_ctrl_is_supported()) {
+ } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
rdmsrl(MSR_IA32_TSX_CTRL, msr);
msr |= TSX_CTRL_CPUID_CLEAR;
wrmsrl(MSR_IA32_TSX_CTRL, msr);
@@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void)
u64 mcu_opt_ctrl;
/* Check if RTM_ALLOW exists */
- if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() ||
+ if (!boot_cpu_has_bug(X86_BUG_TAA) ||
+ !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) ||
!cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
return;
@@ -191,7 +174,20 @@ void __init tsx_init(void)
return;
}
- if (!tsx_ctrl_is_supported()) {
+ /*
+ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+ *
+ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+ * tsx= cmdline requests will do nothing on CPUs without
+ * MSR_IA32_TSX_CTRL support.
+ */
+ if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) {
+ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
+ } else {
tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
return;
}