aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig28
-rw-r--r--arch/x86/boot/boot.h1
-rw-r--r--arch/x86/boot/compressed/misc.c15
-rw-r--r--arch/x86/boot/string.c8
-rw-r--r--arch/x86/boot/string.h1
-rw-r--r--arch/x86/coco/sev/core.c269
-rw-r--r--arch/x86/coco/tdx/tdx.c138
-rw-r--r--arch/x86/crypto/Kconfig4
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S532
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c145
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c2
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S76
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c2
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S354
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl4
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl4
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S20
-rw-r--r--arch/x86/entry/vdso/vma.c92
-rw-r--r--arch/x86/events/amd/core.c10
-rw-r--r--arch/x86/events/amd/uncore.c5
-rw-r--r--arch/x86/events/core.c64
-rw-r--r--arch/x86/events/intel/core.c137
-rw-r--r--arch/x86/events/intel/ds.c21
-rw-r--r--arch/x86/events/intel/pt.c84
-rw-r--r--arch/x86/events/intel/pt.h6
-rw-r--r--arch/x86/events/perf_event.h34
-rw-r--r--arch/x86/events/rapl.c130
-rw-r--r--arch/x86/include/asm/atomic64_32.h3
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h6
-rw-r--r--arch/x86/include/asm/cpu.h17
-rw-r--r--arch/x86/include/asm/cpufeatures.h4
-rw-r--r--arch/x86/include/asm/cpuid.h8
-rw-r--r--arch/x86/include/asm/ftrace.h30
-rw-r--r--arch/x86/include/asm/intel-family.h7
-rw-r--r--arch/x86/include/asm/io.h5
-rw-r--r--arch/x86/include/asm/mce.h36
-rw-r--r--arch/x86/include/asm/page_types.h5
-rw-r--r--arch/x86/include/asm/perf_event.h12
-rw-r--r--arch/x86/include/asm/processor.h18
-rw-r--r--arch/x86/include/asm/sev-common.h27
-rw-r--r--arch/x86/include/asm/sev.h67
-rw-r--r--arch/x86/include/asm/shared/tdx.h13
-rw-r--r--arch/x86/include/asm/thread_info.h6
-rw-r--r--arch/x86/include/asm/timer.h2
-rw-r--r--arch/x86/include/asm/topology.h9
-rw-r--r--arch/x86/include/asm/vdso/getrandom.h10
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h12
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h15
-rw-r--r--arch/x86/include/asm/vvar.h71
-rw-r--r--arch/x86/include/uapi/asm/amd_hsmp.h3
-rw-r--r--arch/x86/include/uapi/asm/mce.h3
-rw-r--r--arch/x86/include/uapi/asm/mman.h3
-rw-r--r--arch/x86/kernel/acpi/boot.c6
-rw-r--r--arch/x86/kernel/acpi/cppc.c23
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S1
-rw-r--r--arch/x86/kernel/apic/vector.c8
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/bus_lock.c406
-rw-r--r--arch/x86/kernel/cpu/common.c43
-rw-r--r--arch/x86/kernel/cpu/debugfs.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c422
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c30
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c107
-rw-r--r--arch/x86/kernel/cpu/mce/core.c216
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c11
-rw-r--r--arch/x86/kernel/cpu/mce/genpool.c18
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c6
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c10
-rw-r--r--arch/x86/kernel/cpu/proc.c10
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c3
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c2
-rw-r--r--arch/x86/kernel/cpu/scattered.c56
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c12
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c3
-rw-r--r--arch/x86/kernel/cpu/topology_common.c34
-rw-r--r--arch/x86/kernel/devicetree.c2
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/head_64.S1
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c19
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kernel/tsc.c5
-rw-r--r--arch/x86/kernel/unwind_orc.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S42
-rw-r--r--arch/x86/kvm/svm/sev.c39
-rw-r--r--arch/x86/kvm/xen.c12
-rw-r--r--arch/x86/mm/init.c23
-rw-r--r--arch/x86/mm/kaslr.c2
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c77
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c11
-rw-r--r--arch/x86/mm/mmap.c5
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c149
-rw-r--r--arch/x86/platform/efi/efi.c20
-rw-r--r--arch/x86/platform/efi/efi_64.c42
-rw-r--r--arch/x86/platform/efi/quirks.c3
-rw-r--r--arch/x86/platform/intel-mid/pwr.c14
-rw-r--r--arch/x86/platform/intel-quark/imr.c2
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c2
-rw-r--r--arch/x86/platform/iris/iris.c2
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c4
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c2
-rw-r--r--arch/x86/platform/pvh/head.S50
-rw-r--r--arch/x86/tools/relocs.c2
-rw-r--r--arch/x86/virt/svm/Makefile1
-rw-r--r--arch/x86/virt/svm/cmdline.c45
-rw-r--r--arch/x86/xen/xen-head.S6
109 files changed, 2528 insertions, 2081 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7b9a7e8f39ac..948707a3615e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API if X86_64
+ select ARCH_HAS_PREEMPT_LAZY
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_HW_PTE_YOUNG
@@ -145,7 +146,6 @@ config X86
select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
- select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
# Word-size accesses may read uninitialized data past the trailing \0
# in strings and cause false KMSAN reports.
@@ -1954,6 +1954,7 @@ config X86_USER_SHADOW_STACK
depends on AS_WRUSS
depends on X86_64
select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_USER_SHADOW_STACK
select X86_CET
help
Shadow stack protection is a hardware feature that detects function
@@ -2427,6 +2428,14 @@ config CFI_AUTO_DEFAULT
source "kernel/livepatch/Kconfig"
+config X86_BUS_LOCK_DETECT
+ bool "Split Lock Detect and Bus Lock Detect support"
+ depends on CPU_SUP_INTEL || CPU_SUP_AMD
+ default y
+ help
+ Enable Split Lock Detect and Bus Lock Detect functionalities.
+ See <file:Documentation/arch/x86/buslock.rst> for more information.
+
endmenu
config CC_HAS_NAMED_AS
@@ -2555,15 +2564,14 @@ config MITIGATION_CALL_DEPTH_TRACKING
default y
help
Compile the kernel with call depth tracking to mitigate the Intel
- SKL Return-Speculation-Buffer (RSB) underflow issue. The
- mitigation is off by default and needs to be enabled on the
- kernel command line via the retbleed=stuff option. For
- non-affected systems the overhead of this option is marginal as
- the call depth tracking is using run-time generated call thunks
- in a compiler generated padding area and call patching. This
- increases text size by ~5%. For non affected systems this space
- is unused. On affected SKL systems this results in a significant
- performance gain over the IBRS mitigation.
+ SKL Return-Stack-Buffer (RSB) underflow issue. The mitigation is off
+ by default and needs to be enabled on the kernel command line via the
+ retbleed=stuff option. For non-affected systems the overhead of this
+ option is marginal as the call depth tracking is using run-time
+ generated call thunks in a compiler generated padding area and call
+ patching. This increases text size by ~5%. For non affected systems
+ this space is unused. On affected SKL systems this results in a
+ significant performance gain over the IBRS mitigation.
config CALL_THUNKS_DEBUG
bool "Enable call thunks and call depth tracking debugging"
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 148ba5c5106e..0f24f7ebec9b 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -305,7 +305,6 @@ void initregs(struct biosregs *regs);
int strcmp(const char *str1, const char *str2);
int strncmp(const char *cs, const char *ct, size_t count);
size_t strnlen(const char *s, size_t maxlen);
-unsigned int atou(const char *s);
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
size_t strlen(const char *s);
char *strchr(const char *s, int c);
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 04a35b2c26e9..0d37420cad02 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -385,6 +385,19 @@ static void parse_mem_encrypt(struct setup_header *hdr)
hdr->xloadflags |= XLF_MEM_ENCRYPTION;
}
+static void early_sev_detect(void)
+{
+ /*
+ * Accessing video memory causes guest termination because
+ * the boot stage2 #VC handler of SEV-ES/SNP guests does not
+ * support MMIO handling and kexec -c adds screen_info to the
+ * boot parameters passed to the kexec kernel, which causes
+ * console output to be dumped to both video and serial.
+ */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
+ lines = cols = 0;
+}
+
/*
* The compressed kernel image (ZO), has been moved so that its position
* is against the end of the buffer used to hold the uncompressed kernel
@@ -440,6 +453,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
*/
early_tdx_detect();
+ early_sev_detect();
+
console_init();
/*
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index c23f3b9c84fe..84f7a883ce1e 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -88,14 +88,6 @@ size_t strnlen(const char *s, size_t maxlen)
return (es - s);
}
-unsigned int atou(const char *s)
-{
- unsigned int i = 0;
- while (isdigit(*s))
- i = i * 10 + (*s++ - '0');
- return i;
-}
-
/* Works only for digits and letters, but small and fast */
#define TOLOWER(x) ((x) | 0x20)
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index e5d2c6b8c2f1..a5b05ebc037d 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -24,7 +24,6 @@ extern size_t strlen(const char *s);
extern char *strstr(const char *s1, const char *s2);
extern char *strchr(const char *s, int c);
extern size_t strnlen(const char *s, size_t maxlen);
-extern unsigned int atou(const char *s);
extern unsigned long long simple_strtoull(const char *cp, char **endp,
unsigned int base);
long simple_strtol(const char *cp, char **endp, unsigned int base);
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index de1df0cb45da..c5b0148b8c0a 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -92,6 +92,9 @@ static struct ghcb *boot_ghcb __section(".data");
/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;
+/* Secrets page physical address from the CC blob */
+static u64 secrets_pa __ro_after_init;
+
/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
struct ghcb ghcb_page;
@@ -141,33 +144,6 @@ static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
static DEFINE_PER_CPU(u64, svsm_caa_pa);
-struct sev_config {
- __u64 debug : 1,
-
- /*
- * Indicates when the per-CPU GHCB has been created and registered
- * and thus can be used by the BSP instead of the early boot GHCB.
- *
- * For APs, the per-CPU GHCB is created before they are started
- * and registered upon startup, so this flag can be used globally
- * for the BSP and APs.
- */
- ghcbs_initialized : 1,
-
- /*
- * Indicates when the per-CPU SVSM CA is to be used instead of the
- * boot SVSM CA.
- *
- * For APs, the per-CPU SVSM CA is created as part of the AP
- * bringup, so this flag can be used globally for the BSP and APs.
- */
- use_cas : 1,
-
- __reserved : 61;
-};
-
-static struct sev_config sev_cfg __read_mostly;
-
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
unsigned long sp = regs->sp;
@@ -722,45 +698,13 @@ void noinstr __sev_es_nmi_complete(void)
__sev_put_ghcb(&state);
}
-static u64 __init get_secrets_page(void)
-{
- u64 pa_data = boot_params.cc_blob_address;
- struct cc_blob_sev_info info;
- void *map;
-
- /*
- * The CC blob contains the address of the secrets page, check if the
- * blob is present.
- */
- if (!pa_data)
- return 0;
-
- map = early_memremap(pa_data, sizeof(info));
- if (!map) {
- pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n");
- return 0;
- }
- memcpy(&info, map, sizeof(info));
- early_memunmap(map, sizeof(info));
-
- /* smoke-test the secrets page passed */
- if (!info.secrets_phys || info.secrets_len != PAGE_SIZE)
- return 0;
-
- return info.secrets_phys;
-}
-
static u64 __init get_snp_jump_table_addr(void)
{
struct snp_secrets_page *secrets;
void __iomem *mem;
- u64 pa, addr;
+ u64 addr;
- pa = get_secrets_page();
- if (!pa)
- return 0;
-
- mem = ioremap_encrypted(pa, PAGE_SIZE);
+ mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
if (!mem) {
pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
return 0;
@@ -1010,6 +954,137 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
+static void set_pte_enc(pte_t *kpte, int level, void *va)
+{
+ struct pte_enc_desc d = {
+ .kpte = kpte,
+ .pte_level = level,
+ .va = va,
+ .encrypt = true
+ };
+
+ prepare_pte_enc(&d);
+ set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
+}
+
+static void unshare_all_memory(void)
+{
+ unsigned long addr, end, size, ghcb;
+ struct sev_es_runtime_data *data;
+ unsigned int npages, level;
+ bool skipped_addr;
+ pte_t *pte;
+ int cpu;
+
+ /* Unshare the direct mapping. */
+ addr = PAGE_OFFSET;
+ end = PAGE_OFFSET + get_max_mapped();
+
+ while (addr < end) {
+ pte = lookup_address(addr, &level);
+ size = page_level_size(level);
+ npages = size / PAGE_SIZE;
+ skipped_addr = false;
+
+ if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
+ addr += size;
+ continue;
+ }
+
+ /*
+ * Ensure that all the per-CPU GHCBs are made private at the
+ * end of the unsharing loop so that the switch to the slower
+ * MSR protocol happens last.
+ */
+ for_each_possible_cpu(cpu) {
+ data = per_cpu(runtime_data, cpu);
+ ghcb = (unsigned long)&data->ghcb_page;
+
+ if (addr <= ghcb && ghcb <= addr + size) {
+ skipped_addr = true;
+ break;
+ }
+ }
+
+ if (!skipped_addr) {
+ set_pte_enc(pte, level, (void *)addr);
+ snp_set_memory_private(addr, npages);
+ }
+ addr += size;
+ }
+
+ /* Unshare all bss decrypted memory. */
+ addr = (unsigned long)__start_bss_decrypted;
+ end = (unsigned long)__start_bss_decrypted_unused;
+ npages = (end - addr) >> PAGE_SHIFT;
+
+ for (; addr < end; addr += PAGE_SIZE) {
+ pte = lookup_address(addr, &level);
+ if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
+ continue;
+
+ set_pte_enc(pte, level, (void *)addr);
+ }
+ addr = (unsigned long)__start_bss_decrypted;
+ snp_set_memory_private(addr, npages);
+
+ __flush_tlb_all();
+}
+
+/* Stop new private<->shared conversions */
+void snp_kexec_begin(void)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ /*
+ * Crash kernel ends up here with interrupts disabled: can't wait for
+ * conversions to finish.
+ *
+ * If race happened, just report and proceed.
+ */
+ if (!set_memory_enc_stop_conversion())
+ pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+void snp_kexec_finish(void)
+{
+ struct sev_es_runtime_data *data;
+ unsigned int level, cpu;
+ unsigned long size;
+ struct ghcb *ghcb;
+ pte_t *pte;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ unshare_all_memory();
+
+ /*
+ * Switch to using the MSR protocol to change per-CPU GHCBs to
+ * private. All the per-CPU GHCBs have been switched back to private,
+ * so can't do any more GHCB calls to the hypervisor beyond this point
+ * until the kexec'ed kernel starts running.
+ */
+ boot_ghcb = NULL;
+ sev_cfg.ghcbs_initialized = false;
+
+ for_each_possible_cpu(cpu) {
+ data = per_cpu(runtime_data, cpu);
+ ghcb = &data->ghcb_page;
+ pte = lookup_address((unsigned long)ghcb, &level);
+ size = page_level_size(level);
+ set_pte_enc(pte, level, (void *)ghcb);
+ snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
+ }
+}
+
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
int ret;
@@ -1331,35 +1406,39 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
return 0;
}
+/* Writes to the SVSM CAA MSR are ignored */
+static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
+{
+ if (write)
+ return ES_OK;
+
+ regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
+ regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
+
+ return ES_OK;
+}
+
static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
struct pt_regs *regs = ctxt->regs;
enum es_result ret;
- u64 exit_info_1;
+ bool write;
/* Is it a WRMSR? */
- exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
+ write = ctxt->insn.opcode.bytes[1] == 0x30;
- if (regs->cx == MSR_SVSM_CAA) {
- /* Writes to the SVSM CAA msr are ignored */
- if (exit_info_1)
- return ES_OK;
-
- regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
- regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
-
- return ES_OK;
- }
+ if (regs->cx == MSR_SVSM_CAA)
+ return __vc_handle_msr_caa(regs, write);
ghcb_set_rcx(ghcb, regs->cx);
- if (exit_info_1) {
+ if (write) {
ghcb_set_rax(ghcb, regs->ax);
ghcb_set_rdx(ghcb, regs->dx);
}
- ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);
- if ((ret == ES_OK) && (!exit_info_1)) {
+ if ((ret == ES_OK) && !write) {
regs->ax = ghcb->save.rax;
regs->dx = ghcb->save.rdx;
}
@@ -2300,6 +2379,11 @@ bool __head snp_init(struct boot_params *bp)
if (!cc_info)
return false;
+ if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
+ secrets_pa = cc_info->secrets_phys;
+ else
+ return false;
+
setup_cpuid_table(cc_info);
svsm_setup(cc_info);
@@ -2374,23 +2458,6 @@ static int __init report_snp_info(void)
}
arch_initcall(report_snp_info);
-static int __init init_sev_config(char *str)
-{
- char *s;
-
- while ((s = strsep(&str, ","))) {
- if (!strcmp(s, "debug")) {
- sev_cfg.debug = true;
- continue;
- }
-
- pr_info("SEV command-line option '%s' was not recognized\n", s);
- }
-
- return 1;
-}
-__setup("sev=", init_sev_config);
-
static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
{
/* If (new) lengths have been returned, propagate them up */
@@ -2441,7 +2508,8 @@ int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
}
EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
-int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
+int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input,
+ struct snp_guest_request_ioctl *rio)
{
struct ghcb_state state;
struct es_em_ctxt ctxt;
@@ -2465,12 +2533,12 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
vc_ghcb_invalidate(ghcb);
- if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
+ if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
ghcb_set_rax(ghcb, input->data_gpa);
ghcb_set_rbx(ghcb, input->data_npages);
}
- ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
if (ret)
goto e_put;
@@ -2485,7 +2553,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
/* Number of expected pages are returned in RBX */
- if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
+ if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
input->data_npages = ghcb_get_rbx(ghcb);
ret = -ENOSPC;
break;
@@ -2513,16 +2581,11 @@ static struct platform_device sev_guest_device = {
static int __init snp_init_platform_device(void)
{
struct sev_guest_platform_data data;
- u64 gpa;
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return -ENODEV;
- gpa = get_secrets_page();
- if (!gpa)
- return -ENODEV;
-
- data.secrets_gpa = gpa;
+ data.secrets_gpa = secrets_pa;
if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
return -ENODEV;
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 327c45c5013f..0d9b090b4880 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -78,6 +78,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
+/* Read TD-scoped metadata */
+static inline u64 tdg_vm_rd(u64 field, u64 *value)
+{
+ struct tdx_module_args args = {
+ .rdx = field,
+ };
+ u64 ret;
+
+ ret = __tdcall_ret(TDG_VM_RD, &args);
+ *value = args.r8;
+
+ return ret;
+}
+
+/* Write TD-scoped metadata */
+static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
+{
+ struct tdx_module_args args = {
+ .rdx = field,
+ .r8 = value,
+ .r9 = mask,
+ };
+
+ return __tdcall(TDG_VM_WR, &args);
+}
+
/**
* tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
* subtype 0) using TDG.MR.REPORT TDCALL.
@@ -168,7 +194,87 @@ static void __noreturn tdx_panic(const char *msg)
__tdx_hypercall(&args);
}
-static void tdx_parse_tdinfo(u64 *cc_mask)
+/*
+ * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
+ * that no #VE will be delivered for accesses to TD-private memory.
+ *
+ * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
+ * controls if the guest will receive such #VE with TD attribute
+ * ATTR_SEPT_VE_DISABLE.
+ *
+ * Newer TDX modules allow the guest to control if it wants to receive SEPT
+ * violation #VEs.
+ *
+ * Check if the feature is available and disable SEPT #VE if possible.
+ *
+ * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
+ * attribute is no longer reliable. It reflects the initial state of the
+ * control for the TD, but it will not be updated if someone (e.g. bootloader)
+ * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
+ * determine if SEPT #VEs are enabled or disabled.
+ */
+static void disable_sept_ve(u64 td_attr)
+{
+ const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
+ bool debug = td_attr & ATTR_DEBUG;
+ u64 config, controls;
+
+ /* Is this TD allowed to disable SEPT #VE */
+ tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
+ if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
+ /* No SEPT #VE controls for the guest: check the attribute */
+ if (td_attr & ATTR_SEPT_VE_DISABLE)
+ return;
+
+ /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
+ if (debug)
+ pr_warn("%s\n", msg);
+ else
+ tdx_panic(msg);
+ return;
+ }
+
+ /* Check if SEPT #VE has been disabled before us */
+ tdg_vm_rd(TDCS_TD_CTLS, &controls);
+ if (controls & TD_CTLS_PENDING_VE_DISABLE)
+ return;
+
+ /* Keep #VEs enabled for splats in debugging environments */
+ if (debug)
+ return;
+
+ /* Disable SEPT #VEs */
+ tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
+ TD_CTLS_PENDING_VE_DISABLE);
+}
+
+/*
+ * TDX 1.0 generates a #VE when accessing topology-related CPUID leafs (0xB and
+ * 0x1F) and the X2APIC_APICID MSR. The kernel returns all zeros on CPUID #VEs.
+ * In practice, this means that the kernel can only boot with a plain topology.
+ * Any complications will cause problems.
+ *
+ * The ENUM_TOPOLOGY feature allows the VMM to provide topology information.
+ * Enabling the feature eliminates topology-related #VEs: the TDX module
+ * virtualizes accesses to the CPUID leafs and the MSR.
+ *
+ * Enable ENUM_TOPOLOGY if it is available.
+ */
+static void enable_cpu_topology_enumeration(void)
+{
+ u64 configured;
+
+ /* Has the VMM provided a valid topology configuration? */
+ tdg_vm_rd(TDCS_TOPOLOGY_ENUM_CONFIGURED, &configured);
+ if (!configured) {
+ pr_err("VMM did not configure X2APIC_IDs properly\n");
+ return;
+ }
+
+ tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY, TD_CTLS_ENUM_TOPOLOGY);
+}
+
+static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
unsigned int gpa_width;
@@ -193,21 +299,13 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
- /*
- * The kernel can not handle #VE's when accessing normal kernel
- * memory. Ensure that no #VE will be delivered for accesses to
- * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
- */
td_attr = args.rdx;
- if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
- const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
- /* Relax SEPT_VE_DISABLE check for debug TD. */
- if (td_attr & ATTR_DEBUG)
- pr_warn("%s\n", msg);
- else
- tdx_panic(msg);
- }
+ /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
+ tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
+
+ disable_sept_ve(td_attr);
+ enable_cpu_topology_enumeration();
}
/*
@@ -929,10 +1027,6 @@ static void tdx_kexec_finish(void)
void __init tdx_early_init(void)
{
- struct tdx_module_args args = {
- .rdx = TDCS_NOTIFY_ENABLES,
- .r9 = -1ULL,
- };
u64 cc_mask;
u32 eax, sig[3];
@@ -947,11 +1041,11 @@ void __init tdx_early_init(void)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
cc_vendor = CC_VENDOR_INTEL;
- tdx_parse_tdinfo(&cc_mask);
- cc_set_mask(cc_mask);
- /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
- tdcall(TDG_VM_WR, &args);
+ /* Configure the TD */
+ tdx_setup(&cc_mask);
+
+ cc_set_mask(cc_mask);
/*
* All bits above GPA width are reserved and kernel treats shared bit
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 7b1bebed879d..3d2e38ba5240 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -363,7 +363,7 @@ config CRYPTO_CHACHA20_X86_64
- AVX-512VL (Advanced Vector Extensions-512VL)
config CRYPTO_AEGIS128_AESNI_SSE2
- tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)"
+ tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE4.1)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_SIMD
@@ -372,7 +372,7 @@ config CRYPTO_AEGIS128_AESNI_SSE2
Architecture: x86_64 using:
- AES-NI (AES New Instructions)
- - SSE2 (Streaming SIMD Extensions 2)
+ - SSE4.1 (Streaming SIMD Extensions 4.1)
config CRYPTO_NHPOLY1305_SSE2
tristate "Hash functions: NHPoly1305 (SSE2)"
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index ad7f4c891625..7294dc0ee7ba 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -1,14 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * AES-NI + SSE2 implementation of AEGIS-128
+ * AES-NI + SSE4.1 implementation of AEGIS-128
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <[email protected]>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ * Copyright 2024 Google LLC
*/
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
-#include <asm/frame.h>
#define STATE0 %xmm0
#define STATE1 %xmm1
@@ -20,11 +19,6 @@
#define T0 %xmm6
#define T1 %xmm7
-#define STATEP %rdi
-#define LEN %rsi
-#define SRC %rdx
-#define DST %rcx
-
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
@@ -34,11 +28,11 @@
.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
-.align 16
-.Laegis128_counter:
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
- .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
+.align 32
+.Lzeropad_mask:
+ .octa 0xffffffffffffffffffffffffffffffff
+ .octa 0
.text
@@ -61,140 +55,102 @@
.endm
/*
- * __load_partial: internal ABI
- * input:
- * LEN - bytes
- * SRC - src
- * output:
- * MSG - message block
- * changed:
- * T0
- * %r8
- * %r9
+ * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register
+ * MSG and zeroize any remaining bytes. Clobbers %rax, %rcx, and %r8.
*/
-SYM_FUNC_START_LOCAL(__load_partial)
- xor %r9d, %r9d
- pxor MSG, MSG
-
- mov LEN, %r8
- and $0x1, %r8
- jz .Lld_partial_1
-
- mov LEN, %r8
- and $0x1E, %r8
- add SRC, %r8
- mov (%r8), %r9b
-
-.Lld_partial_1:
- mov LEN, %r8
- and $0x2, %r8
- jz .Lld_partial_2
-
- mov LEN, %r8
- and $0x1C, %r8
- add SRC, %r8
- shl $0x10, %r9
- mov (%r8), %r9w
-
-.Lld_partial_2:
- mov LEN, %r8
- and $0x4, %r8
- jz .Lld_partial_4
-
- mov LEN, %r8
- and $0x18, %r8
- add SRC, %r8
- shl $32, %r9
- mov (%r8), %r8d
- xor %r8, %r9
-
-.Lld_partial_4:
- movq %r9, MSG
-
- mov LEN, %r8
- and $0x8, %r8
- jz .Lld_partial_8
-
- mov LEN, %r8
- and $0x10, %r8
- add SRC, %r8
- pslldq $8, MSG
- movq (%r8), T0
- pxor T0, MSG
-
-.Lld_partial_8:
- RET
-SYM_FUNC_END(__load_partial)
+.macro load_partial
+ sub $8, %ecx /* LEN - 8 */
+ jle .Lle8\@
+
+ /* Load 9 <= LEN <= 15 bytes: */
+ movq (SRC), MSG /* Load first 8 bytes */
+ mov (SRC, %rcx), %rax /* Load last 8 bytes */
+ neg %ecx
+ shl $3, %ecx
+ shr %cl, %rax /* Discard overlapping bytes */
+ pinsrq $1, %rax, MSG
+ jmp .Ldone\@
+
+.Lle8\@:
+ add $4, %ecx /* LEN - 4 */
+ jl .Llt4\@
+
+ /* Load 4 <= LEN <= 8 bytes: */
+ mov (SRC), %eax /* Load first 4 bytes */
+ mov (SRC, %rcx), %r8d /* Load last 4 bytes */
+ jmp .Lcombine\@
+
+.Llt4\@:
+ /* Load 1 <= LEN <= 3 bytes: */
+ add $2, %ecx /* LEN - 2 */
+ movzbl (SRC), %eax /* Load first byte */
+ jl .Lmovq\@
+ movzwl (SRC, %rcx), %r8d /* Load last 2 bytes */
+.Lcombine\@:
+ shl $3, %ecx
+ shl %cl, %r8
+ or %r8, %rax /* Combine the two parts */
+.Lmovq\@:
+ movq %rax, MSG
+.Ldone\@:
+.endm
/*
- * __store_partial: internal ABI
- * input:
- * LEN - bytes
- * DST - dst
- * output:
- * T0 - message block
- * changed:
- * %r8
- * %r9
- * %r10
+ * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer
+ * DST. Clobbers %rax, %rcx, and %r8.
*/
-SYM_FUNC_START_LOCAL(__store_partial)
- mov LEN, %r8
- mov DST, %r9
-
- movq T0, %r10
-
- cmp $8, %r8
- jl .Lst_partial_8
-
- mov %r10, (%r9)
- psrldq $8, T0
- movq T0, %r10
-
- sub $8, %r8
- add $8, %r9
-
-.Lst_partial_8:
- cmp $4, %r8
- jl .Lst_partial_4
-
- mov %r10d, (%r9)
- shr $32, %r10
-
- sub $4, %r8
- add $4, %r9
-
-.Lst_partial_4:
- cmp $2, %r8
- jl .Lst_partial_2
-
- mov %r10w, (%r9)
- shr $0x10, %r10
-
- sub $2, %r8
- add $2, %r9
-
-.Lst_partial_2:
- cmp $1, %r8
- jl .Lst_partial_1
-
- mov %r10b, (%r9)
-
-.Lst_partial_1:
- RET
-SYM_FUNC_END(__store_partial)
+.macro store_partial msg
+ sub $8, %ecx /* LEN - 8 */
+ jl .Llt8\@
+
+ /* Store 8 <= LEN <= 15 bytes: */
+ pextrq $1, \msg, %rax
+ mov %ecx, %r8d
+ shl $3, %ecx
+ ror %cl, %rax
+ mov %rax, (DST, %r8) /* Store last LEN - 8 bytes */
+ movq \msg, (DST) /* Store first 8 bytes */
+ jmp .Ldone\@
+
+.Llt8\@:
+ add $4, %ecx /* LEN - 4 */
+ jl .Llt4\@
+
+ /* Store 4 <= LEN <= 7 bytes: */
+ pextrd $1, \msg, %eax
+ mov %ecx, %r8d
+ shl $3, %ecx
+ ror %cl, %eax
+ mov %eax, (DST, %r8) /* Store last LEN - 4 bytes */
+ movd \msg, (DST) /* Store first 4 bytes */
+ jmp .Ldone\@
+
+.Llt4\@:
+ /* Store 1 <= LEN <= 3 bytes: */
+ pextrb $0, \msg, 0(DST)
+ cmp $-2, %ecx /* LEN - 4 == -2, i.e. LEN == 2? */
+ jl .Ldone\@
+ pextrb $1, \msg, 1(DST)
+ je .Ldone\@
+ pextrb $2, \msg, 2(DST)
+.Ldone\@:
+.endm
/*
- * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
+ * void aegis128_aesni_init(struct aegis_state *state,
+ * const struct aegis_block *key,
+ * const u8 iv[AEGIS128_NONCE_SIZE]);
*/
-SYM_FUNC_START(crypto_aegis128_aesni_init)
- FRAME_BEGIN
+SYM_FUNC_START(aegis128_aesni_init)
+ .set STATEP, %rdi
+ .set KEYP, %rsi
+ .set IVP, %rdx
/* load IV: */
- movdqu (%rdx), T1
+ movdqu (IVP), T1
/* load key: */
- movdqa (%rsi), KEY
+ movdqa (KEYP), KEY
pxor KEY, T1
movdqa T1, STATE0
movdqa KEY, STATE3
@@ -224,20 +180,22 @@ SYM_FUNC_START(crypto_aegis128_aesni_init)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
-
- FRAME_END
RET
-SYM_FUNC_END(crypto_aegis128_aesni_init)
+SYM_FUNC_END(aegis128_aesni_init)
/*
- * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
- * const void *data);
+ * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
+ * unsigned int len);
+ *
+ * len must be a multiple of 16.
*/
-SYM_FUNC_START(crypto_aegis128_aesni_ad)
- FRAME_BEGIN
+SYM_FUNC_START(aegis128_aesni_ad)
+ .set STATEP, %rdi
+ .set SRC, %rsi
+ .set LEN, %edx
- cmp $0x10, LEN
- jb .Lad_out
+ test LEN, LEN
+ jz .Lad_out
/* load the state: */
movdqu 0x00(STATEP), STATE0
@@ -246,89 +204,40 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
- mov SRC, %r8
- and $0xF, %r8
- jnz .Lad_u_loop
-
-.align 8
-.Lad_a_loop:
- movdqa 0x00(SRC), MSG
- aegis128_update
- pxor MSG, STATE4
- sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_1
-
- movdqa 0x10(SRC), MSG
- aegis128_update
- pxor MSG, STATE3
- sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_2
-
- movdqa 0x20(SRC), MSG
- aegis128_update
- pxor MSG, STATE2
- sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_3
-
- movdqa 0x30(SRC), MSG
- aegis128_update
- pxor MSG, STATE1
- sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_4
-
- movdqa 0x40(SRC), MSG
- aegis128_update
- pxor MSG, STATE0
- sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_0
-
- add $0x50, SRC
- jmp .Lad_a_loop
-
.align 8
-.Lad_u_loop:
+.Lad_loop:
movdqu 0x00(SRC), MSG
aegis128_update
pxor MSG, STATE4
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_1
+ jz .Lad_out_1
movdqu 0x10(SRC), MSG
aegis128_update
pxor MSG, STATE3
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_2
+ jz .Lad_out_2
movdqu 0x20(SRC), MSG
aegis128_update
pxor MSG, STATE2
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_3
+ jz .Lad_out_3
movdqu 0x30(SRC), MSG
aegis128_update
pxor MSG, STATE1
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_4
+ jz .Lad_out_4
movdqu 0x40(SRC), MSG
aegis128_update
pxor MSG, STATE0
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lad_out_0
+ jz .Lad_out_0
add $0x50, SRC
- jmp .Lad_u_loop
+ jmp .Lad_loop
/* store the state: */
.Lad_out_0:
@@ -337,7 +246,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
- FRAME_END
RET
.Lad_out_1:
@@ -346,7 +254,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
- FRAME_END
RET
.Lad_out_2:
@@ -355,7 +262,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE0, 0x20(STATEP)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
- FRAME_END
RET
.Lad_out_3:
@@ -364,7 +270,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE4, 0x20(STATEP)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
- FRAME_END
RET
.Lad_out_4:
@@ -373,41 +278,38 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad)
movdqu STATE3, 0x20(STATEP)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
- FRAME_END
- RET
-
.Lad_out:
- FRAME_END
RET
-SYM_FUNC_END(crypto_aegis128_aesni_ad)
+SYM_FUNC_END(aegis128_aesni_ad)
-.macro encrypt_block a s0 s1 s2 s3 s4 i
- movdq\a (\i * 0x10)(SRC), MSG
+.macro encrypt_block s0 s1 s2 s3 s4 i
+ movdqu (\i * 0x10)(SRC), MSG
movdqa MSG, T0
pxor \s1, T0
pxor \s4, T0
movdqa \s2, T1
pand \s3, T1
pxor T1, T0
- movdq\a T0, (\i * 0x10)(DST)
+ movdqu T0, (\i * 0x10)(DST)
aegis128_update
pxor MSG, \s4
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Lenc_out_\i
+ jz .Lenc_out_\i
.endm
/*
- * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
- * const void *src, void *dst);
+ * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
+ * unsigned int len);
+ *
+ * len must be nonzero and a multiple of 16.
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
- FRAME_BEGIN
-
- cmp $0x10, LEN
- jb .Lenc_out
+SYM_FUNC_START(aegis128_aesni_enc)
+ .set STATEP, %rdi
+ .set SRC, %rsi
+ .set DST, %rdx
+ .set LEN, %ecx
/* load the state: */
movdqu 0x00(STATEP), STATE0
@@ -416,34 +318,17 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
- mov SRC, %r8
- or DST, %r8
- and $0xF, %r8
- jnz .Lenc_u_loop
-
.align 8
-.Lenc_a_loop:
- encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
- encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
- encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
- encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
- encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
+.Lenc_loop:
+ encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
+ encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
+ encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
+ encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
+ encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
add $0x50, SRC
add $0x50, DST
- jmp .Lenc_a_loop
-
-.align 8
-.Lenc_u_loop:
- encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
- encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
- encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
- encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
- encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
-
- add $0x50, SRC
- add $0x50, DST
- jmp .Lenc_u_loop
+ jmp .Lenc_loop
/* store the state: */
.Lenc_out_0:
@@ -452,7 +337,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
- FRAME_END
RET
.Lenc_out_1:
@@ -461,7 +345,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE0, 0x20(STATEP)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
- FRAME_END
RET
.Lenc_out_2:
@@ -470,7 +353,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE4, 0x20(STATEP)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
- FRAME_END
RET
.Lenc_out_3:
@@ -479,7 +361,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE3, 0x20(STATEP)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
- FRAME_END
RET
.Lenc_out_4:
@@ -488,20 +369,19 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
- FRAME_END
- RET
-
.Lenc_out:
- FRAME_END
RET
-SYM_FUNC_END(crypto_aegis128_aesni_enc)
+SYM_FUNC_END(aegis128_aesni_enc)
/*
- * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
- * const void *src, void *dst);
+ * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
+ * u8 *dst, unsigned int len);
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
- FRAME_BEGIN
+SYM_FUNC_START(aegis128_aesni_enc_tail)
+ .set STATEP, %rdi
+ .set SRC, %rsi
+ .set DST, %rdx
+ .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */
/* load the state: */
movdqu 0x00(STATEP), STATE0
@@ -511,7 +391,8 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
movdqu 0x40(STATEP), STATE4
/* encrypt message: */
- call __load_partial
+ mov LEN, %r9d
+ load_partial
movdqa MSG, T0
pxor STATE1, T0
@@ -520,7 +401,8 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
pand STATE3, T1
pxor T1, T0
- call __store_partial
+ mov %r9d, LEN
+ store_partial T0
aegis128_update
pxor MSG, STATE4
@@ -531,37 +413,36 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
-
- FRAME_END
RET
-SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
+SYM_FUNC_END(aegis128_aesni_enc_tail)
-.macro decrypt_block a s0 s1 s2 s3 s4 i
- movdq\a (\i * 0x10)(SRC), MSG
+.macro decrypt_block s0 s1 s2 s3 s4 i
+ movdqu (\i * 0x10)(SRC), MSG
pxor \s1, MSG
pxor \s4, MSG
movdqa \s2, T1
pand \s3, T1
pxor T1, MSG
- movdq\a MSG, (\i * 0x10)(DST)
+ movdqu MSG, (\i * 0x10)(DST)
aegis128_update
pxor MSG, \s4
sub $0x10, LEN
- cmp $0x10, LEN
- jl .Ldec_out_\i
+ jz .Ldec_out_\i
.endm
/*
- * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
- * const void *src, void *dst);
+ * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
+ * unsigned int len);
+ *
+ * len must be nonzero and a multiple of 16.
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
- FRAME_BEGIN
-
- cmp $0x10, LEN
- jb .Ldec_out
+SYM_FUNC_START(aegis128_aesni_dec)
+ .set STATEP, %rdi
+ .set SRC, %rsi
+ .set DST, %rdx
+ .set LEN, %ecx
/* load the state: */
movdqu 0x00(STATEP), STATE0
@@ -570,34 +451,17 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4
- mov SRC, %r8
- or DST, %r8
- and $0xF, %r8
- jnz .Ldec_u_loop
-
.align 8
-.Ldec_a_loop:
- decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
- decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
- decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
- decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
- decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
+.Ldec_loop:
+ decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
+ decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
+ decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
+ decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
+ decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4
add $0x50, SRC
add $0x50, DST
- jmp .Ldec_a_loop
-
-.align 8
-.Ldec_u_loop:
- decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
- decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
- decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
- decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
- decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
-
- add $0x50, SRC
- add $0x50, DST
- jmp .Ldec_u_loop
+ jmp .Ldec_loop
/* store the state: */
.Ldec_out_0:
@@ -606,7 +470,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
- FRAME_END
RET
.Ldec_out_1:
@@ -615,7 +478,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE0, 0x20(STATEP)
movdqu STATE1, 0x30(STATEP)
movdqu STATE2, 0x40(STATEP)
- FRAME_END
RET
.Ldec_out_2:
@@ -624,7 +486,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE4, 0x20(STATEP)
movdqu STATE0, 0x30(STATEP)
movdqu STATE1, 0x40(STATEP)
- FRAME_END
RET
.Ldec_out_3:
@@ -633,7 +494,6 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE3, 0x20(STATEP)
movdqu STATE4, 0x30(STATEP)
movdqu STATE0, 0x40(STATEP)
- FRAME_END
RET
.Ldec_out_4:
@@ -642,20 +502,19 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
movdqu STATE2, 0x20(STATEP)
movdqu STATE3, 0x30(STATEP)
movdqu STATE4, 0x40(STATEP)
- FRAME_END
- RET
-
.Ldec_out:
- FRAME_END
RET
-SYM_FUNC_END(crypto_aegis128_aesni_dec)
+SYM_FUNC_END(aegis128_aesni_dec)
/*
- * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
- * const void *src, void *dst);
+ * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
+ * u8 *dst, unsigned int len);
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
- FRAME_BEGIN
+SYM_FUNC_START(aegis128_aesni_dec_tail)
+ .set STATEP, %rdi
+ .set SRC, %rsi
+ .set DST, %rdx
+ .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */
/* load the state: */
movdqu 0x00(STATEP), STATE0
@@ -665,7 +524,8 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
movdqu 0x40(STATEP), STATE4
/* decrypt message: */
- call __load_partial
+ mov LEN, %r9d
+ load_partial
pxor STATE1, MSG
pxor STATE4, MSG
@@ -673,17 +533,13 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
pand STATE3, T1
pxor T1, MSG
- movdqa MSG, T0
- call __store_partial
+ mov %r9d, LEN
+ store_partial MSG
/* mask with byte count: */
- movq LEN, T0
- punpcklbw T0, T0
- punpcklbw T0, T0
- punpcklbw T0, T0
- punpcklbw T0, T0
- movdqa .Laegis128_counter(%rip), T1
- pcmpgtb T1, T0
+ lea .Lzeropad_mask+16(%rip), %rax
+ sub %r9, %rax
+ movdqu (%rax), T0
pand T0, MSG
aegis128_update
@@ -695,17 +551,19 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
movdqu STATE1, 0x20(STATEP)
movdqu STATE2, 0x30(STATEP)
movdqu STATE3, 0x40(STATEP)
-
- FRAME_END
RET
-SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
+SYM_FUNC_END(aegis128_aesni_dec_tail)
/*
- * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
- * u64 assoclen, u64 cryptlen);
+ * void aegis128_aesni_final(struct aegis_state *state,
+ * struct aegis_block *tag_xor,
+ * unsigned int assoclen, unsigned int cryptlen);
*/
-SYM_FUNC_START(crypto_aegis128_aesni_final)
- FRAME_BEGIN
+SYM_FUNC_START(aegis128_aesni_final)
+ .set STATEP, %rdi
+ .set TAG_XOR, %rsi
+ .set ASSOCLEN, %edx
+ .set CRYPTLEN, %ecx
/* load the state: */
movdqu 0x00(STATEP), STATE0
@@ -715,10 +573,8 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
movdqu 0x40(STATEP), STATE4
/* prepare length block: */
- movq %rdx, MSG
- movq %rcx, T0
- pslldq $8, T0
- pxor T0, MSG
+ movd ASSOCLEN, MSG
+ pinsrd $2, CRYPTLEN, MSG
psllq $3, MSG /* multiply by 8 (to get bit count) */
pxor STATE3, MSG
@@ -733,7 +589,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
aegis128_update; pxor MSG, STATE3
/* xor tag: */
- movdqu (%rsi), MSG
+ movdqu (TAG_XOR), MSG
pxor STATE0, MSG
pxor STATE1, MSG
@@ -741,8 +597,6 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
pxor STATE3, MSG
pxor STATE4, MSG
- movdqu MSG, (%rsi)
-
- FRAME_END
+ movdqu MSG, (TAG_XOR)
RET
-SYM_FUNC_END(crypto_aegis128_aesni_final)
+SYM_FUNC_END(aegis128_aesni_final)
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 4623189000d8..c19d8e3d96a3 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The AEGIS-128 Authenticated-Encryption Algorithm
- * Glue for AES-NI + SSE2 implementation
+ * Glue for AES-NI + SSE4.1 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <[email protected]>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
@@ -23,27 +23,6 @@
#define AEGIS128_MIN_AUTH_SIZE 8
#define AEGIS128_MAX_AUTH_SIZE 16
-asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
-
-asmlinkage void crypto_aegis128_aesni_ad(
- void *state, unsigned int length, const void *data);
-
-asmlinkage void crypto_aegis128_aesni_enc(
- void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128_aesni_dec(
- void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128_aesni_enc_tail(
- void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128_aesni_dec_tail(
- void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128_aesni_final(
- void *state, void *tag_xor, unsigned int cryptlen,
- unsigned int assoclen);
-
struct aegis_block {
u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN);
};
@@ -56,15 +35,31 @@ struct aegis_ctx {
struct aegis_block key;
};
-struct aegis_crypt_ops {
- int (*skcipher_walk_init)(struct skcipher_walk *walk,
- struct aead_request *req, bool atomic);
+asmlinkage void aegis128_aesni_init(struct aegis_state *state,
+ const struct aegis_block *key,
+ const u8 iv[AEGIS128_NONCE_SIZE]);
- void (*crypt_blocks)(void *state, unsigned int length, const void *src,
- void *dst);
- void (*crypt_tail)(void *state, unsigned int length, const void *src,
- void *dst);
-};
+asmlinkage void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
+ unsigned int len);
+
+asmlinkage void aegis128_aesni_enc(struct aegis_state *state, const u8 *src,
+ u8 *dst, unsigned int len);
+
+asmlinkage void aegis128_aesni_dec(struct aegis_state *state, const u8 *src,
+ u8 *dst, unsigned int len);
+
+asmlinkage void aegis128_aesni_enc_tail(struct aegis_state *state,
+ const u8 *src, u8 *dst,
+ unsigned int len);
+
+asmlinkage void aegis128_aesni_dec_tail(struct aegis_state *state,
+ const u8 *src, u8 *dst,
+ unsigned int len);
+
+asmlinkage void aegis128_aesni_final(struct aegis_state *state,
+ struct aegis_block *tag_xor,
+ unsigned int assoclen,
+ unsigned int cryptlen);
static void crypto_aegis128_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
@@ -85,16 +80,15 @@ static void crypto_aegis128_aesni_process_ad(
if (pos > 0) {
unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
- crypto_aegis128_aesni_ad(state,
- AEGIS128_BLOCK_SIZE,
- buf.bytes);
+ aegis128_aesni_ad(state, buf.bytes,
+ AEGIS128_BLOCK_SIZE);
pos = 0;
left -= fill;
src += fill;
}
- crypto_aegis128_aesni_ad(state, left, src);
-
+ aegis128_aesni_ad(state, src,
+ left & ~(AEGIS128_BLOCK_SIZE - 1));
src += left & ~(AEGIS128_BLOCK_SIZE - 1);
left &= AEGIS128_BLOCK_SIZE - 1;
}
@@ -110,24 +104,37 @@ static void crypto_aegis128_aesni_process_ad(
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
- crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
+ aegis128_aesni_ad(state, buf.bytes, AEGIS128_BLOCK_SIZE);
}
}
-static void crypto_aegis128_aesni_process_crypt(
- struct aegis_state *state, struct skcipher_walk *walk,
- const struct aegis_crypt_ops *ops)
+static __always_inline void
+crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
+ struct skcipher_walk *walk, bool enc)
{
while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
- ops->crypt_blocks(state,
- round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
- walk->src.virt.addr, walk->dst.virt.addr);
+ if (enc)
+ aegis128_aesni_enc(state, walk->src.virt.addr,
+ walk->dst.virt.addr,
+ round_down(walk->nbytes,
+ AEGIS128_BLOCK_SIZE));
+ else
+ aegis128_aesni_dec(state, walk->src.virt.addr,
+ walk->dst.virt.addr,
+ round_down(walk->nbytes,
+ AEGIS128_BLOCK_SIZE));
skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
}
if (walk->nbytes) {
- ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
- walk->dst.virt.addr);
+ if (enc)
+ aegis128_aesni_enc_tail(state, walk->src.virt.addr,
+ walk->dst.virt.addr,
+ walk->nbytes);
+ else
+ aegis128_aesni_dec_tail(state, walk->src.virt.addr,
+ walk->dst.virt.addr,
+ walk->nbytes);
skcipher_walk_done(walk, 0);
}
}
@@ -162,42 +169,39 @@ static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
return 0;
}
-static void crypto_aegis128_aesni_crypt(struct aead_request *req,
- struct aegis_block *tag_xor,
- unsigned int cryptlen,
- const struct aegis_crypt_ops *ops)
+static __always_inline void
+crypto_aegis128_aesni_crypt(struct aead_request *req,
+ struct aegis_block *tag_xor,
+ unsigned int cryptlen, bool enc)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
struct skcipher_walk walk;
struct aegis_state state;
- ops->skcipher_walk_init(&walk, req, true);
+ if (enc)
+ skcipher_walk_aead_encrypt(&walk, req, true);
+ else
+ skcipher_walk_aead_decrypt(&walk, req, true);
kernel_fpu_begin();
- crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
+ aegis128_aesni_init(&state, &ctx->key, req->iv);
crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
- crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
- crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+ crypto_aegis128_aesni_process_crypt(&state, &walk, enc);
+ aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
{
- static const struct aegis_crypt_ops OPS = {
- .skcipher_walk_init = skcipher_walk_aead_encrypt,
- .crypt_blocks = crypto_aegis128_aesni_enc,
- .crypt_tail = crypto_aegis128_aesni_enc_tail,
- };
-
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
- crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
@@ -208,12 +212,6 @@ static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
- static const struct aegis_crypt_ops OPS = {
- .skcipher_walk_init = skcipher_walk_aead_decrypt,
- .crypt_blocks = crypto_aegis128_aesni_dec,
- .crypt_tail = crypto_aegis128_aesni_dec_tail,
- };
-
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
@@ -222,27 +220,16 @@ static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
- crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
-static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead)
-{
- return 0;
-}
-
-static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
-{
-}
-
static struct aead_alg crypto_aegis128_aesni_alg = {
.setkey = crypto_aegis128_aesni_setkey,
.setauthsize = crypto_aegis128_aesni_setauthsize,
.encrypt = crypto_aegis128_aesni_encrypt,
.decrypt = crypto_aegis128_aesni_decrypt,
- .init = crypto_aegis128_aesni_init_tfm,
- .exit = crypto_aegis128_aesni_exit_tfm,
.ivsize = AEGIS128_NONCE_SIZE,
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
@@ -267,7 +254,7 @@ static struct simd_aead_alg *simd_alg;
static int __init crypto_aegis128_aesni_module_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ if (!boot_cpu_has(X86_FEATURE_XMM4_1) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
@@ -286,6 +273,6 @@ module_exit(crypto_aegis128_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <[email protected]>");
-MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE4.1 implementation");
MODULE_ALIAS_CRYPTO("aegis128");
MODULE_ALIAS_CRYPTO("aegis128-aesni");
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index b0dd83555499..fbf43482e1f5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1747,7 +1747,7 @@ static void __exit aesni_exit(void)
unregister_avx_algs();
}
-late_initcall(aesni_init);
+module_init(aesni_init);
module_exit(aesni_exit);
MODULE_DESCRIPTION("AES cipher and modes, optimized with AES-NI or VAES instructions");
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4e460a87f18..fb95a614249d 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -487,79 +487,3 @@ SYM_FUNC_START(cast5_cbc_dec_16way)
FRAME_END
RET;
SYM_FUNC_END(cast5_cbc_dec_16way)
-
-SYM_FUNC_START(cast5_ctr_16way)
- /* input:
- * %rdi: ctx
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (big endian, 64bit)
- */
- FRAME_BEGIN
- pushq %r12;
- pushq %r15;
-
- movq %rdi, CTX;
- movq %rsi, %r11;
- movq %rdx, %r12;
-
- vpcmpeqd RTMP, RTMP, RTMP;
- vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
-
- vpcmpeqd RKR, RKR, RKR;
- vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
- vmovdqa .Lbswap_iv_mask(%rip), R1ST;
- vmovdqa .Lbswap128_mask(%rip), RKM;
-
- /* load IV and byteswap */
- vmovq (%rcx), RX;
- vpshufb R1ST, RX, RX;
-
- /* construct IVs */
- vpsubq RTMP, RX, RX; /* le: IV1, IV0 */
- vpshufb RKM, RX, RL1; /* be: IV0, IV1 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RR1; /* be: IV2, IV3 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RL2; /* be: IV4, IV5 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RR2; /* be: IV6, IV7 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RL3; /* be: IV8, IV9 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RR3; /* be: IV10, IV11 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RL4; /* be: IV12, IV13 */
- vpsubq RKR, RX, RX;
- vpshufb RKM, RX, RR4; /* be: IV14, IV15 */
-
- /* store last IV */
- vpsubq RTMP, RX, RX; /* le: IV16, IV14 */
- vpshufb R1ST, RX, RX; /* be: IV16, IV16 */
- vmovq RX, (%rcx);
-
- call __cast5_enc_blk16;
-
- /* dst = src ^ iv */
- vpxor (0*16)(%r12), RR1, RR1;
- vpxor (1*16)(%r12), RL1, RL1;
- vpxor (2*16)(%r12), RR2, RR2;
- vpxor (3*16)(%r12), RL2, RL2;
- vpxor (4*16)(%r12), RR3, RR3;
- vpxor (5*16)(%r12), RL3, RL3;
- vpxor (6*16)(%r12), RR4, RR4;
- vpxor (7*16)(%r12), RL4, RL4;
- vmovdqu RR1, (0*16)(%r11);
- vmovdqu RL1, (1*16)(%r11);
- vmovdqu RR2, (2*16)(%r11);
- vmovdqu RL2, (3*16)(%r11);
- vmovdqu RR3, (4*16)(%r11);
- vmovdqu RL3, (5*16)(%r11);
- vmovdqu RR4, (6*16)(%r11);
- vmovdqu RL4, (7*16)(%r11);
-
- popq %r15;
- popq %r12;
- FRAME_END
- RET;
-SYM_FUNC_END(cast5_ctr_16way)
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index feccb5254c7e..52c5d47ef5a1 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -41,7 +41,7 @@
*/
#define CRC32C_PCL_BREAKEVEN 512
-asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
+asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
unsigned int crc_init);
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index bbcff1fb78cb..752812bc4991 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -7,6 +7,7 @@
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
*
* Copyright (C) 2012 Intel Corporation.
+ * Copyright 2024 Google LLC
*
* Authors:
* Wajdi Feghali <[email protected]>
@@ -44,185 +45,129 @@
*/
#include <linux/linkage.h>
-#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
-.macro LABEL prefix n
-.L\prefix\n\():
-.endm
-
-.macro JMPTBL_ENTRY i
-.quad .Lcrc_\i
-.endm
-
-.macro JNC_LESS_THAN j
- jnc .Lless_than_\j
-.endm
-
-# Define threshold where buffers are considered "small" and routed to more
-# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
-# SMALL_SIZE can be no larger than 255.
-
+# Define threshold below which buffers are considered "small" and routed to
+# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
-.if (SMALL_SIZE > 255)
-.error "SMALL_ SIZE must be < 256"
-.endif
-
-# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
+# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
.text
SYM_FUNC_START(crc_pcl)
-#define bufp rdi
-#define bufp_dw %edi
-#define bufp_w %di
-#define bufp_b %dil
-#define bufptmp %rcx
-#define block_0 %rcx
-#define block_1 %rdx
-#define block_2 %r11
-#define len %rsi
-#define len_dw %esi
-#define len_w %si
-#define len_b %sil
-#define crc_init_arg %rdx
-#define tmp %rbx
-#define crc_init %r8
-#define crc_init_dw %r8d
-#define crc1 %r9
-#define crc2 %r10
-
- pushq %rbx
- pushq %rdi
- pushq %rsi
-
- ## Move crc_init for Linux to a different
- mov crc_init_arg, crc_init
+#define bufp %rdi
+#define bufp_d %edi
+#define len %esi
+#define crc_init %edx
+#define crc_init_q %rdx
+#define n_misaligned %ecx /* overlaps chunk_bytes! */
+#define n_misaligned_q %rcx
+#define chunk_bytes %ecx /* overlaps n_misaligned! */
+#define chunk_bytes_q %rcx
+#define crc1 %r8
+#define crc2 %r9
+
+ cmp $SMALL_SIZE, len
+ jb .Lsmall
################################################################
## 1) ALIGN:
################################################################
-
- mov %bufp, bufptmp # rdi = *buf
- neg %bufp
- and $7, %bufp # calculate the unalignment amount of
+ mov bufp_d, n_misaligned
+ neg n_misaligned
+ and $7, n_misaligned # calculate the misalignment amount of
# the address
- je .Lproc_block # Skip if aligned
-
- ## If len is less than 8 and we're unaligned, we need to jump
- ## to special code to avoid reading beyond the end of the buffer
- cmp $8, len
- jae .Ldo_align
- # less_than_8 expects length in upper 3 bits of len_dw
- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
- shl $32-3+1, len_dw
- jmp .Lless_than_8_post_shl1
+ je .Laligned # Skip if aligned
+ # Process 1 <= n_misaligned <= 7 bytes individually in order to align
+ # the remaining data to an 8-byte boundary.
.Ldo_align:
- #### Calculate CRC of unaligned bytes of the buffer (if any)
- movq (bufptmp), tmp # load a quadward from the buffer
- add %bufp, bufptmp # align buffer pointer for quadword
- # processing
- sub %bufp, len # update buffer length
+ movq (bufp), %rax
+ add n_misaligned_q, bufp
+ sub n_misaligned, len
.Lalign_loop:
- crc32b %bl, crc_init_dw # compute crc32 of 1-byte
- shr $8, tmp # get next byte
- dec %bufp
+ crc32b %al, crc_init # compute crc32 of 1-byte
+ shr $8, %rax # get next byte
+ dec n_misaligned
jne .Lalign_loop
-
-.Lproc_block:
+.Laligned:
################################################################
- ## 2) PROCESS BLOCKS:
+ ## 2) PROCESS BLOCK:
################################################################
- ## compute num of bytes to be processed
- movq len, tmp # save num bytes in tmp
-
- cmpq $128*24, len
+ cmp $128*24, len
jae .Lfull_block
-.Lcontinue_block:
- cmpq $SMALL_SIZE, len
- jb .Lsmall
-
- ## len < 128*24
- movq $2731, %rax # 2731 = ceil(2^16 / 24)
- mul len_dw
- shrq $16, %rax
-
- ## eax contains floor(bytes / 24) = num 24-byte chunks to do
-
- ## process rax 24-byte chunks (128 >= rax >= 0)
-
- ## compute end address of each block
- ## block 0 (base addr + RAX * 8)
- ## block 1 (base addr + RAX * 16)
- ## block 2 (base addr + RAX * 24)
- lea (bufptmp, %rax, 8), block_0
- lea (block_0, %rax, 8), block_1
- lea (block_1, %rax, 8), block_2
+.Lpartial_block:
+ # Compute floor(len / 24) to get num qwords to process from each lane.
+ imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
+ shr $16, %eax
+ jmp .Lcrc_3lanes
- xor crc1, crc1
- xor crc2, crc2
-
- ## branch into array
- leaq jump_table(%rip), %bufp
- mov (%bufp,%rax,8), %bufp
- JMP_NOSPEC bufp
-
- ################################################################
- ## 2a) PROCESS FULL BLOCKS:
- ################################################################
.Lfull_block:
- movl $128,%eax
- lea 128*8*2(block_0), block_1
- lea 128*8*3(block_0), block_2
- add $128*8*1, block_0
-
- xor crc1,crc1
- xor crc2,crc2
-
- # Fall through into top of crc array (crc_128)
+ # Processing 128 qwords from each lane.
+ mov $128, %eax
################################################################
- ## 3) CRC Array:
+ ## 3) CRC each of three lanes:
################################################################
- i=128
-.rept 128-1
-.altmacro
-LABEL crc_ %i
-.noaltmacro
- ENDBR
- crc32q -i*8(block_0), crc_init
- crc32q -i*8(block_1), crc1
- crc32q -i*8(block_2), crc2
- i=(i-1)
-.endr
-
-.altmacro
-LABEL crc_ %i
-.noaltmacro
- ENDBR
- crc32q -i*8(block_0), crc_init
- crc32q -i*8(block_1), crc1
-# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
-
- mov block_2, block_0
+.Lcrc_3lanes:
+ xor crc1,crc1
+ xor crc2,crc2
+ mov %eax, chunk_bytes
+ shl $3, chunk_bytes # num bytes to process from each lane
+ sub $5, %eax # 4 for 4x_loop, 1 for special last iter
+ jl .Lcrc_3lanes_4x_done
+
+ # Unroll the loop by a factor of 4 to reduce the overhead of the loop
+ # bookkeeping instructions, which can compete with crc32q for the ALUs.
+.Lcrc_3lanes_4x_loop:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+ crc32q (bufp,chunk_bytes_q,2), crc2
+ crc32q 8(bufp), crc_init_q
+ crc32q 8(bufp,chunk_bytes_q), crc1
+ crc32q 8(bufp,chunk_bytes_q,2), crc2
+ crc32q 16(bufp), crc_init_q
+ crc32q 16(bufp,chunk_bytes_q), crc1
+ crc32q 16(bufp,chunk_bytes_q,2), crc2
+ crc32q 24(bufp), crc_init_q
+ crc32q 24(bufp,chunk_bytes_q), crc1
+ crc32q 24(bufp,chunk_bytes_q,2), crc2
+ add $32, bufp
+ sub $4, %eax
+ jge .Lcrc_3lanes_4x_loop
+
+.Lcrc_3lanes_4x_done:
+ add $4, %eax
+ jz .Lcrc_3lanes_last_qword
+
+.Lcrc_3lanes_1x_loop:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+ crc32q (bufp,chunk_bytes_q,2), crc2
+ add $8, bufp
+ dec %eax
+ jnz .Lcrc_3lanes_1x_loop
+
+.Lcrc_3lanes_last_qword:
+ crc32q (bufp), crc_init_q
+ crc32q (bufp,chunk_bytes_q), crc1
+# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
################################################################
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), %bufp # first entry is for idx 1
- shlq $3, %rax # rax *= 8
- pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
- leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
- subq %rax, tmp # tmp -= rax*24
+ lea (K_table-8)(%rip), %rax # first entry is for idx 1
+ pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
+ lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
+ sub %eax, len # len -= chunk_bytes * 3
- movq crc_init, %xmm1 # CRC for block 1
+ movq crc_init_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
@@ -230,103 +175,54 @@ LABEL crc_ %i
pxor %xmm2,%xmm1
movq %xmm1, %rax
- xor -i*8(block_2), %rax
- mov crc2, crc_init
- crc32 %rax, crc_init
+ xor (bufp,chunk_bytes_q,2), %rax
+ mov crc2, crc_init_q
+ crc32 %rax, crc_init_q
+ lea 8(bufp,chunk_bytes_q,2), bufp
################################################################
- ## 5) Check for end:
+ ## 5) If more blocks remain, goto (2):
################################################################
-LABEL crc_ 0
- ENDBR
- mov tmp, len
- cmp $128*24, tmp
- jae .Lfull_block
- cmp $24, tmp
- jae .Lcontinue_block
-
-.Lless_than_24:
- shl $32-4, len_dw # less_than_16 expects length
- # in upper 4 bits of len_dw
- jnc .Lless_than_16
- crc32q (bufptmp), crc_init
- crc32q 8(bufptmp), crc_init
- jz .Ldo_return
- add $16, bufptmp
- # len is less than 8 if we got here
- # less_than_8 expects length in upper 3 bits of len_dw
- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
- shl $2, len_dw
- jmp .Lless_than_8_post_shl1
+ cmp $128*24, len
+ jae .Lfull_block
+ cmp $SMALL_SIZE, len
+ jae .Lpartial_block
#######################################################################
- ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
+ ## 6) Process any remainder without interleaving:
#######################################################################
.Lsmall:
- shl $32-8, len_dw # Prepare len_dw for less_than_256
- j=256
-.rept 5 # j = {256, 128, 64, 32, 16}
-.altmacro
-LABEL less_than_ %j # less_than_j: Length should be in
- # upper lg(j) bits of len_dw
- j=(j/2)
- shl $1, len_dw # Get next MSB
- JNC_LESS_THAN %j
-.noaltmacro
- i=0
-.rept (j/8)
- crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
- i=i+8
-.endr
- jz .Ldo_return # Return if remaining length is zero
- add $j, bufptmp # Advance buf
-.endr
-
-.Lless_than_8: # Length should be stored in
- # upper 3 bits of len_dw
- shl $1, len_dw
-.Lless_than_8_post_shl1:
- jnc .Lless_than_4
- crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
- jz .Ldo_return # return if remaining data is zero
- add $4, bufptmp
-.Lless_than_4: # Length should be stored in
- # upper 2 bits of len_dw
- shl $1, len_dw
- jnc .Lless_than_2
- crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
- jz .Ldo_return # return if remaining data is zero
- add $2, bufptmp
-.Lless_than_2: # Length should be stored in the MSB
- # of len_dw
- shl $1, len_dw
- jnc .Lless_than_1
- crc32b (bufptmp), crc_init_dw # CRC of 1 byte
-.Lless_than_1: # Length should be zero
-.Ldo_return:
- movq crc_init, %rax
- popq %rsi
- popq %rdi
- popq %rbx
+ test len, len
+ jz .Ldone
+ mov len, %eax
+ shr $3, %eax
+ jz .Ldo_dword
+.Ldo_qwords:
+ crc32q (bufp), crc_init_q
+ add $8, bufp
+ dec %eax
+ jnz .Ldo_qwords
+.Ldo_dword:
+ test $4, len
+ jz .Ldo_word
+ crc32l (bufp), crc_init
+ add $4, bufp
+.Ldo_word:
+ test $2, len
+ jz .Ldo_byte
+ crc32w (bufp), crc_init
+ add $2, bufp
+.Ldo_byte:
+ test $1, len
+ jz .Ldone
+ crc32b (bufp), crc_init
+.Ldone:
+ mov crc_init, %eax
RET
SYM_FUNC_END(crc_pcl)
.section .rodata, "a", @progbits
- ################################################################
- ## jump table Table is 129 entries x 2 bytes each
- ################################################################
-.align 4
-jump_table:
- i=0
-.rept 129
-.altmacro
-JMPTBL_ENTRY %i
-.noaltmacro
- i=i+1
-.endr
-
-
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 words (8 bytes) each
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 534c74b14fab..4d0fb2fba7e2 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -468,3 +468,7 @@
460 i386 lsm_set_self_attr sys_lsm_set_self_attr
461 i386 lsm_list_modules sys_lsm_list_modules
462 i386 mseal sys_mseal
+463 i386 setxattrat sys_setxattrat
+464 i386 getxattrat sys_getxattrat
+465 i386 listxattrat sys_listxattrat
+466 i386 removexattrat sys_removexattrat
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 7093ee21c0d1..5eb708bff1c7 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -386,6 +386,10 @@
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
462 common mseal sys_mseal
+463 common setxattrat sys_setxattrat
+464 common getxattrat sys_getxattrat
+465 common listxattrat sys_listxattrat
+466 common removexattrat sys_removexattrat
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index bafa73f09e92..872947c1004c 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/vdso.h>
+#include <asm/vdso/vsyscall.h>
/*
* Linker script for vDSO. This is an ELF shared object prelinked to
@@ -16,23 +17,16 @@ SECTIONS
* segment.
*/
- vvar_start = . - 4 * PAGE_SIZE;
+ vvar_start = . - __VVAR_PAGES * PAGE_SIZE;
vvar_page = vvar_start;
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#include <asm/vvar.h>
-#undef EMIT_VVAR
+ vdso_rng_data = vvar_page + __VDSO_RND_DATA_OFFSET;
- pvclock_page = vvar_start + PAGE_SIZE;
- hvclock_page = vvar_start + 2 * PAGE_SIZE;
- timens_page = vvar_start + 3 * PAGE_SIZE;
+ timens_page = vvar_start + PAGE_SIZE;
-#undef _ASM_X86_VVAR_H
- /* Place all vvars in timens too at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) timens_ ## name = timens_page + offset;
-#include <asm/vvar.h>
-#undef EMIT_VVAR
+ vclock_pages = vvar_start + VDSO_NR_VCLOCK_PAGES * PAGE_SIZE;
+ pvclock_page = vclock_pages + VDSO_PAGE_PVCLOCK_OFFSET * PAGE_SIZE;
+ hvclock_page = vclock_pages + VDSO_PAGE_HVCLOCK_OFFSET * PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8fed8b8b9cc..bfc7cabf4017 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,26 +20,20 @@
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
-#include <asm/vvar.h>
#include <asm/tlb.h>
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
+#include <asm/vdso/vsyscall.h>
#include <clocksource/hyperv_timer.h>
-#undef _ASM_X86_VVAR_H
-#define EMIT_VVAR(name, offset) \
- const size_t name ## _offset = offset;
-#include <asm/vvar.h>
-
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
- return (struct vdso_data *)(vvar_page + _vdso_data_offset);
+ return (struct vdso_data *)vvar_page;
}
-#undef EMIT_VVAR
-DEFINE_VVAR(struct vdso_data, _vdso_data);
-DEFINE_VVAR_SINGLE(struct vdso_rng_data, _vdso_rng_data);
+static union vdso_data_store vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = vdso_data_store.data;
unsigned int vclocks_used __read_mostly;
@@ -154,7 +148,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
if (sym_offset == image->sym_vvar_page) {
struct page *timens_page = find_timens_vvar_page(vma);
- pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+ pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT;
/*
* If a task belongs to a time namespace then a namespace
@@ -182,32 +176,52 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
}
return vmf_insert_pfn(vma, vmf->address, pfn);
- } else if (sym_offset == image->sym_pvclock_page) {
- struct pvclock_vsyscall_time_info *pvti =
- pvclock_get_pvti_cpu0_va();
- if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK)) {
- return vmf_insert_pfn_prot(vma, vmf->address,
- __pa(pvti) >> PAGE_SHIFT,
- pgprot_decrypted(vma->vm_page_prot));
- }
- } else if (sym_offset == image->sym_hvclock_page) {
- pfn = hv_get_tsc_pfn();
- if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
- return vmf_insert_pfn(vma, vmf->address, pfn);
} else if (sym_offset == image->sym_timens_page) {
struct page *timens_page = find_timens_vvar_page(vma);
if (!timens_page)
return VM_FAULT_SIGBUS;
- pfn = __pa_symbol(&__vvar_page) >> PAGE_SHIFT;
+ pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT;
return vmf_insert_pfn(vma, vmf->address, pfn);
}
return VM_FAULT_SIGBUS;
}
+static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ switch (vmf->pgoff) {
+#ifdef CONFIG_PARAVIRT_CLOCK
+ case VDSO_PAGE_PVCLOCK_OFFSET:
+ {
+ struct pvclock_vsyscall_time_info *pvti =
+ pvclock_get_pvti_cpu0_va();
+
+ if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK))
+ return vmf_insert_pfn_prot(vma, vmf->address,
+ __pa(pvti) >> PAGE_SHIFT,
+ pgprot_decrypted(vma->vm_page_prot));
+ break;
+ }
+#endif /* CONFIG_PARAVIRT_CLOCK */
+#ifdef CONFIG_HYPERV_TIMER
+ case VDSO_PAGE_HVCLOCK_OFFSET:
+ {
+ unsigned long pfn = hv_get_tsc_pfn();
+
+ if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+ break;
+ }
+#endif /* CONFIG_HYPERV_TIMER */
+ }
+
+ return VM_FAULT_SIGBUS;
+}
+
static const struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
@@ -217,6 +231,10 @@ static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
};
+static const struct vm_special_mapping vvar_vclock_mapping = {
+ .name = "[vvar_vclock]",
+ .fault = vvar_vclock_fault,
+};
/*
* Add vdso and vvar mappings to current process.
@@ -259,7 +277,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
vma = _install_special_mapping(mm,
addr,
- -image->sym_vvar_start,
+ (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE,
VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
VM_PFNMAP,
&vvar_mapping);
@@ -267,11 +285,26 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size, NULL);
- } else {
- current->mm->context.vdso = (void __user *)text_start;
- current->mm->context.vdso_image = image;
+ goto up_fail;
}
+ vma = _install_special_mapping(mm,
+ addr + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE,
+ VDSO_NR_VCLOCK_PAGES * PAGE_SIZE,
+ VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+ VM_PFNMAP,
+ &vvar_vclock_mapping);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ do_munmap(mm, text_start, image->size, NULL);
+ do_munmap(mm, addr, image->size, NULL);
+ goto up_fail;
+ }
+
+ current->mm->context.vdso = (void __user *)text_start;
+ current->mm->context.vdso_image = image;
+
up_fail:
mmap_write_unlock(mm);
return ret;
@@ -293,7 +326,8 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
*/
for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_mapping) ||
- vma_is_special_mapping(vma, &vvar_mapping)) {
+ vma_is_special_mapping(vma, &vvar_mapping) ||
+ vma_is_special_mapping(vma, &vvar_vclock_mapping)) {
mmap_write_unlock(mm);
return -EEXIST;
}
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 920e3a640cad..b4a1a2576510 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -943,11 +943,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ static atomic64_t status_warned = ATOMIC64_INIT(0);
+ u64 reserved, status, mask, new_bits, prev_bits;
struct perf_sample_data data;
struct hw_perf_event *hwc;
struct perf_event *event;
int handled = 0, idx;
- u64 reserved, status, mask;
bool pmu_enabled;
/*
@@ -1012,7 +1013,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
* the corresponding PMCs are expected to be inactive according to the
* active_mask
*/
- WARN_ON(status > 0);
+ if (status > 0) {
+ prev_bits = atomic64_fetch_or(status, &status_warned);
+ // A new bit was set for the very first time.
+ new_bits = status & ~prev_bits;
+ WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
+ }
/* Clear overflow and freeze bits */
amd_pmu_ack_global_status(~status);
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 0bfde2ea5cb8..49c26ce2b115 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -916,7 +916,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
union amd_uncore_info info;
struct amd_uncore_pmu *pmu;
- int index = 0, gid, i;
+ int gid, i;
+ u16 index = 0;
if (pmu_version < 2)
return 0;
@@ -948,7 +949,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
for (i = 0; i < group_num_pmus[gid]; i++) {
pmu = &uncore->pmus[index];
- snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
+ snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
pmu->rdpmc_base = -1;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 65ab6460aed4..c75c482d4c52 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -3003,35 +3003,57 @@ static unsigned long code_segment_base(struct pt_regs *regs)
return 0;
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
- if (perf_guest_state())
- return perf_guest_get_ip();
-
return regs->ip + code_segment_base(regs);
}
-unsigned long perf_misc_flags(struct pt_regs *regs)
+static unsigned long common_misc_flags(struct pt_regs *regs)
{
- unsigned int guest_state = perf_guest_state();
- int misc = 0;
+ if (regs->flags & PERF_EFLAGS_EXACT)
+ return PERF_RECORD_MISC_EXACT_IP;
- if (guest_state) {
- if (guest_state & PERF_GUEST_USER)
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
+ return 0;
+}
- if (regs->flags & PERF_EFLAGS_EXACT)
- misc |= PERF_RECORD_MISC_EXACT_IP;
+static unsigned long guest_misc_flags(struct pt_regs *regs)
+{
+ unsigned long guest_state = perf_guest_state();
+
+ if (!(guest_state & PERF_GUEST_ACTIVE))
+ return 0;
+
+ if (guest_state & PERF_GUEST_USER)
+ return PERF_RECORD_MISC_GUEST_USER;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
+
+}
+
+static unsigned long host_misc_flags(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return PERF_RECORD_MISC_USER;
+ else
+ return PERF_RECORD_MISC_KERNEL;
+}
+
+unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
+{
+ unsigned long flags = common_misc_flags(regs);
+
+ flags |= guest_misc_flags(regs);
+
+ return flags;
+}
+
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
+{
+ unsigned long flags = common_misc_flags(regs);
+
+ flags |= host_misc_flags(regs);
- return misc;
+ return flags;
}
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d879478db3f5..bb284aff7bfd 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3962,8 +3962,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
- if (!(event->attr.sample_type &
- ~intel_pmu_large_pebs_flags(event))) {
+ if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) &&
+ !has_aux_action(event)) {
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
event->attach_state |= PERF_ATTACH_SCHED_CB;
}
@@ -4599,6 +4599,28 @@ static inline bool erratum_hsw11(struct perf_event *event)
X86_CONFIG(.event=0xc0, .umask=0x01);
}
+static struct event_constraint *
+arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return cmt_get_event_constraints(cpuc, idx, event);
+
+ return mtl_get_event_constraints(cpuc, idx, event);
+}
+
+static int arl_h_hw_config(struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return intel_pmu_hw_config(event);
+
+ return adl_hw_config(event);
+}
+
/*
* The HSW11 requires a period larger than 100 which is the same as the BDM11.
* A minimum period of 128 is enforced as well for the INST_RETIRED.ALL.
@@ -4924,17 +4946,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
/*
* This essentially just maps between the 'hybrid_cpu_type'
- * and 'hybrid_pmu_type' enums:
+ * and 'hybrid_pmu_type' enums except for ARL-H processor
+ * which needs to compare atom uarch native id since ARL-H
+ * contains two different atom uarchs.
*/
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
+ u32 native_id;
- if (cpu_type == HYBRID_INTEL_CORE &&
- pmu_type == hybrid_big)
- return &x86_pmu.hybrid_pmu[i];
- if (cpu_type == HYBRID_INTEL_ATOM &&
- pmu_type == hybrid_small)
+ if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
+ if (cpu_type == HYBRID_INTEL_ATOM) {
+ if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
+ return &x86_pmu.hybrid_pmu[i];
+
+ native_id = get_this_hybrid_cpu_native_id();
+ if (native_id == skt_native_id && pmu_type == hybrid_small)
+ return &x86_pmu.hybrid_pmu[i];
+ if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
+ return &x86_pmu.hybrid_pmu[i];
+ }
}
return NULL;
@@ -5965,6 +5996,37 @@ static struct attribute *lnl_hybrid_events_attrs[] = {
NULL
};
+/* The event string must be in PMU IDX order. */
+EVENT_ATTR_STR_HYBRID(topdown-retiring,
+ td_retiring_arl_h,
+ "event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-bad-spec,
+ td_bad_spec_arl_h,
+ "event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-fe-bound,
+ td_fe_bound_arl_h,
+ "event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(topdown-be-bound,
+ td_be_bound_arl_h,
+ "event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0",
+ hybrid_big_small_tiny);
+
+static struct attribute *arl_h_hybrid_events_attrs[] = {
+ EVENT_PTR(slots_adl),
+ EVENT_PTR(td_retiring_arl_h),
+ EVENT_PTR(td_bad_spec_arl_h),
+ EVENT_PTR(td_fe_bound_arl_h),
+ EVENT_PTR(td_be_bound_arl_h),
+ EVENT_PTR(td_heavy_ops_adl),
+ EVENT_PTR(td_br_mis_adl),
+ EVENT_PTR(td_fetch_lat_adl),
+ EVENT_PTR(td_mem_bound_adl),
+ NULL,
+};
+
/* Must be in IDX order */
EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small);
EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small);
@@ -5983,6 +6045,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = {
NULL
};
+EVENT_ATTR_STR_HYBRID(mem-loads,
+ mem_ld_arl_h,
+ "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3",
+ hybrid_big_small_tiny);
+EVENT_ATTR_STR_HYBRID(mem-stores,
+ mem_st_arl_h,
+ "event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6",
+ hybrid_big_small_tiny);
+
+static struct attribute *arl_h_hybrid_mem_attrs[] = {
+ EVENT_PTR(mem_ld_arl_h),
+ EVENT_PTR(mem_st_arl_h),
+ NULL,
+};
+
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@@ -6006,8 +6083,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = {
FORMAT_ATTR_HYBRID(in_tx, hybrid_big);
FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big);
-FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
-FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
+FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny);
+FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny);
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
#define ADL_HYBRID_RTM_FORMAT_ATTR \
@@ -6030,7 +6107,7 @@ static struct attribute *adl_hybrid_extra_attr[] = {
NULL
};
-FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
+FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny);
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
ADL_HYBRID_RTM_FORMAT_ATTR,
@@ -6238,8 +6315,9 @@ static inline int intel_pmu_v6_addr_offset(int index, bool eventsel)
}
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
- { hybrid_small, "cpu_atom" },
- { hybrid_big, "cpu_core" },
+ { hybrid_small, "cpu_atom" },
+ { hybrid_big, "cpu_core" },
+ { hybrid_tiny, "cpu_lowpower" },
};
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
@@ -6272,7 +6350,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
0, x86_pmu_num_counters(&pmu->pmu), 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
- if (pmu->pmu_type & hybrid_small) {
+ if (pmu->pmu_type & hybrid_small_tiny) {
pmu->intel_cap.perf_metrics = 0;
pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true;
@@ -7111,6 +7189,37 @@ __init int intel_pmu_init(void)
name = "lunarlake_hybrid";
break;
+ case INTEL_ARROWLAKE_H:
+ intel_pmu_init_hybrid(hybrid_big_small_tiny);
+
+ x86_pmu.pebs_latency_data = arl_h_latency_data;
+ x86_pmu.get_event_constraints = arl_h_get_event_constraints;
+ x86_pmu.hw_config = arl_h_hw_config;
+
+ td_attr = arl_h_hybrid_events_attrs;
+ mem_attr = arl_h_hybrid_mem_attrs;
+ tsx_attr = adl_hybrid_tsx_attrs;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+
+ /* Initialize big core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
+ intel_pmu_init_lnc(&pmu->pmu);
+
+ /* Initialize Atom core specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
+ intel_pmu_init_skt(&pmu->pmu);
+
+ /* Initialize Lower Power Atom specific PerfMon capabilities. */
+ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX];
+ intel_pmu_init_grt(&pmu->pmu);
+ pmu->extra_regs = intel_cmt_extra_regs;
+
+ intel_pmu_pebs_data_source_arl_h();
+ pr_cont("ArrowLake-H Hybrid events, ");
+ name = "arrowlake_h_hybrid";
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index fa5ea65de0d0..8afc4ad3cd16 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -177,6 +177,17 @@ void __init intel_pmu_pebs_data_source_mtl(void)
__intel_pmu_pebs_data_source_cmt(data_source);
}
+void __init intel_pmu_pebs_data_source_arl_h(void)
+{
+ u64 *data_source;
+
+ intel_pmu_pebs_data_source_lnl();
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
void __init intel_pmu_pebs_data_source_cmt(void)
{
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
@@ -388,6 +399,16 @@ u64 lnl_latency_data(struct perf_event *event, u64 status)
return lnc_latency_data(event, status);
}
+u64 arl_h_latency_data(struct perf_event *event, u64 status)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->pmu_type == hybrid_tiny)
+ return cmt_latency_data(event, status);
+
+ return lnl_latency_data(event, status);
+}
+
static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fd4670a6694e..4b0373bc8ab4 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -418,6 +418,9 @@ static void pt_config_start(struct perf_event *event)
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 ctl = event->hw.aux_config;
+ if (READ_ONCE(event->hw.aux_paused))
+ return;
+
ctl |= RTIT_CTL_TRACEEN;
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
@@ -534,7 +537,24 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.aux_config = reg;
+
+ /*
+ * Allow resume before starting so as not to overwrite a value set by a
+ * PMI.
+ */
+ barrier();
+ WRITE_ONCE(pt->resume_allowed, 1);
+ /* Configuration is complete, it is now OK to handle an NMI */
+ barrier();
+ WRITE_ONCE(pt->handle_nmi, 1);
+ barrier();
pt_config_start(event);
+ barrier();
+ /*
+ * Allow pause after starting so its pt_config_stop() doesn't race with
+ * pt_config_start().
+ */
+ WRITE_ONCE(pt->pause_allowed, 1);
}
static void pt_config_stop(struct perf_event *event)
@@ -828,11 +848,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
buf->cur_idx++;
if (buf->cur_idx == buf->cur->last) {
- if (buf->cur == buf->last)
+ if (buf->cur == buf->last) {
buf->cur = buf->first;
- else
+ buf->wrapped = true;
+ } else {
buf->cur = list_entry(buf->cur->list.next, struct topa,
list);
+ }
buf->cur_idx = 0;
}
}
@@ -846,8 +868,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
static void pt_update_head(struct pt *pt)
{
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ bool wrapped = buf->wrapped;
u64 topa_idx, base, old;
+ buf->wrapped = false;
+
if (buf->single) {
local_set(&buf->data_size, buf->output_off);
return;
@@ -865,7 +890,7 @@ static void pt_update_head(struct pt *pt)
} else {
old = (local64_xchg(&buf->head, base) &
((buf->nr_pages << PAGE_SHIFT) - 1));
- if (base < old)
+ if (base < old || (base == old && wrapped))
base += buf->nr_pages << PAGE_SHIFT;
local_add(base - old, &buf->data_size);
@@ -1511,6 +1536,7 @@ void intel_pt_interrupt(void)
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf) {
event->hw.state = PERF_HES_STOPPED;
+ WRITE_ONCE(pt->resume_allowed, 0);
return;
}
@@ -1519,6 +1545,7 @@ void intel_pt_interrupt(void)
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
return;
}
@@ -1573,6 +1600,26 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
+ if (mode & PERF_EF_RESUME) {
+ if (READ_ONCE(pt->resume_allowed)) {
+ u64 status;
+
+ /*
+ * Only if the trace is not active and the error and
+ * stopped bits are clear, is it safe to start, but a
+ * PMI might have just cleared these, so resume_allowed
+ * must be checked again also.
+ */
+ rdmsrl(MSR_IA32_RTIT_STATUS, status);
+ if (!(status & (RTIT_STATUS_TRIGGEREN |
+ RTIT_STATUS_ERROR |
+ RTIT_STATUS_STOPPED)) &&
+ READ_ONCE(pt->resume_allowed))
+ pt_config_start(event);
+ }
+ return;
+ }
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
@@ -1583,7 +1630,6 @@ static void pt_event_start(struct perf_event *event, int mode)
goto fail_end_stop;
}
- WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
pt_config_buffer(buf);
@@ -1601,6 +1647,12 @@ static void pt_event_stop(struct perf_event *event, int mode)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
+ if (mode & PERF_EF_PAUSE) {
+ if (READ_ONCE(pt->pause_allowed))
+ pt_config_stop(event);
+ return;
+ }
+
/*
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
@@ -1608,6 +1660,15 @@ static void pt_event_stop(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 0);
barrier();
+ /*
+ * Prevent a resume from attempting to restart tracing, or a pause
+ * during a subsequent start. Do this after clearing handle_nmi so that
+ * pt_event_snapshot_aux() will not re-allow them.
+ */
+ WRITE_ONCE(pt->pause_allowed, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
+ barrier();
+
pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
@@ -1657,6 +1718,10 @@ static long pt_event_snapshot_aux(struct perf_event *event,
if (WARN_ON_ONCE(!buf->snapshot))
return 0;
+ /* Prevent pause/resume from attempting to start/stop tracing */
+ WRITE_ONCE(pt->pause_allowed, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
+ barrier();
/*
* There is no PT interrupt in this mode, so stop the trace and it will
* remain stopped while the buffer is copied.
@@ -1676,8 +1741,13 @@ static long pt_event_snapshot_aux(struct perf_event *event,
* Here, handle_nmi tells us if the tracing was on.
* If the tracing was on, restart it.
*/
- if (READ_ONCE(pt->handle_nmi))
+ if (READ_ONCE(pt->handle_nmi)) {
+ WRITE_ONCE(pt->resume_allowed, 1);
+ barrier();
pt_config_start(event);
+ barrier();
+ WRITE_ONCE(pt->pause_allowed, 1);
+ }
return ret;
}
@@ -1793,7 +1863,9 @@ static __init int pt_init(void)
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
- pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+ pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
+ PERF_PMU_CAP_ITRACE |
+ PERF_PMU_CAP_AUX_PAUSE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index f5e46c04c145..7ee94fc6d7cb 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ struct pt_pmu {
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
* @single: use Single Range Output instead of ToPA
+ * @wrapped: buffer advance wrapped back to the first topa table
* @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer
@@ -82,6 +83,7 @@ struct pt_buffer {
local64_t head;
bool snapshot;
bool single;
+ bool wrapped;
long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te;
void **data_pages;
@@ -117,6 +119,8 @@ struct pt_filters {
* @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
+ * @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing
+ * @resume_allowed: PERF_EF_RESUME is allowed to start tracing
* @output_base: cached RTIT_OUTPUT_BASE MSR value
* @output_mask: cached RTIT_OUTPUT_MASK MSR value
*/
@@ -125,6 +129,8 @@ struct pt {
struct pt_filters filters;
int handle_nmi;
int vmx_on;
+ int pause_allowed;
+ int resume_allowed;
u64 output_base;
u64 output_mask;
};
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ac1182141bf6..82c6f45ce975 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -668,24 +668,38 @@ enum {
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
+/*
+ * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
+ * of the core. Bits 31-24 indicates its core type (Core or Atom)
+ * and Bits [23:0] indicates the native model ID of the core.
+ * Core type and native model ID are defined in below enumerations.
+ */
enum hybrid_cpu_type {
HYBRID_INTEL_NONE,
HYBRID_INTEL_ATOM = 0x20,
HYBRID_INTEL_CORE = 0x40,
};
+#define X86_HYBRID_PMU_ATOM_IDX 0
+#define X86_HYBRID_PMU_CORE_IDX 1
+#define X86_HYBRID_PMU_TINY_IDX 2
+
enum hybrid_pmu_type {
not_hybrid,
- hybrid_small = BIT(0),
- hybrid_big = BIT(1),
-
- hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
+ hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX),
+ hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX),
+ hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX),
+
+ /* The belows are only used for matching */
+ hybrid_big_small = hybrid_big | hybrid_small,
+ hybrid_small_tiny = hybrid_small | hybrid_tiny,
+ hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
};
-#define X86_HYBRID_PMU_ATOM_IDX 0
-#define X86_HYBRID_PMU_CORE_IDX 1
-
-#define X86_HYBRID_NUM_PMUS 2
+enum atom_native_id {
+ cmt_native_id = 0x2, /* Crestmont */
+ skt_native_id = 0x3, /* Skymont */
+};
struct x86_hybrid_pmu {
struct pmu pmu;
@@ -1578,6 +1592,8 @@ u64 cmt_latency_data(struct perf_event *event, u64 status);
u64 lnl_latency_data(struct perf_event *event, u64 status);
+u64 arl_h_latency_data(struct perf_event *event, u64 status);
+
extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1697,6 +1713,8 @@ void intel_pmu_pebs_data_source_grt(void);
void intel_pmu_pebs_data_source_mtl(void);
+void intel_pmu_pebs_data_source_arl_h(void);
+
void intel_pmu_pebs_data_source_cmt(void);
void intel_pmu_pebs_data_source_lnl(void);
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index a481a939862e..a8defc813c36 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -148,7 +148,6 @@ struct rapl_model {
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
-static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static struct perf_msr *rapl_msrs;
@@ -369,8 +368,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
if (event->cpu < 0)
return -EINVAL;
- event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
-
if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
@@ -389,7 +386,6 @@ static int rapl_pmu_event_init(struct perf_event *event)
pmu = cpu_to_rapl_pmu(event->cpu);
if (!pmu)
return -EINVAL;
- event->cpu = pmu->cpu;
event->pmu_private = pmu;
event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
@@ -403,23 +399,6 @@ static void rapl_pmu_event_read(struct perf_event *event)
rapl_event_update(event);
}
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-
-static struct attribute *rapl_pmu_attrs[] = {
- &dev_attr_cpumask.attr,
- NULL,
-};
-
-static struct attribute_group rapl_pmu_attr_group = {
- .attrs = rapl_pmu_attrs,
-};
-
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
@@ -467,7 +446,6 @@ static struct attribute_group rapl_pmu_format_group = {
};
static const struct attribute_group *rapl_attr_groups[] = {
- &rapl_pmu_attr_group,
&rapl_pmu_format_group,
&rapl_pmu_events_group,
NULL,
@@ -570,65 +548,6 @@ static struct perf_msr amd_rapl_msrs[] = {
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
-static int rapl_cpu_offline(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
- /* Check if exiting cpu is used for collecting rapl events */
- if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
- return 0;
-
- pmu->cpu = -1;
- /* Find a new cpu to collect rapl events */
- target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu);
-
- /* Migrate rapl events to the new target */
- if (target < nr_cpu_ids) {
- cpumask_set_cpu(target, &rapl_cpu_mask);
- pmu->cpu = target;
- perf_pmu_migrate_context(pmu->pmu, cpu, target);
- }
- return 0;
-}
-
-static int rapl_cpu_online(unsigned int cpu)
-{
- s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu);
- if (rapl_pmu_idx < 0) {
- pr_err("topology_logical_(package/die)_id() returned a negative value");
- return -EINVAL;
- }
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
- int target;
-
- if (!pmu) {
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- rapl_hrtimer_init(pmu);
-
- rapl_pmus->pmus[rapl_pmu_idx] = pmu;
- }
-
- /*
- * Check if there is an online cpu in the package which collects rapl
- * events already.
- */
- target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu));
- if (target < nr_cpu_ids)
- return 0;
-
- cpumask_set_cpu(cpu, &rapl_cpu_mask);
- pmu->cpu = cpu;
- return 0;
-}
-
static int rapl_check_hw_unit(struct rapl_model *rm)
{
u64 msr_rapl_power_unit_bits;
@@ -707,12 +626,41 @@ static const struct attribute_group *rapl_attr_update[] = {
NULL,
};
+static int __init init_rapl_pmu(void)
+{
+ struct rapl_pmu *pmu;
+ int idx;
+
+ for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) {
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ goto free;
+
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(pmu);
+
+ rapl_pmus->pmus[idx] = pmu;
+ }
+
+ return 0;
+free:
+ for (; idx > 0; idx--)
+ kfree(rapl_pmus->pmus[idx - 1]);
+ return -ENOMEM;
+}
+
static int __init init_rapl_pmus(void)
{
int nr_rapl_pmu = topology_max_packages();
+ int rapl_pmu_scope = PERF_PMU_SCOPE_PKG;
- if (!rapl_pmu_is_pkg_scope())
+ if (!rapl_pmu_is_pkg_scope()) {
nr_rapl_pmu *= topology_max_dies_per_package();
+ rapl_pmu_scope = PERF_PMU_SCOPE_DIE;
+ }
rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL);
if (!rapl_pmus)
@@ -728,9 +676,11 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
+ rapl_pmus->pmu.scope = rapl_pmu_scope;
rapl_pmus->pmu.module = THIS_MODULE;
rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
- return 0;
+
+ return init_rapl_pmu();
}
static struct rapl_model model_snb = {
@@ -876,24 +826,13 @@ static int __init rapl_pmu_init(void)
if (ret)
return ret;
- /*
- * Install callbacks. Core will call them for each online cpu.
- */
- ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
- "perf/x86/rapl:online",
- rapl_cpu_online, rapl_cpu_offline);
- if (ret)
- goto out;
-
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret)
- goto out1;
+ goto out;
rapl_advertise();
return 0;
-out1:
- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
@@ -903,7 +842,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
- cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
}
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 1f650b4dde50..6c6e9b9f98a4 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -51,7 +51,8 @@ static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
#ifdef CONFIG_X86_CMPXCHG64
#define __alternative_atomic64(f, g, out, in...) \
asm volatile("call %c[func]" \
- : out : [func] "i" (atomic64_##g##_cx8), ## in)
+ : ALT_OUTPUT_SP(out) \
+ : [func] "i" (atomic64_##g##_cx8), ## in)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 62cef2113ca7..fd1282a783dd 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -94,7 +94,7 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
asm volatile(ALTERNATIVE(_lock_loc \
"call cmpxchg8b_emu", \
_lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
- : "+a" (o.low), "+d" (o.high) \
+ : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \
: "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
@@ -123,8 +123,8 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
"call cmpxchg8b_emu", \
_lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
CC_SET(e) \
- : CC_OUT(e) (ret), \
- "+a" (o.low), "+d" (o.high) \
+ : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
+ "+a" (o.low), "+d" (o.high)) \
: "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
: "memory"); \
\
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index aa30fd8cad7f..98eced5084ca 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -26,12 +26,13 @@ int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
-#ifdef CONFIG_CPU_SUP_INTEL
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
extern void __init sld_setup(struct cpuinfo_x86 *c);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs);
-u8 get_this_hybrid_cpu_type(void);
+void split_lock_init(void);
+void bus_lock_init(void);
#else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
@@ -45,11 +46,23 @@ static inline bool handle_guest_split_lock(unsigned long ip)
}
static inline void handle_bus_lock(struct pt_regs *regs) {}
+static inline void split_lock_init(void) {}
+static inline void bus_lock_init(void) {}
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+u8 get_this_hybrid_cpu_type(void);
+u32 get_this_hybrid_cpu_native_id(void);
+#else
static inline u8 get_this_hybrid_cpu_type(void)
{
return 0;
}
+
+static inline u32 get_this_hybrid_cpu_native_id(void)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 913fd3a7bac6..ea33439a5d00 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -473,7 +473,9 @@
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
/*
* BUG word(s)
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
index ca4243318aad..239b9ba5c398 100644
--- a/arch/x86/include/asm/cpuid.h
+++ b/arch/x86/include/asm/cpuid.h
@@ -6,6 +6,8 @@
#ifndef _ASM_X86_CPUID_H
#define _ASM_X86_CPUID_H
+#include <linux/types.h>
+
#include <asm/string.h>
struct cpuid_regs {
@@ -20,11 +22,11 @@ enum cpuid_regs_idx {
};
#ifdef CONFIG_X86_32
-extern int have_cpuid_p(void);
+bool have_cpuid_p(void);
#else
-static inline int have_cpuid_p(void)
+static inline bool have_cpuid_p(void)
{
- return 1;
+ return true;
}
#endif
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b4d719de2c84..6e8cf0fa48fc 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -35,37 +35,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
}
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
-struct ftrace_regs {
- struct pt_regs regs;
-};
+
+#include <linux/ftrace_regs.h>
static __always_inline struct pt_regs *
arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
/* Only when FL_SAVE_REGS is set, cs will be non zero */
- if (!fregs->regs.cs)
+ if (!arch_ftrace_regs(fregs)->regs.cs)
return NULL;
- return &fregs->regs;
+ return &arch_ftrace_regs(fregs)->regs;
}
#define ftrace_regs_set_instruction_pointer(fregs, _ip) \
- do { (fregs)->regs.ip = (_ip); } while (0)
-
-#define ftrace_regs_get_instruction_pointer(fregs) \
- ((fregs)->regs.ip)
-
-#define ftrace_regs_get_argument(fregs, n) \
- regs_get_kernel_argument(&(fregs)->regs, n)
-#define ftrace_regs_get_stack_pointer(fregs) \
- kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
- regs_return_value(&(fregs)->regs)
-#define ftrace_regs_set_return_value(fregs, ret) \
- regs_set_return_value(&(fregs)->regs, ret)
-#define ftrace_override_function_with_return(fregs) \
- override_function_with_return(&(fregs)->regs)
-#define ftrace_regs_query_register_offset(name) \
- regs_query_register_offset(name)
+ do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0)
+
struct ftrace_ops;
#define ftrace_graph_func ftrace_graph_func
@@ -90,7 +74,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
regs->orig_ax = addr;
}
#define arch_ftrace_set_direct_caller(fregs, addr) \
- __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+ __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 1a42f829667a..6d7b04ffc5fd 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -177,10 +177,15 @@
#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
/* Family 5 */
-#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
/* Family 19 */
#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */
+/* CPU core types */
+enum intel_cpu_type {
+ INTEL_CPU_TYPE_ATOM = 0x20,
+ INTEL_CPU_TYPE_CORE = 0x40,
+};
+
#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 1d60427379c9..ed580c7f9d0a 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -152,11 +152,6 @@ static inline void *phys_to_virt(phys_addr_t address)
#define phys_to_virt phys_to_virt
/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
-/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
* However, we truncate the address to unsigned int to avoid undesirable
* promotions in legacy drivers.
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3b9970117a0f..4543cf2eb5e8 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -61,6 +61,7 @@
* - TCC bit is present in MCx_STATUS.
*/
#define MCI_CONFIG_MCAX 0x1
+#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF
@@ -122,6 +123,9 @@
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+/* Registers MISC2 to MISC4 are at offsets B to D. */
+#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e
+#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -132,6 +136,8 @@
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
#define XEC(x, mask) (((x) >> 16) & mask)
@@ -187,6 +193,32 @@ enum mce_notifier_prios {
MCE_PRIO_HIGHEST = MCE_PRIO_CEC
};
+/**
+ * struct mce_hw_err - Hardware Error Record.
+ * @m: Machine Check record.
+ * @vendor: Vendor-specific error information.
+ *
+ * Vendor-specific fields should not be added to struct mce. Instead, vendors
+ * should export their vendor-specific data through their structure in the
+ * vendor union below.
+ *
+ * AMD's vendor data is parsed by error decoding tools for supplemental error
+ * information. Thus, current offsets of existing fields must be maintained.
+ * Only add new fields at the end of AMD's vendor structure.
+ */
+struct mce_hw_err {
+ struct mce m;
+
+ union vendor_info {
+ struct {
+ u64 synd1; /* MCA_SYND1 MSR */
+ u64 synd2; /* MCA_SYND2 MSR */
+ } amd;
+ } vendor;
+};
+
+#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
+
struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -221,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id) { return -EINVAL; }
#endif
-void mce_prep_record(struct mce *m);
-void mce_log(struct mce *m);
+void mce_prep_record(struct mce_hw_err *err);
+void mce_log(struct mce_hw_err *err);
DECLARE_PER_CPU(struct device *, mce_device);
/* Maximum number of MCA banks per CPU. */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 52f1b4ff0cc1..974688973cf6 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -6,10 +6,7 @@
#include <linux/types.h>
#include <linux/mem_encrypt.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 91b73571412f..d95f902acc52 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -536,15 +536,17 @@ struct x86_perf_regs {
u64 *xmm_regs;
};
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
+#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#include <asm/stacktrace.h>
/*
- * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
+ * We abuse bit 3 from flags to pass exact information, see
+ * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT.
*/
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4a686f0e5dbf..c0975815980c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -105,6 +105,24 @@ struct cpuinfo_topology {
// Cache level topology IDs
u32 llc_id;
u32 l2c_id;
+
+ // Hardware defined CPU-type
+ union {
+ u32 cpu_type;
+ struct {
+ // CPUID.1A.EAX[23-0]
+ u32 intel_native_model_id :24;
+ // CPUID.1A.EAX[31-24]
+ u32 intel_type :8;
+ };
+ struct {
+ // CPUID 0x80000026.EBX
+ u32 amd_num_processors :16,
+ amd_power_eff_ranking :8,
+ amd_native_model_id :4,
+ amd_type :4;
+ };
+ };
};
struct cpuinfo_x86 {
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 98726c2b04f8..50f5666938c0 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -220,4 +220,31 @@ struct snp_psc_desc {
#define GHCB_ERR_INVALID_INPUT 5
#define GHCB_ERR_INVALID_EVENT 6
+struct sev_config {
+ __u64 debug : 1,
+
+ /*
+ * Indicates when the per-CPU GHCB has been created and registered
+ * and thus can be used by the BSP instead of the early boot GHCB.
+ *
+ * For APs, the per-CPU GHCB is created before they are started
+ * and registered upon startup, so this flag can be used globally
+ * for the BSP and APs.
+ */
+ ghcbs_initialized : 1,
+
+ /*
+ * Indicates when the per-CPU SVSM CA is to be used instead of the
+ * boot SVSM CA.
+ *
+ * For APs, the per-CPU SVSM CA is created as part of the AP
+ * bringup, so this flag can be used globally for the BSP and APs.
+ */
+ use_cas : 1,
+
+ __reserved : 61;
+};
+
+extern struct sev_config sev_cfg;
+
#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ee34ab00a8d6..91f08af31078 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -120,6 +120,9 @@ struct snp_req_data {
};
#define MAX_AUTHTAG_LEN 32
+#define AUTHTAG_LEN 16
+#define AAD_LEN 48
+#define MSG_HDR_VER 1
/* See SNP spec SNP_GUEST_REQUEST section for the structure */
enum msg_type {
@@ -171,6 +174,19 @@ struct sev_guest_platform_data {
u64 secrets_gpa;
};
+struct snp_guest_req {
+ void *req_buf;
+ size_t req_sz;
+
+ void *resp_buf;
+ size_t resp_sz;
+
+ u64 exit_code;
+ unsigned int vmpck_id;
+ u8 msg_version;
+ u8 msg_type;
+};
+
/*
* The secrets page contains 96-bytes of reserved field that can be used by
* the guest OS. The guest OS uses the area to save the message sequence
@@ -218,6 +234,27 @@ struct snp_secrets_page {
u8 rsvd4[3744];
} __packed;
+struct snp_msg_desc {
+ /* request and response are in unencrypted memory */
+ struct snp_guest_msg *request, *response;
+
+ /*
+ * Avoid information leakage by double-buffering shared messages
+ * in fields that are in regular encrypted memory.
+ */
+ struct snp_guest_msg secret_request, secret_response;
+
+ struct snp_secrets_page *secrets;
+ struct snp_req_data input;
+
+ void *certs_data;
+
+ struct aesgcm_ctx *ctx;
+
+ u32 *os_area_msg_seqno;
+ u8 *vmpck;
+};
+
/*
* The SVSM Calling Area (CA) related structures.
*/
@@ -285,6 +322,22 @@ struct svsm_attest_call {
u8 rsvd[4];
};
+/* PTE descriptor used for the prepare_pte_enc() operations. */
+struct pte_enc_desc {
+ pte_t *kpte;
+ int pte_level;
+ bool encrypt;
+ /* pfn of the kpte above */
+ unsigned long pfn;
+ /* physical address of @pfn */
+ unsigned long pa;
+ /* virtual address of @pfn */
+ void *va;
+ /* memory covered by the pte */
+ unsigned long size;
+ pgprot_t new_pgprot;
+};
+
/*
* SVSM protocol structure
*/
@@ -392,13 +445,18 @@ void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
void snp_dmi_setup(void);
-int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input,
+ struct snp_guest_request_ioctl *rio);
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
void snp_update_svsm_ca(void);
+int prepare_pte_enc(struct pte_enc_desc *d);
+void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
+void snp_kexec_finish(void);
+void snp_kexec_begin(void);
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -422,7 +480,8 @@ static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
static inline void snp_dmi_setup(void) { }
-static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
+static inline int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input,
+ struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
}
@@ -435,6 +494,10 @@ static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
static inline void snp_update_svsm_ca(void) { }
+static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
+static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
+static inline void snp_kexec_finish(void) { }
+static inline void snp_kexec_begin(void) { }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index fdfd41511b02..89f7fcade8ae 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -16,10 +16,21 @@
#define TDG_VP_VEINFO_GET 3
#define TDG_MR_REPORT 4
#define TDG_MEM_PAGE_ACCEPT 6
+#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019
+
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
+#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(1)
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 12da7dfd5ef1..a55c214f3ba6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,8 +87,9 @@ struct thread_info {
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Speculative store bypass disable */
+#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
+#define TIF_SSBD 6 /* Speculative store bypass disable */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
@@ -110,6 +111,7 @@ struct thread_info {
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 7365dd4acffb..23baf8c9b34c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -6,8 +6,6 @@
#include <linux/interrupt.h>
#include <linux/math64.h>
-#define TICK_SIZE (tick_nsec / 1000)
-
unsigned long long native_sched_clock(void);
extern void recalibrate_cpu_khz(void);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 92f3664dd933..fd41103ad342 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -114,6 +114,12 @@ enum x86_topology_domains {
TOPO_MAX_DOMAIN,
};
+enum x86_topology_cpu_type {
+ TOPO_CPU_TYPE_PERFORMANCE,
+ TOPO_CPU_TYPE_EFFICIENCY,
+ TOPO_CPU_TYPE_UNKNOWN,
+};
+
struct x86_topology_system {
unsigned int dom_shifts[TOPO_MAX_DOMAIN];
unsigned int dom_size[TOPO_MAX_DOMAIN];
@@ -149,6 +155,9 @@ extern unsigned int __max_threads_per_core;
extern unsigned int __num_threads_per_package;
extern unsigned int __num_cores_per_package;
+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
+
static inline unsigned int topology_max_packages(void)
{
return __max_logical_packages;
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
index ff5334ad32a0..2bf9c0e970c3 100644
--- a/arch/x86/include/asm/vdso/getrandom.h
+++ b/arch/x86/include/asm/vdso/getrandom.h
@@ -8,7 +8,6 @@
#ifndef __ASSEMBLY__
#include <asm/unistd.h>
-#include <asm/vvar.h>
/**
* getrandom_syscall - Invoke the getrandom() syscall.
@@ -28,13 +27,14 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig
return ret;
}
-#define __vdso_rng_data (VVAR(_vdso_rng_data))
+extern struct vdso_rng_data vdso_rng_data
+ __attribute__((visibility("hidden")));
static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
{
- if (IS_ENABLED(CONFIG_TIME_NS) && __vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
- return (void *)&__vdso_rng_data + ((void *)&__timens_vdso_data - (void *)&__vdso_data);
- return &__vdso_rng_data;
+ if (IS_ENABLED(CONFIG_TIME_NS) && __arch_get_vdso_data()->clock_mode == VDSO_CLOCKMODE_TIMENS)
+ return (void *)&vdso_rng_data + ((void *)&timens_page - (void *)__arch_get_vdso_data());
+ return &vdso_rng_data;
}
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index b2d2df026f6e..375a34b0f365 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -14,14 +14,16 @@
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
-#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <clocksource/hyperv_timer.h>
-#define __vdso_data (VVAR(_vdso_data))
-#define __timens_vdso_data (TIMENS(_vdso_data))
+extern struct vdso_data vvar_page
+ __attribute__((visibility("hidden")));
+
+extern struct vdso_data timens_page
+ __attribute__((visibility("hidden")));
#define VDSO_HAS_TIME 1
@@ -61,7 +63,7 @@ extern struct ms_hyperv_tsc_page hvclock_page
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
- return __timens_vdso_data;
+ return &timens_page;
}
#endif
@@ -275,7 +277,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode,
static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
- return __vdso_data;
+ return &vvar_page;
}
static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index 67fedf1698b5..37b4a70559a8 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -2,12 +2,19 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
+#define __VDSO_RND_DATA_OFFSET 640
+#define __VVAR_PAGES 4
+
+#define VDSO_NR_VCLOCK_PAGES 2
+#define VDSO_PAGE_PVCLOCK_OFFSET 0
+#define VDSO_PAGE_HVCLOCK_OFFSET 1
+
#ifndef __ASSEMBLY__
-#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#include <asm/vgtod.h>
-#include <asm/vvar.h>
+
+extern struct vdso_data *vdso_data;
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
@@ -15,14 +22,14 @@
static __always_inline
struct vdso_data *__x86_get_k_vdso_data(void)
{
- return _vdso_data;
+ return vdso_data;
}
#define __arch_get_k_vdso_data __x86_get_k_vdso_data
static __always_inline
struct vdso_rng_data *__x86_get_k_vdso_rng_data(void)
{
- return &_vdso_rng_data;
+ return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
}
#define __arch_get_k_vdso_rng_data __x86_get_k_vdso_rng_data
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
deleted file mode 100644
index 9d9af37f7cab..000000000000
--- a/arch/x86/include/asm/vvar.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * vvar.h: Shared vDSO/kernel variable declarations
- * Copyright (c) 2011 Andy Lutomirski
- *
- * A handful of variables are accessible (read-only) from userspace
- * code in the vsyscall page and the vdso. They are declared here.
- * Some other file must define them with DEFINE_VVAR.
- *
- * In normal kernel code, they are used like any other variable.
- * In user code, they are accessed through the VVAR macro.
- *
- * These variables live in a page of kernel data that has an extra RO
- * mapping for userspace. Each variable needs a unique offset within
- * that page; specify that offset with the DECLARE_VVAR macro. (If
- * you mess up, the linker will catch it.)
- */
-
-#ifndef _ASM_X86_VVAR_H
-#define _ASM_X86_VVAR_H
-
-#ifdef EMIT_VVAR
-/*
- * EMIT_VVAR() is used by the kernel linker script to put vvars in the
- * right place. Also, it's used by kernel code to import offsets values.
- */
-#define DECLARE_VVAR(offset, type, name) \
- EMIT_VVAR(name, offset)
-#define DECLARE_VVAR_SINGLE(offset, type, name) \
- EMIT_VVAR(name, offset)
-
-#else
-
-extern char __vvar_page;
-
-#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name[CS_BASES] \
- __attribute__((visibility("hidden"))); \
- extern type timens_ ## name[CS_BASES] \
- __attribute__((visibility("hidden"))); \
-
-#define DECLARE_VVAR_SINGLE(offset, type, name) \
- extern type vvar_ ## name \
- __attribute__((visibility("hidden"))); \
-
-#define VVAR(name) (vvar_ ## name)
-#define TIMENS(name) (timens_ ## name)
-
-#define DEFINE_VVAR(type, name) \
- type name[CS_BASES] \
- __attribute__((section(".vvar_" #name), aligned(16))) __visible
-
-#define DEFINE_VVAR_SINGLE(type, name) \
- type name \
- __attribute__((section(".vvar_" #name), aligned(16))) __visible
-
-#endif
-
-/* DECLARE_VVAR(offset, type, name) */
-
-DECLARE_VVAR(128, struct vdso_data, _vdso_data)
-
-#if !defined(_SINGLE_DATA)
-#define _SINGLE_DATA
-DECLARE_VVAR_SINGLE(640, struct vdso_rng_data, _vdso_rng_data)
-#endif
-
-#undef DECLARE_VVAR
-#undef DECLARE_VVAR_SINGLE
-
-#endif
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
index e5d182c7373c..4a7cace06204 100644
--- a/arch/x86/include/uapi/asm/amd_hsmp.h
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -88,7 +88,8 @@ struct hsmp_msg_desc {
*
* Not supported messages would return -ENOMSG.
*/
-static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
+static const struct hsmp_msg_desc hsmp_msg_desc_table[]
+ __attribute__((unused)) = {
/* RESERVED */
{0, 0, HSMP_RSVD},
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index db9adc081c5a..cb6b48a7c22b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -8,7 +8,8 @@
/*
* Fields are zero when not available. Also, this struct is shared with
* userspace mcelog and thus must keep existing fields at current offsets.
- * Only add new fields to the end of the structure
+ * Only add new, shared fields to the end of the structure.
+ * Do not add vendor-specific fields.
*/
struct mce {
__u64 status; /* Bank's MCi_STATUS MSR */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 46cdc941f958..ac1e6277212b 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,9 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
#define MAP_ABOVE4G 0x80 /* only map above 4GB */
-/* Flags for map_shadow_stack(2) */
-#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
-
#include <asm-generic/mman.h>
#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4efecac49863..3a44a9dc3fb7 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1171,7 +1171,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
}
count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
- acpi_parse_int_src_ovr, nr_irqs);
+ acpi_parse_int_src_ovr,
+ irq_get_nr_irqs());
if (count < 0) {
pr_err("Error parsing interrupt source overrides entry\n");
/* TBD: Cleanup to allow fallback to MPS */
@@ -1191,7 +1192,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
mp_config_acpi_legacy_irqs();
count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE,
- acpi_parse_nmi_src, nr_irqs);
+ acpi_parse_nmi_src,
+ irq_get_nr_irqs());
if (count < 0) {
pr_err("Error parsing NMI SRC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index aab9d0570841..d745dd586303 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -239,8 +239,10 @@ EXPORT_SYMBOL_GPL(amd_detect_prefcore);
*/
int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
{
+ enum x86_topology_cpu_type core_type = get_topology_cpu_type(&cpu_data(cpu));
bool prefcore;
int ret;
+ u32 tmp;
ret = amd_detect_prefcore(&prefcore);
if (ret)
@@ -266,6 +268,27 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator)
break;
}
}
+
+ /* detect if running on heterogeneous design */
+ if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) {
+ switch (core_type) {
+ case TOPO_CPU_TYPE_UNKNOWN:
+ pr_warn("Undefined core type found for cpu %d\n", cpu);
+ break;
+ case TOPO_CPU_TYPE_PERFORMANCE:
+ /* use the max scale for performance cores */
+ *numerator = CPPC_HIGHEST_PERF_PERFORMANCE;
+ return 0;
+ case TOPO_CPU_TYPE_EFFICIENCY:
+ /* use the highest perf value for efficiency cores */
+ ret = amd_get_highest_perf(cpu, &tmp);
+ if (ret)
+ return ret;
+ *numerator = tmp;
+ return 0;
+ }
+ }
+
*numerator = CPPC_HIGHEST_PERF_PREFCORE;
return 0;
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 94ff83f3d3fe..b200a193beeb 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -87,6 +87,7 @@ SYM_FUNC_START(do_suspend_lowlevel)
.align 4
.Lresume_point:
+ ANNOTATE_NOENDBR
/* We don't restore %rax, it must be 0 anyway */
movq $saved_context, %rax
movq saved_context_cr4(%rax), %rbx
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 557318145038..736f62812f5c 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -712,8 +712,8 @@ int __init arch_probe_nr_irqs(void)
{
int nr;
- if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
- nr_irqs = NR_VECTORS * nr_cpu_ids;
+ if (irq_get_nr_irqs() > NR_VECTORS * nr_cpu_ids)
+ irq_set_nr_irqs(NR_VECTORS * nr_cpu_ids);
nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
#if defined(CONFIG_PCI_MSI)
@@ -725,8 +725,8 @@ int __init arch_probe_nr_irqs(void)
else
nr += gsi_top * 16;
#endif
- if (nr < nr_irqs)
- nr_irqs = nr;
+ if (nr < irq_get_nr_irqs())
+ irq_set_nr_irqs(nr);
/*
* We don't know if PIC is present at this point so we need to do
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5857a0f5d514..4efdf5c2efc8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -59,6 +59,8 @@ obj-$(CONFIG_ACRN_GUEST) += acrn.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
+obj-$(CONFIG_X86_BUS_LOCK_DETECT) += bus_lock.o
+
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(src)/mkcapflags.sh $@ $^
diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c
new file mode 100644
index 000000000000..704e9241b964
--- /dev/null
+++ b/arch/x86/kernel/cpu/bus_lock.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "x86/split lock detection: " fmt
+
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/cpuhotplug.h>
+#include <asm/cpu_device_id.h>
+#include <asm/cmdline.h>
+#include <asm/traps.h>
+#include <asm/cpu.h>
+
+enum split_lock_detect_state {
+ sld_off = 0,
+ sld_warn,
+ sld_fatal,
+ sld_ratelimit,
+};
+
+/*
+ * Default to sld_off because most systems do not support split lock detection.
+ * sld_state_setup() will switch this to sld_warn on systems that support
+ * split lock/bus lock detect, unless there is a command line override.
+ */
+static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
+static u64 msr_test_ctrl_cache __ro_after_init;
+
+/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
+static const struct {
+ const char *option;
+ enum split_lock_detect_state state;
+} sld_options[] __initconst = {
+ { "off", sld_off },
+ { "warn", sld_warn },
+ { "fatal", sld_fatal },
+ { "ratelimit:", sld_ratelimit },
+};
+
+static struct ratelimit_state bld_ratelimit;
+
+static unsigned int sysctl_sld_mitigate = 1;
+static DEFINE_SEMAPHORE(buslock_sem, 1);
+
+#ifdef CONFIG_PROC_SYSCTL
+static struct ctl_table sld_sysctls[] = {
+ {
+ .procname = "split_lock_mitigate",
+ .data = &sysctl_sld_mitigate,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+};
+
+static int __init sld_mitigate_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sld_sysctls);
+ return 0;
+}
+
+late_initcall(sld_mitigate_sysctl_init);
+#endif
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt), ratelimit;
+
+ if (strncmp(arg, opt, len))
+ return false;
+
+ /*
+ * Min ratelimit is 1 bus lock/sec.
+ * Max ratelimit is 1000 bus locks/sec.
+ */
+ if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
+ ratelimit > 0 && ratelimit <= 1000) {
+ ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
+ ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
+ return true;
+ }
+
+ return len == arglen;
+}
+
+static bool split_lock_verify_msr(bool on)
+{
+ u64 ctrl, tmp;
+
+ if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
+ return false;
+ if (on)
+ ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+ else
+ ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+ if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
+ return false;
+ rdmsrl(MSR_TEST_CTRL, tmp);
+ return ctrl == tmp;
+}
+
+static void __init sld_state_setup(void)
+{
+ enum split_lock_detect_state state = sld_warn;
+ char arg[20];
+ int i, ret;
+
+ if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ return;
+
+ ret = cmdline_find_option(boot_command_line, "split_lock_detect",
+ arg, sizeof(arg));
+ if (ret >= 0) {
+ for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
+ if (match_option(arg, ret, sld_options[i].option)) {
+ state = sld_options[i].state;
+ break;
+ }
+ }
+ }
+ sld_state = state;
+}
+
+static void __init __split_lock_setup(void)
+{
+ if (!split_lock_verify_msr(false)) {
+ pr_info("MSR access failed: Disabled\n");
+ return;
+ }
+
+ rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
+ if (!split_lock_verify_msr(true)) {
+ pr_info("MSR access failed: Disabled\n");
+ return;
+ }
+
+ /* Restore the MSR to its cached value. */
+ wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
+ setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
+}
+
+/*
+ * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
+ * is not implemented as one thread could undo the setting of the other
+ * thread immediately after dropping the lock anyway.
+ */
+static void sld_update_msr(bool on)
+{
+ u64 test_ctrl_val = msr_test_ctrl_cache;
+
+ if (on)
+ test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
+
+ wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
+}
+
+void split_lock_init(void)
+{
+ /*
+ * #DB for bus lock handles ratelimit and #AC for split lock is
+ * disabled.
+ */
+ if (sld_state == sld_ratelimit) {
+ split_lock_verify_msr(false);
+ return;
+ }
+
+ if (cpu_model_supports_sld)
+ split_lock_verify_msr(sld_state != sld_off);
+}
+
+static void __split_lock_reenable_unlock(struct work_struct *work)
+{
+ sld_update_msr(true);
+ up(&buslock_sem);
+}
+
+static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
+
+static void __split_lock_reenable(struct work_struct *work)
+{
+ sld_update_msr(true);
+}
+static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+
+/*
+ * If a CPU goes offline with pending delayed work to re-enable split lock
+ * detection then the delayed work will be executed on some other CPU. That
+ * handles releasing the buslock_sem, but because it executes on a
+ * different CPU probably won't re-enable split lock detection. This is a
+ * problem on HT systems since the sibling CPU on the same core may then be
+ * left running with split lock detection disabled.
+ *
+ * Unconditionally re-enable detection here.
+ */
+static int splitlock_cpu_offline(unsigned int cpu)
+{
+ sld_update_msr(true);
+
+ return 0;
+}
+
+static void split_lock_warn(unsigned long ip)
+{
+ struct delayed_work *work;
+ int cpu;
+
+ if (!current->reported_split_lock)
+ pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, ip);
+ current->reported_split_lock = 1;
+
+ if (sysctl_sld_mitigate) {
+ /*
+ * misery factor #1:
+ * sleep 10ms before trying to execute split lock.
+ */
+ if (msleep_interruptible(10) > 0)
+ return;
+ /*
+ * Misery factor #2:
+ * only allow one buslocked disabled core at a time.
+ */
+ if (down_interruptible(&buslock_sem) == -EINTR)
+ return;
+ work = &sl_reenable_unlock;
+ } else {
+ work = &sl_reenable;
+ }
+
+ cpu = get_cpu();
+ schedule_delayed_work_on(cpu, work, 2);
+
+ /* Disable split lock detection on this CPU to make progress */
+ sld_update_msr(false);
+ put_cpu();
+}
+
+bool handle_guest_split_lock(unsigned long ip)
+{
+ if (sld_state == sld_warn) {
+ split_lock_warn(ip);
+ return true;
+ }
+
+ pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
+ current->comm, current->pid,
+ sld_state == sld_fatal ? "fatal" : "bogus", ip);
+
+ current->thread.error_code = 0;
+ current->thread.trap_nr = X86_TRAP_AC;
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+ return false;
+}
+EXPORT_SYMBOL_GPL(handle_guest_split_lock);
+
+void bus_lock_init(void)
+{
+ u64 val;
+
+ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ return;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
+
+ if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+ (sld_state == sld_warn || sld_state == sld_fatal)) ||
+ sld_state == sld_off) {
+ /*
+ * Warn and fatal are handled by #AC for split lock if #AC for
+ * split lock is supported.
+ */
+ val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+ } else {
+ val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+ }
+
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+}
+
+bool handle_user_split_lock(struct pt_regs *regs, long error_code)
+{
+ if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
+ return false;
+ split_lock_warn(regs->ip);
+ return true;
+}
+
+void handle_bus_lock(struct pt_regs *regs)
+{
+ switch (sld_state) {
+ case sld_off:
+ break;
+ case sld_ratelimit:
+ /* Enforce no more than bld_ratelimit bus locks/sec. */
+ while (!__ratelimit(&bld_ratelimit))
+ msleep(20);
+ /* Warn on the bus lock. */
+ fallthrough;
+ case sld_warn:
+ pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, regs->ip);
+ break;
+ case sld_fatal:
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+ break;
+ }
+}
+
+/*
+ * CPU models that are known to have the per-core split-lock detection
+ * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
+ */
+static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
+ {}
+};
+
+static void __init split_lock_setup(struct cpuinfo_x86 *c)
+{
+ const struct x86_cpu_id *m;
+ u64 ia32_core_caps;
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return;
+
+ /* Check for CPUs that have support but do not enumerate it: */
+ m = x86_match_cpu(split_lock_cpu_ids);
+ if (m)
+ goto supported;
+
+ if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
+ return;
+
+ /*
+ * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
+ * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
+ * it have split lock detection.
+ */
+ rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
+ if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
+ goto supported;
+
+ /* CPU is not in the model list and does not have the MSR bit: */
+ return;
+
+supported:
+ cpu_model_supports_sld = true;
+ __split_lock_setup();
+}
+
+static void sld_state_show(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ return;
+
+ switch (sld_state) {
+ case sld_off:
+ pr_info("disabled\n");
+ break;
+ case sld_warn:
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+ pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
+ pr_warn("No splitlock CPU offline handler\n");
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+ pr_info("#DB: warning on user-space bus_locks\n");
+ }
+ break;
+ case sld_fatal:
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+ pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+ pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
+ boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
+ " from non-WB" : "");
+ }
+ break;
+ case sld_ratelimit:
+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
+ break;
+ }
+}
+
+void __init sld_setup(struct cpuinfo_x86 *c)
+{
+ split_lock_setup(c);
+ sld_state_setup();
+ sld_state_show();
+}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f43bb974fc66..06a516f6795b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -276,21 +276,13 @@ static int __init x86_noinvpcid_setup(char *s)
}
early_param("noinvpcid", x86_noinvpcid_setup);
-#ifdef CONFIG_X86_32
-static int cachesize_override = -1;
-static int disable_x86_serial_nr = 1;
-
-static int __init cachesize_setup(char *str)
-{
- get_option(&str, &cachesize_override);
- return 1;
-}
-__setup("cachesize=", cachesize_setup);
-
/* Standard macro to see if a specific flag is changeable */
-static inline int flag_is_changeable_p(u32 flag)
+static inline bool flag_is_changeable_p(unsigned long flag)
{
- u32 f1, f2;
+ unsigned long f1, f2;
+
+ if (!IS_ENABLED(CONFIG_X86_32))
+ return true;
/*
* Cyrix and IDT cpus allow disabling of CPUID
@@ -313,11 +305,22 @@ static inline int flag_is_changeable_p(u32 flag)
: "=&r" (f1), "=&r" (f2)
: "ir" (flag));
- return ((f1^f2) & flag) != 0;
+ return (f1 ^ f2) & flag;
+}
+
+#ifdef CONFIG_X86_32
+static int cachesize_override = -1;
+static int disable_x86_serial_nr = 1;
+
+static int __init cachesize_setup(char *str)
+{
+ get_option(&str, &cachesize_override);
+ return 1;
}
+__setup("cachesize=", cachesize_setup);
/* Probe for the CPUID instruction */
-int have_cpuid_p(void)
+bool have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
@@ -349,10 +352,6 @@ static int __init x86_serial_nr_setup(char *s)
}
__setup("serialnumber", x86_serial_nr_setup);
#else
-static inline int flag_is_changeable_p(u32 flag)
-{
- return 1;
-}
static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
}
@@ -1088,7 +1087,6 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_32
int i;
/*
@@ -1109,7 +1107,6 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
break;
}
}
-#endif
}
#define NO_SPECULATION BIT(0)
@@ -1841,6 +1838,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_init)
this_cpu->c_init(c);
+ bus_lock_init();
+
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
@@ -1906,9 +1905,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Init Machine Check Exception if available. */
mcheck_cpu_init(c);
-#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
-#endif
}
/*
diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c
index 3baf3e435834..10719aba6276 100644
--- a/arch/x86/kernel/cpu/debugfs.c
+++ b/arch/x86/kernel/cpu/debugfs.c
@@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p)
seq_printf(m, "die_id: %u\n", c->topo.die_id);
seq_printf(m, "cu_id: %u\n", c->topo.cu_id);
seq_printf(m, "core_id: %u\n", c->topo.core_id);
+ seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c));
seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id);
seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e7656cbef68d..d1de300af173 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,13 +7,9 @@
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
-#include <linux/semaphore.h>
#include <linux/thread_info.h>
#include <linux/init.h>
#include <linux/uaccess.h>
-#include <linux/workqueue.h>
-#include <linux/delay.h>
-#include <linux/cpuhotplug.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
@@ -24,8 +20,6 @@
#include <asm/hwcap2.h>
#include <asm/elf.h>
#include <asm/cpu_device_id.h>
-#include <asm/cmdline.h>
-#include <asm/traps.h>
#include <asm/resctrl.h>
#include <asm/numa.h>
#include <asm/thermal.h>
@@ -41,28 +35,6 @@
#include <asm/apic.h>
#endif
-enum split_lock_detect_state {
- sld_off = 0,
- sld_warn,
- sld_fatal,
- sld_ratelimit,
-};
-
-/*
- * Default to sld_off because most systems do not support split lock detection.
- * sld_state_setup() will switch this to sld_warn on systems that support
- * split lock/bus lock detect, unless there is a command line override.
- */
-static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
-static u64 msr_test_ctrl_cache __ro_after_init;
-
-/*
- * With a name like MSR_TEST_CTL it should go without saying, but don't touch
- * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
- * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
- */
-static bool cpu_model_supports_sld __ro_after_init;
-
/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
@@ -549,9 +521,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
-static void split_lock_init(void);
-static void bus_lock_init(void);
-
static void init_intel(struct cpuinfo_x86 *c)
{
early_init_intel(c);
@@ -643,7 +612,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_intel_misc_features(c);
split_lock_init();
- bus_lock_init();
intel_init_thermal(c);
}
@@ -909,381 +877,6 @@ static const struct cpu_dev intel_cpu_dev = {
cpu_dev_register(intel_cpu_dev);
-#undef pr_fmt
-#define pr_fmt(fmt) "x86/split lock detection: " fmt
-
-static const struct {
- const char *option;
- enum split_lock_detect_state state;
-} sld_options[] __initconst = {
- { "off", sld_off },
- { "warn", sld_warn },
- { "fatal", sld_fatal },
- { "ratelimit:", sld_ratelimit },
-};
-
-static struct ratelimit_state bld_ratelimit;
-
-static unsigned int sysctl_sld_mitigate = 1;
-static DEFINE_SEMAPHORE(buslock_sem, 1);
-
-#ifdef CONFIG_PROC_SYSCTL
-static struct ctl_table sld_sysctls[] = {
- {
- .procname = "split_lock_mitigate",
- .data = &sysctl_sld_mitigate,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_douintvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-};
-
-static int __init sld_mitigate_sysctl_init(void)
-{
- register_sysctl_init("kernel", sld_sysctls);
- return 0;
-}
-
-late_initcall(sld_mitigate_sysctl_init);
-#endif
-
-static inline bool match_option(const char *arg, int arglen, const char *opt)
-{
- int len = strlen(opt), ratelimit;
-
- if (strncmp(arg, opt, len))
- return false;
-
- /*
- * Min ratelimit is 1 bus lock/sec.
- * Max ratelimit is 1000 bus locks/sec.
- */
- if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
- ratelimit > 0 && ratelimit <= 1000) {
- ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
- ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
- return true;
- }
-
- return len == arglen;
-}
-
-static bool split_lock_verify_msr(bool on)
-{
- u64 ctrl, tmp;
-
- if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
- return false;
- if (on)
- ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
- else
- ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
- if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
- return false;
- rdmsrl(MSR_TEST_CTRL, tmp);
- return ctrl == tmp;
-}
-
-static void __init sld_state_setup(void)
-{
- enum split_lock_detect_state state = sld_warn;
- char arg[20];
- int i, ret;
-
- if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
- !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- return;
-
- ret = cmdline_find_option(boot_command_line, "split_lock_detect",
- arg, sizeof(arg));
- if (ret >= 0) {
- for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
- if (match_option(arg, ret, sld_options[i].option)) {
- state = sld_options[i].state;
- break;
- }
- }
- }
- sld_state = state;
-}
-
-static void __init __split_lock_setup(void)
-{
- if (!split_lock_verify_msr(false)) {
- pr_info("MSR access failed: Disabled\n");
- return;
- }
-
- rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
-
- if (!split_lock_verify_msr(true)) {
- pr_info("MSR access failed: Disabled\n");
- return;
- }
-
- /* Restore the MSR to its cached value. */
- wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
-
- setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
-}
-
-/*
- * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
- * is not implemented as one thread could undo the setting of the other
- * thread immediately after dropping the lock anyway.
- */
-static void sld_update_msr(bool on)
-{
- u64 test_ctrl_val = msr_test_ctrl_cache;
-
- if (on)
- test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
-
- wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
-}
-
-static void split_lock_init(void)
-{
- /*
- * #DB for bus lock handles ratelimit and #AC for split lock is
- * disabled.
- */
- if (sld_state == sld_ratelimit) {
- split_lock_verify_msr(false);
- return;
- }
-
- if (cpu_model_supports_sld)
- split_lock_verify_msr(sld_state != sld_off);
-}
-
-static void __split_lock_reenable_unlock(struct work_struct *work)
-{
- sld_update_msr(true);
- up(&buslock_sem);
-}
-
-static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock);
-
-static void __split_lock_reenable(struct work_struct *work)
-{
- sld_update_msr(true);
-}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
-
-/*
- * If a CPU goes offline with pending delayed work to re-enable split lock
- * detection then the delayed work will be executed on some other CPU. That
- * handles releasing the buslock_sem, but because it executes on a
- * different CPU probably won't re-enable split lock detection. This is a
- * problem on HT systems since the sibling CPU on the same core may then be
- * left running with split lock detection disabled.
- *
- * Unconditionally re-enable detection here.
- */
-static int splitlock_cpu_offline(unsigned int cpu)
-{
- sld_update_msr(true);
-
- return 0;
-}
-
-static void split_lock_warn(unsigned long ip)
-{
- struct delayed_work *work;
- int cpu;
-
- if (!current->reported_split_lock)
- pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
- current->comm, current->pid, ip);
- current->reported_split_lock = 1;
-
- if (sysctl_sld_mitigate) {
- /*
- * misery factor #1:
- * sleep 10ms before trying to execute split lock.
- */
- if (msleep_interruptible(10) > 0)
- return;
- /*
- * Misery factor #2:
- * only allow one buslocked disabled core at a time.
- */
- if (down_interruptible(&buslock_sem) == -EINTR)
- return;
- work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
- }
-
- cpu = get_cpu();
- schedule_delayed_work_on(cpu, work, 2);
-
- /* Disable split lock detection on this CPU to make progress */
- sld_update_msr(false);
- put_cpu();
-}
-
-bool handle_guest_split_lock(unsigned long ip)
-{
- if (sld_state == sld_warn) {
- split_lock_warn(ip);
- return true;
- }
-
- pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
- current->comm, current->pid,
- sld_state == sld_fatal ? "fatal" : "bogus", ip);
-
- current->thread.error_code = 0;
- current->thread.trap_nr = X86_TRAP_AC;
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
- return false;
-}
-EXPORT_SYMBOL_GPL(handle_guest_split_lock);
-
-static void bus_lock_init(void)
-{
- u64 val;
-
- if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- return;
-
- rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
-
- if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
- (sld_state == sld_warn || sld_state == sld_fatal)) ||
- sld_state == sld_off) {
- /*
- * Warn and fatal are handled by #AC for split lock if #AC for
- * split lock is supported.
- */
- val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
- } else {
- val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- }
-
- wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
-}
-
-bool handle_user_split_lock(struct pt_regs *regs, long error_code)
-{
- if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
- return false;
- split_lock_warn(regs->ip);
- return true;
-}
-
-void handle_bus_lock(struct pt_regs *regs)
-{
- switch (sld_state) {
- case sld_off:
- break;
- case sld_ratelimit:
- /* Enforce no more than bld_ratelimit bus locks/sec. */
- while (!__ratelimit(&bld_ratelimit))
- msleep(20);
- /* Warn on the bus lock. */
- fallthrough;
- case sld_warn:
- pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
- current->comm, current->pid, regs->ip);
- break;
- case sld_fatal:
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
- break;
- }
-}
-
-/*
- * CPU models that are known to have the per-core split-lock detection
- * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
- */
-static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
- X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
- X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
- X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
- {}
-};
-
-static void __init split_lock_setup(struct cpuinfo_x86 *c)
-{
- const struct x86_cpu_id *m;
- u64 ia32_core_caps;
-
- if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
-
- /* Check for CPUs that have support but do not enumerate it: */
- m = x86_match_cpu(split_lock_cpu_ids);
- if (m)
- goto supported;
-
- if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
- return;
-
- /*
- * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
- * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
- * it have split lock detection.
- */
- rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
- if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
- goto supported;
-
- /* CPU is not in the model list and does not have the MSR bit: */
- return;
-
-supported:
- cpu_model_supports_sld = true;
- __split_lock_setup();
-}
-
-static void sld_state_show(void)
-{
- if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
- !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
- return;
-
- switch (sld_state) {
- case sld_off:
- pr_info("disabled\n");
- break;
- case sld_warn:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
- pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
- if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
- pr_warn("No splitlock CPU offline handler\n");
- } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
- pr_info("#DB: warning on user-space bus_locks\n");
- }
- break;
- case sld_fatal:
- if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
- pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
- } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
- pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
- boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
- " from non-WB" : "");
- }
- break;
- case sld_ratelimit:
- if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
- break;
- }
-}
-
-void __init sld_setup(struct cpuinfo_x86 *c)
-{
- split_lock_setup(c);
- sld_state_setup();
- sld_state_show();
-}
-
#define X86_HYBRID_CPU_TYPE_ID_SHIFT 24
/**
@@ -1299,3 +892,18 @@ u8 get_this_hybrid_cpu_type(void)
return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
}
+
+/**
+ * get_this_hybrid_cpu_native_id() - Get the native id of this hybrid CPU
+ *
+ * Returns the uarch native ID [23:0] of a CPU in a hybrid processor.
+ * If the processor is not hybrid, returns 0.
+ */
+u32 get_this_hybrid_cpu_native_id(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+ return 0;
+
+ return cpuid_eax(0x0000001a) &
+ (BIT_ULL(X86_HYBRID_CPU_TYPE_ID_SHIFT) - 1);
+}
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 14bf8c232e45..6ca80fff1fea 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -778,29 +778,33 @@ bool amd_mce_usable_address(struct mce *m)
static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m = &err.m;
- mce_prep_record(&m);
+ mce_prep_record(&err);
- m.status = status;
- m.misc = misc;
- m.bank = bank;
- m.tsc = rdtsc();
+ m->status = status;
+ m->misc = misc;
+ m->bank = bank;
+ m->tsc = rdtsc();
- if (m.status & MCI_STATUS_ADDRV) {
- m.addr = addr;
+ if (m->status & MCI_STATUS_ADDRV) {
+ m->addr = addr;
- smca_extract_err_addr(&m);
+ smca_extract_err_addr(m);
}
if (mce_flags.smca) {
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
- if (m.status & MCI_STATUS_SYNDV)
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+ if (m->status & MCI_STATUS_SYNDV) {
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
+ rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
+ }
}
- mce_log(&m);
+ mce_log(&err);
}
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 3885fe05f01e..0a89947e47bc 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -28,7 +28,8 @@
void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m;
int lsb;
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
@@ -44,31 +45,33 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
else
lsb = PAGE_SHIFT;
- mce_prep_record(&m);
- m.bank = -1;
+ mce_prep_record(&err);
+ m = &err.m;
+ m->bank = -1;
/* Fake a memory read error with unknown channel */
- m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
- m.misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
+ m->status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+ m->misc = (MCI_MISC_ADDR_PHYS << 6) | lsb;
if (severity >= GHES_SEV_RECOVERABLE)
- m.status |= MCI_STATUS_UC;
+ m->status |= MCI_STATUS_UC;
if (severity >= GHES_SEV_PANIC) {
- m.status |= MCI_STATUS_PCC;
- m.tsc = rdtsc();
+ m->status |= MCI_STATUS_PCC;
+ m->tsc = rdtsc();
}
- m.addr = mem_err->physical_addr;
- mce_log(&m);
+ m->addr = mem_err->physical_addr;
+ mce_log(&err);
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
+ unsigned int cpu, num_regs;
bool apicid_found = false;
- unsigned int cpu;
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m;
if (!boot_cpu_has(X86_FEATURE_SMCA))
return -EINVAL;
@@ -86,16 +89,12 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
return -EINVAL;
/*
- * The register array size must be large enough to include all the
- * SMCA registers which need to be extracted.
- *
* The number of registers in the register array is determined by
* Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
- * The register layout is fixed and currently the raw data in the
- * register array includes 6 SMCA registers which the kernel can
- * extract.
+ * Sanity-check registers array size.
*/
- if (ctx_info->reg_arr_size < 48)
+ num_regs = ctx_info->reg_arr_size >> 3;
+ if (!num_regs)
return -EINVAL;
for_each_possible_cpu(cpu) {
@@ -108,18 +107,68 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
if (!apicid_found)
return -EINVAL;
- mce_prep_record_common(&m);
- mce_prep_record_per_cpu(cpu, &m);
+ m = &err.m;
+ memset(&err, 0, sizeof(struct mce_hw_err));
+ mce_prep_record_common(m);
+ mce_prep_record_per_cpu(cpu, m);
+
+ m->bank = (ctx_info->msr_addr >> 4) & 0xFF;
- m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
- m.status = *i_mce;
- m.addr = *(i_mce + 1);
- m.misc = *(i_mce + 2);
- /* Skipping MCA_CONFIG */
- m.ipid = *(i_mce + 4);
- m.synd = *(i_mce + 5);
+ /*
+ * The SMCA register layout is fixed and includes 16 registers.
+ * The end of the array may be variable, but the beginning is known.
+ * Cap the number of registers to expected max (15).
+ */
+ if (num_regs > 15)
+ num_regs = 15;
+
+ switch (num_regs) {
+ /* MCA_SYND2 */
+ case 15:
+ err.vendor.amd.synd2 = *(i_mce + 14);
+ fallthrough;
+ /* MCA_SYND1 */
+ case 14:
+ err.vendor.amd.synd1 = *(i_mce + 13);
+ fallthrough;
+ /* MCA_MISC4 */
+ case 13:
+ /* MCA_MISC3 */
+ case 12:
+ /* MCA_MISC2 */
+ case 11:
+ /* MCA_MISC1 */
+ case 10:
+ /* MCA_DEADDR */
+ case 9:
+ /* MCA_DESTAT */
+ case 8:
+ /* reserved */
+ case 7:
+ /* MCA_SYND */
+ case 6:
+ m->synd = *(i_mce + 5);
+ fallthrough;
+ /* MCA_IPID */
+ case 5:
+ m->ipid = *(i_mce + 4);
+ fallthrough;
+ /* MCA_CONFIG */
+ case 4:
+ /* MCA_MISC0 */
+ case 3:
+ m->misc = *(i_mce + 2);
+ fallthrough;
+ /* MCA_ADDR */
+ case 2:
+ m->addr = *(i_mce + 1);
+ fallthrough;
+ /* MCA_STATUS */
+ case 1:
+ m->status = *i_mce;
+ }
- mce_log(&m);
+ mce_log(&err);
return 0;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2a938f429c4d..7fb5556a0b53 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -88,7 +88,7 @@ struct mca_config mca_cfg __read_mostly = {
.monarch_timeout = -1
};
-static DEFINE_PER_CPU(struct mce, mces_seen);
+static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
static unsigned long mce_need_notify;
/*
@@ -119,8 +119,6 @@ BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
void mce_prep_record_common(struct mce *m)
{
- memset(m, 0, sizeof(struct mce));
-
m->cpuid = cpuid_eax(1);
m->cpuvendor = boot_cpu_data.x86_vendor;
m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
@@ -138,9 +136,12 @@ void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
m->socketid = topology_physical_package_id(cpu);
}
-/* Do initial initialization of a struct mce */
-void mce_prep_record(struct mce *m)
+/* Do initial initialization of struct mce_hw_err */
+void mce_prep_record(struct mce_hw_err *err)
{
+ struct mce *m = &err->m;
+
+ memset(err, 0, sizeof(struct mce_hw_err));
mce_prep_record_common(m);
mce_prep_record_per_cpu(smp_processor_id(), m);
}
@@ -148,9 +149,9 @@ void mce_prep_record(struct mce *m)
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);
-void mce_log(struct mce *m)
+void mce_log(struct mce_hw_err *err)
{
- if (!mce_gen_pool_add(m))
+ if (!mce_gen_pool_add(err))
irq_work_queue(&mce_irq_work);
}
EXPORT_SYMBOL_GPL(mce_log);
@@ -171,8 +172,10 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
-static void __print_mce(struct mce *m)
+static void __print_mce(struct mce_hw_err *err)
{
+ struct mce *m = &err->m;
+
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
m->extcpu,
(m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
@@ -199,6 +202,10 @@ static void __print_mce(struct mce *m)
if (mce_flags.smca) {
if (m->synd)
pr_cont("SYND %llx ", m->synd);
+ if (err->vendor.amd.synd1)
+ pr_cont("SYND1 %llx ", err->vendor.amd.synd1);
+ if (err->vendor.amd.synd2)
+ pr_cont("SYND2 %llx ", err->vendor.amd.synd2);
if (m->ipid)
pr_cont("IPID %llx ", m->ipid);
}
@@ -214,9 +221,11 @@ static void __print_mce(struct mce *m)
m->microcode);
}
-static void print_mce(struct mce *m)
+static void print_mce(struct mce_hw_err *err)
{
- __print_mce(m);
+ struct mce *m = &err->m;
+
+ __print_mce(err);
if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
@@ -251,7 +260,7 @@ static const char *mce_dump_aux_info(struct mce *m)
return NULL;
}
-static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
+static noinstr void mce_panic(const char *msg, struct mce_hw_err *final, char *exp)
{
struct llist_node *pending;
struct mce_evt_llist *l;
@@ -282,20 +291,22 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
llist_for_each_entry(l, pending, llnode) {
- struct mce *m = &l->mce;
+ struct mce_hw_err *err = &l->err;
+ struct mce *m = &err->m;
if (!(m->status & MCI_STATUS_UC)) {
- print_mce(m);
+ print_mce(err);
if (!apei_err)
apei_err = apei_write_mce(m);
}
}
/* Now print uncorrected but with the final one last */
llist_for_each_entry(l, pending, llnode) {
- struct mce *m = &l->mce;
+ struct mce_hw_err *err = &l->err;
+ struct mce *m = &err->m;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || mce_cmp(m, final)) {
- print_mce(m);
+ if (!final || mce_cmp(m, &final->m)) {
+ print_mce(err);
if (!apei_err)
apei_err = apei_write_mce(m);
}
@@ -303,12 +314,12 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
if (final) {
print_mce(final);
if (!apei_err)
- apei_err = apei_write_mce(final);
+ apei_err = apei_write_mce(&final->m);
}
if (exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
- memmsg = mce_dump_aux_info(final);
+ memmsg = mce_dump_aux_info(&final->m);
if (memmsg)
pr_emerg(HW_ERR "Machine check: %s\n", memmsg);
@@ -323,9 +334,9 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
* panic.
*/
if (kexec_crash_loaded()) {
- if (final && (final->status & MCI_STATUS_ADDRV)) {
+ if (final && (final->m.status & MCI_STATUS_ADDRV)) {
struct page *p;
- p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
+ p = pfn_to_online_page(final->m.addr >> PAGE_SHIFT);
if (p)
SetPageHWPoison(p);
}
@@ -445,16 +456,18 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
* check into our "mce" struct so that we can use it later to assess
* the severity of the problem as we read per-bank specific details.
*/
-static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
+static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs)
{
+ struct mce *m;
/*
* Enable instrumentation around mce_prep_record() which calls external
* facilities.
*/
instrumentation_begin();
- mce_prep_record(m);
+ mce_prep_record(err);
instrumentation_end();
+ m = &err->m;
m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
if (regs) {
/*
@@ -574,13 +587,13 @@ EXPORT_SYMBOL_GPL(mce_is_correctable);
static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
- struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = to_mce_hw_err(data);
- if (!m)
+ if (!err)
return NOTIFY_DONE;
/* Emit the trace record: */
- trace_mce_record(m);
+ trace_mce_record(err);
set_bit(0, &mce_need_notify);
@@ -624,13 +637,13 @@ static struct notifier_block mce_uc_nb = {
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
- struct mce *m = (struct mce *)data;
+ struct mce_hw_err *err = to_mce_hw_err(data);
- if (!m)
+ if (!err)
return NOTIFY_DONE;
- if (mca_cfg.print_all || !m->kflags)
- __print_mce(m);
+ if (mca_cfg.print_all || !(err->m.kflags))
+ __print_mce(err);
return NOTIFY_DONE;
}
@@ -644,8 +657,10 @@ static struct notifier_block mce_default_nb = {
/*
* Read ADDR and MISC registers.
*/
-static noinstr void mce_read_aux(struct mce *m, int i)
+static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
{
+ struct mce *m = &err->m;
+
if (m->status & MCI_STATUS_MISCV)
m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
@@ -667,8 +682,11 @@ static noinstr void mce_read_aux(struct mce *m, int i)
if (mce_flags.smca) {
m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
- if (m->status & MCI_STATUS_SYNDV)
+ if (m->status & MCI_STATUS_SYNDV) {
m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
+ err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
+ err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
+ }
}
}
@@ -692,26 +710,28 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- struct mce m;
+ struct mce_hw_err err;
+ struct mce *m;
int i;
this_cpu_inc(mce_poll_count);
- mce_gather_info(&m, NULL);
+ mce_gather_info(&err, NULL);
+ m = &err.m;
if (flags & MCP_TIMESTAMP)
- m.tsc = rdtsc();
+ m->tsc = rdtsc();
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
+ m->misc = 0;
+ m->addr = 0;
+ m->bank = i;
barrier();
- m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
/*
* Update storm tracking here, before checking for the
@@ -721,17 +741,17 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* storm status.
*/
if (!mca_cfg.cmci_disabled)
- mce_track_storm(&m);
+ mce_track_storm(m);
/* If this entry is not valid, ignore it */
- if (!(m.status & MCI_STATUS_VAL))
+ if (!(m->status & MCI_STATUS_VAL))
continue;
/*
* If we are logging everything (at CPU online) or this
* is a corrected error, then we must log it.
*/
- if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
+ if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
goto log_it;
/*
@@ -741,20 +761,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* everything else.
*/
if (!mca_cfg.ser) {
- if (m.status & MCI_STATUS_UC)
+ if (m->status & MCI_STATUS_UC)
continue;
goto log_it;
}
/* Log "not enabled" (speculative) errors */
- if (!(m.status & MCI_STATUS_EN))
+ if (!(m->status & MCI_STATUS_EN))
goto log_it;
/*
* Log UCNA (SDM: 15.6.3 "UCR Error Classification")
* UC == 1 && PCC == 0 && S == 0
*/
- if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
+ if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
goto log_it;
/*
@@ -768,20 +788,20 @@ log_it:
if (flags & MCP_DONTLOG)
goto clear_it;
- mce_read_aux(&m, i);
- m.severity = mce_severity(&m, NULL, NULL, false);
+ mce_read_aux(&err, i);
+ m->severity = mce_severity(m, NULL, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
+ if (mca_cfg.dont_log_ce && !mce_usable_address(m))
goto clear_it;
if (flags & MCP_QUEUE_LOG)
- mce_gen_pool_add(&m);
+ mce_gen_pool_add(&err);
else
- mce_log(&m);
+ mce_log(&err);
clear_it:
/*
@@ -905,9 +925,10 @@ static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_reg
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
+ struct mce *m = &err->m;
char *tmp = *msg;
int i;
@@ -925,7 +946,7 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
m->bank = i;
if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
- mce_read_aux(m, i);
+ mce_read_aux(err, i);
*msg = tmp;
return 1;
}
@@ -1016,10 +1037,11 @@ out:
*/
static void mce_reign(void)
{
- int cpu;
+ struct mce_hw_err *err = NULL;
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
+ int cpu;
/*
* This CPU is the Monarch and the other CPUs have run
@@ -1027,11 +1049,13 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- struct mce *mtmp = &per_cpu(mces_seen, cpu);
+ struct mce_hw_err *etmp = &per_cpu(hw_errs_seen, cpu);
+ struct mce *mtmp = &etmp->m;
if (mtmp->severity > global_worst) {
global_worst = mtmp->severity;
- m = &per_cpu(mces_seen, cpu);
+ err = &per_cpu(hw_errs_seen, cpu);
+ m = &err->m;
}
}
@@ -1043,7 +1067,7 @@ static void mce_reign(void)
if (m && global_worst >= MCE_PANIC_SEVERITY) {
/* call mce_severity() to get "msg" for panic */
mce_severity(m, NULL, &msg, true);
- mce_panic("Fatal machine check", m, msg);
+ mce_panic("Fatal machine check", err, msg);
}
/*
@@ -1060,11 +1084,11 @@ static void mce_reign(void)
mce_panic("Fatal machine check from unknown source", NULL, NULL);
/*
- * Now clear all the mces_seen so that they don't reappear on
+ * Now clear all the hw_errs_seen so that they don't reappear on
* the next mce.
*/
for_each_possible_cpu(cpu)
- memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
+ memset(&per_cpu(hw_errs_seen, cpu), 0, sizeof(struct mce_hw_err));
}
static atomic_t global_nwo;
@@ -1268,13 +1292,14 @@ static noinstr bool mce_check_crashing_cpu(void)
}
static __always_inline int
-__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
- unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
- int *worst)
+__mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs,
+ struct mce_hw_err *final, unsigned long *toclear,
+ unsigned long *valid_banks, int no_way_out, int *worst)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i, taint = 0;
+ struct mce *m = &err->m;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
arch___clear_bit(i, toclear);
@@ -1319,7 +1344,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
if (severity == MCE_NO_SEVERITY)
continue;
- mce_read_aux(m, i);
+ mce_read_aux(err, i);
/* assuming valid severity level != 0 */
m->severity = severity;
@@ -1329,17 +1354,17 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
* done in #MC context, where instrumentation is disabled.
*/
instrumentation_begin();
- mce_log(m);
+ mce_log(err);
instrumentation_end();
if (severity > *worst) {
- *final = *m;
+ *final = *err;
*worst = severity;
}
}
/* mce_clear_state will clear *final, save locally for use later */
- *m = *final;
+ *err = *final;
return taint;
}
@@ -1399,9 +1424,10 @@ static void kill_me_never(struct callback_head *cb)
set_mce_nospec(pfn);
}
-static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
+static void queue_task_work(struct mce_hw_err *err, char *msg, void (*func)(struct callback_head *))
{
int count = ++current->mce_count;
+ struct mce *m = &err->m;
/* First call, save all the details */
if (count == 1) {
@@ -1414,11 +1440,12 @@ static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callba
/* Ten is likely overkill. Don't expect more than two faults before task_work() */
if (count > 10)
- mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
+ mce_panic("Too many consecutive machine checks while accessing user data",
+ err, msg);
/* Second or later call, make sure page address matches the one from first call */
if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
- mce_panic("Consecutive machine checks to different user pages", m, msg);
+ mce_panic("Consecutive machine checks to different user pages", err, msg);
/* Do not call task_work_add() more than once */
if (count > 1)
@@ -1467,8 +1494,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
- struct mce m, *final;
+ struct mce_hw_err *final;
+ struct mce_hw_err err;
char *msg = NULL;
+ struct mce *m;
if (unlikely(mce_flags.p5))
return pentium_machine_check(regs);
@@ -1506,13 +1535,14 @@ noinstr void do_machine_check(struct pt_regs *regs)
this_cpu_inc(mce_exception_count);
- mce_gather_info(&m, regs);
- m.tsc = rdtsc();
+ mce_gather_info(&err, regs);
+ m = &err.m;
+ m->tsc = rdtsc();
- final = this_cpu_ptr(&mces_seen);
- *final = m;
+ final = this_cpu_ptr(&hw_errs_seen);
+ *final = err;
- no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
+ no_way_out = mce_no_way_out(&err, &msg, valid_banks, regs);
barrier();
@@ -1521,15 +1551,15 @@ noinstr void do_machine_check(struct pt_regs *regs)
* Assume the worst for now, but if we find the
* severity is MCE_AR_SEVERITY we have other options.
*/
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
+ if (!(m->mcgstatus & MCG_STATUS_RIPV))
kill_current_task = 1;
/*
* Check if this MCE is signaled to only this logical processor,
* on Intel, Zhaoxin only.
*/
- if (m.cpuvendor == X86_VENDOR_INTEL ||
- m.cpuvendor == X86_VENDOR_ZHAOXIN)
- lmce = m.mcgstatus & MCG_STATUS_LMCES;
+ if (m->cpuvendor == X86_VENDOR_INTEL ||
+ m->cpuvendor == X86_VENDOR_ZHAOXIN)
+ lmce = m->mcgstatus & MCG_STATUS_LMCES;
/*
* Local machine check may already know that we have to panic.
@@ -1540,12 +1570,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
*/
if (lmce) {
if (no_way_out)
- mce_panic("Fatal local machine check", &m, msg);
+ mce_panic("Fatal local machine check", &err, msg);
} else {
order = mce_start(&no_way_out);
}
- taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
+ taint = __mc_scan_banks(&err, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1560,7 +1590,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
no_way_out = worst >= MCE_PANIC_SEVERITY;
if (no_way_out)
- mce_panic("Fatal machine check on current CPU", &m, msg);
+ mce_panic("Fatal machine check on current CPU", &err, msg);
}
} else {
/*
@@ -1572,8 +1602,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY) {
- mce_severity(&m, regs, &msg, true);
- mce_panic("Local fatal machine check!", &m, msg);
+ mce_severity(m, regs, &msg, true);
+ mce_panic("Local fatal machine check!", &err, msg);
}
}
@@ -1591,16 +1621,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
goto out;
/* Fault was in user mode and we need to take some action */
- if ((m.cs & 3) == 3) {
+ if ((m->cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- if (!mce_usable_address(&m))
- queue_task_work(&m, msg, kill_me_now);
+ if (!mce_usable_address(m))
+ queue_task_work(&err, msg, kill_me_now);
else
- queue_task_work(&m, msg, kill_me_maybe);
+ queue_task_work(&err, msg, kill_me_maybe);
- } else if (m.mcgstatus & MCG_STATUS_SEAM_NR) {
+ } else if (m->mcgstatus & MCG_STATUS_SEAM_NR) {
/*
* Saved RIP on stack makes it look like the machine check
* was taken in the kernel on the instruction following
@@ -1612,8 +1642,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
* not occur there. Mark the page as poisoned so it won't
* be added to free list when the guest is terminated.
*/
- if (mce_usable_address(&m)) {
- struct page *p = pfn_to_online_page(m.addr >> PAGE_SHIFT);
+ if (mce_usable_address(m)) {
+ struct page *p = pfn_to_online_page(m->addr >> PAGE_SHIFT);
if (p)
SetPageHWPoison(p);
@@ -1628,13 +1658,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
* corresponding exception handler which would do that is the
* proper one.
*/
- if (m.kflags & MCE_IN_KERNEL_RECOV) {
+ if (m->kflags & MCE_IN_KERNEL_RECOV) {
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
- mce_panic("Failed kernel mode recovery", &m, msg);
+ mce_panic("Failed kernel mode recovery", &err, msg);
}
- if (m.kflags & MCE_IN_KERNEL_COPYIN)
- queue_task_work(&m, msg, kill_me_never);
+ if (m->kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&err, msg, kill_me_never);
}
out:
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index af44fd5dbd7c..8d023239ce18 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -264,15 +264,8 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
return put_user(mcelog->len, p);
- case MCE_GETCLEAR_FLAGS: {
- unsigned flags;
-
- do {
- flags = mcelog->flags;
- } while (cmpxchg(&mcelog->flags, flags, 0) != flags);
-
- return put_user(flags, p);
- }
+ case MCE_GETCLEAR_FLAGS:
+ return put_user(xchg(&mcelog->flags, 0), p);
default:
return -ENOTTY;
}
diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c
index 4284749ec803..d0be6dda0c14 100644
--- a/arch/x86/kernel/cpu/mce/genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -31,15 +31,15 @@ static LLIST_HEAD(mce_event_llist);
*/
static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
{
+ struct mce_hw_err *err1, *err2;
struct mce_evt_llist *node;
- struct mce *m1, *m2;
- m1 = &t->mce;
+ err1 = &t->err;
llist_for_each_entry(node, &l->llnode, llnode) {
- m2 = &node->mce;
+ err2 = &node->err;
- if (!mce_cmp(m1, m2))
+ if (!mce_cmp(&err1->m, &err2->m))
return true;
}
return false;
@@ -73,8 +73,8 @@ struct llist_node *mce_gen_pool_prepare_records(void)
void mce_gen_pool_process(struct work_struct *__unused)
{
- struct llist_node *head;
struct mce_evt_llist *node, *tmp;
+ struct llist_node *head;
struct mce *mce;
head = llist_del_all(&mce_event_llist);
@@ -83,7 +83,7 @@ void mce_gen_pool_process(struct work_struct *__unused)
head = llist_reverse_order(head);
llist_for_each_entry_safe(node, tmp, head, llnode) {
- mce = &node->mce;
+ mce = &node->err.m;
blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
}
@@ -94,11 +94,11 @@ bool mce_gen_pool_empty(void)
return llist_empty(&mce_event_llist);
}
-int mce_gen_pool_add(struct mce *mce)
+int mce_gen_pool_add(struct mce_hw_err *err)
{
struct mce_evt_llist *node;
- if (filter_mce(mce))
+ if (filter_mce(&err->m))
return -EINVAL;
if (!mce_evt_pool)
@@ -110,7 +110,7 @@ int mce_gen_pool_add(struct mce *mce)
return -ENOMEM;
}
- memcpy(&node->mce, mce, sizeof(*mce));
+ memcpy(&node->err, err, sizeof(*err));
llist_add(&node->llnode, &mce_event_llist);
return 0;
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 49ed3428785d..313fe682db33 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -502,8 +502,9 @@ static void prepare_msrs(void *info)
static void do_inject(void)
{
- u64 mcg_status = 0;
unsigned int cpu = i_mce.extcpu;
+ struct mce_hw_err err;
+ u64 mcg_status = 0;
u8 b = i_mce.bank;
i_mce.tsc = rdtsc_ordered();
@@ -517,7 +518,8 @@ static void do_inject(void)
i_mce.status |= MCI_STATUS_SYNDV;
if (inj_type == SW_INJ) {
- mce_log(&i_mce);
+ err.m = i_mce;
+ mce_log(&err);
return;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f6103e6bf69a..b3cd2c61b11d 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -94,7 +94,7 @@ static int cmci_supported(int *banks)
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
- *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
+ *banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
return !!(cap & MCG_CMCI_P);
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 43c7f3b71df5..84f810598231 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -26,12 +26,12 @@ extern struct blocking_notifier_head x86_mce_decoder_chain;
struct mce_evt_llist {
struct llist_node llnode;
- struct mce mce;
+ struct mce_hw_err err;
};
void mce_gen_pool_process(struct work_struct *__unused);
bool mce_gen_pool_empty(void);
-int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_add(struct mce_hw_err *err);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 815fa67356a2..f3d534807d91 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -319,12 +319,6 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
return UCODE_OK;
}
- /*
- * Writeback and invalidate caches before updating microcode to avoid
- * internal issues depending on what the microcode is updating.
- */
- native_wbinvd();
-
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -574,14 +568,14 @@ static bool is_blacklisted(unsigned int cpu)
/*
* Late loading on model 79 with microcode revision less than 0x0b000021
* and LLC size per core bigger than 2.5MB may result in a system hang.
- * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * This behavior is documented in item BDX90, #334165 (Intel Xeon
* Processor E7-8800/4800 v4 Product Family).
*/
if (c->x86_vfm == INTEL_BROADWELL_X &&
c->x86_stepping == 0x01 &&
llc_size_per_core > 2621440 &&
c->microcode < 0x0b000021) {
- pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Erratum BDX90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index e65fae63660e..41ed01f46bd9 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -41,11 +41,11 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
"fpu_exception\t: %s\n"
"cpuid level\t: %d\n"
"wp\t\t: yes\n",
- boot_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
- boot_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
- boot_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
- boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
- boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ str_yes_no(boot_cpu_has_bug(X86_BUG_FDIV)),
+ str_yes_no(boot_cpu_has_bug(X86_BUG_F00F)),
+ str_yes_no(boot_cpu_has_bug(X86_BUG_COMA)),
+ str_yes_no(boot_cpu_has(X86_FEATURE_FPU)),
+ str_yes_no(boot_cpu_has(X86_FEATURE_FPU)),
c->cpuid_level);
}
#else
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 851b561850e0..5fcb3d635d91 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -1158,11 +1158,12 @@ static __init int snc_get_config(void)
ret = cpus_per_l3 / cpus_per_node;
- /* sanity check: Only valid results are 1, 2, 3, 4 */
+ /* sanity check: Only valid results are 1, 2, 3, 4, 6 */
switch (ret) {
case 1:
break;
case 2 ... 4:
+ case 6:
pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
break;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index d7163b764c62..d906a1cd8491 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1596,7 +1596,7 @@ static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
- struct mon_config_info mon_info = {0};
+ struct mon_config_info mon_info;
struct rdt_mon_domain *dom;
bool sep = false;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c84c30188fdf..16f3ca30626a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,34 +24,36 @@ struct cpuid_bit {
* levels are different and there is a separate entry for each.
*/
static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
- { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
- { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
- { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
- { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
- { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
- { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
- { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
- { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
- { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
- { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
- { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
- { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
- { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
- { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
- { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
- { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 },
- { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
- { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
- { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
- { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
- { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
- { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
- { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
- { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
- { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
+ { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
+ { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
+ { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
+ { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
+ { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
+ { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
+ { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
+ { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 },
+ { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
+ { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
+ { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
+ { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
+ { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
+ { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
+ { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 9ace84486499..8ce352fc72ac 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -630,7 +630,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
if (!section->virt_addr)
return false;
- section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page));
+ section->pages = vmalloc_array(nr_pages, sizeof(struct sgx_epc_page));
if (!section->pages) {
memunmap(section->virt_addr);
return false;
@@ -901,19 +901,15 @@ static struct miscdevice sgx_dev_provision = {
int sgx_set_attribute(unsigned long *allowed_attributes,
unsigned int attribute_fd)
{
- struct fd f = fdget(attribute_fd);
+ CLASS(fd, f)(attribute_fd);
- if (!fd_file(f))
+ if (fd_empty(f))
return -EINVAL;
- if (fd_file(f)->f_op != &sgx_provision_fops) {
- fdput(f);
+ if (fd_file(f)->f_op != &sgx_provision_fops)
return -EINVAL;
- }
*allowed_attributes |= SGX_ATTR_PROVISIONKEY;
-
- fdput(f);
return 0;
}
EXPORT_SYMBOL_GPL(sgx_set_attribute);
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 7d476fa697ca..03b3c9c3a45e 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -182,6 +182,9 @@ static void parse_topology_amd(struct topo_scan *tscan)
if (cpu_feature_enabled(X86_FEATURE_TOPOEXT))
has_topoext = cpu_parse_topology_ext(tscan);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES))
+ tscan->c->topo.cpu_type = cpuid_ebx(0x80000026);
+
if (!has_topoext && !parse_8000_0008(tscan))
return;
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index 9a6069e7133c..8277c64f88db 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -3,6 +3,7 @@
#include <xen/xen.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/smp.h>
@@ -27,6 +28,36 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
}
}
+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor == X86_VENDOR_INTEL) {
+ switch (c->topo.intel_type) {
+ case INTEL_CPU_TYPE_ATOM: return TOPO_CPU_TYPE_EFFICIENCY;
+ case INTEL_CPU_TYPE_CORE: return TOPO_CPU_TYPE_PERFORMANCE;
+ }
+ }
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ switch (c->topo.amd_type) {
+ case 0: return TOPO_CPU_TYPE_PERFORMANCE;
+ case 1: return TOPO_CPU_TYPE_EFFICIENCY;
+ }
+ }
+
+ return TOPO_CPU_TYPE_UNKNOWN;
+}
+
+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c)
+{
+ switch (get_topology_cpu_type(c)) {
+ case TOPO_CPU_TYPE_PERFORMANCE:
+ return "performance";
+ case TOPO_CPU_TYPE_EFFICIENCY:
+ return "efficiency";
+ default:
+ return "unknown";
+ }
+}
+
static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c)
{
struct {
@@ -87,6 +118,7 @@ static void parse_topology(struct topo_scan *tscan, bool early)
.cu_id = 0xff,
.llc_id = BAD_APICID,
.l2c_id = BAD_APICID,
+ .cpu_type = TOPO_CPU_TYPE_UNKNOWN,
};
struct cpuinfo_x86 *c = tscan->c;
struct {
@@ -132,6 +164,8 @@ static void parse_topology(struct topo_scan *tscan, bool early)
case X86_VENDOR_INTEL:
if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan))
parse_legacy(tscan);
+ if (c->cpuid_level >= 0x1a)
+ c->topo.cpu_type = cpuid_eax(0x1a);
break;
case X86_VENDOR_HYGON:
if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 64280879c68c..59d23cdf4ed0 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -305,7 +305,7 @@ void __init x86_flattree_get_config(void)
map_len = size;
}
- early_init_dt_verify(dt);
+ early_init_dt_verify(dt, __pa(dt));
}
unflatten_and_copy_device_tree();
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 29d1f9104e94..6b6f32f40cbe 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,7 +18,7 @@
#include <linux/bcma/bcma_regs.h>
#include <linux/platform_data/x86/apple.h>
#include <drm/intel/i915_drm.h>
-#include <drm/intel/i915_pciids.h>
+#include <drm/intel/pciids.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/io_apic.h>
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8da0e66ca22d..adb09f78edb2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -647,7 +647,7 @@ void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- struct pt_regs *regs = &fregs->regs;
+ struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
prepare_ftrace_return(ip, (unsigned long *)stack, 0);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 16752b8dfa89..56163e2124cf 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -77,6 +77,7 @@ SYM_CODE_START_NOALIGN(startup_64)
lretq
.Lon_kernel_cs:
+ ANNOTATE_NOENDBR
UNWIND_HINT_END_OF_STACK
#ifdef CONFIG_AMD_MEM_ENCRYPT
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 15af7e98e161..2be55ec3f392 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -9,6 +9,7 @@
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/ftrace.h>
+#include <asm/text-patching.h>
#include "common.h"
@@ -36,23 +37,25 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
} else {
- unsigned long orig_ip = regs->ip;
+ unsigned long orig_ip = instruction_pointer(regs);
+
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
- regs->ip = ip + sizeof(kprobe_opcode_t);
+ instruction_pointer_set(regs, ip + INT3_INSN_SIZE);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
if (unlikely(p->post_handler)) {
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE);
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
- regs->ip = orig_ip;
+ /* Recover IP address */
+ instruction_pointer_set(regs, orig_ip);
}
/*
* If pre_handler returns !0, it changes regs->ip. We have to
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 766f092dab80..b5a8f0891135 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -497,8 +497,9 @@ static int x86_cluster_flags(void)
static int x86_die_flags(void)
{
- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
- return x86_sched_itmt_flags();
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU) ||
+ cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES))
+ return x86_sched_itmt_flags();
return 0;
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index dfe6847fd99e..67aeaba4ba9c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -174,10 +174,11 @@ static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long ts
c2n = per_cpu_ptr(&cyc2ns, cpu);
- raw_write_seqcount_latch(&c2n->seq);
+ write_seqcount_latch_begin(&c2n->seq);
c2n->data[0] = data;
- raw_write_seqcount_latch(&c2n->seq);
+ write_seqcount_latch(&c2n->seq);
c2n->data[1] = data;
+ write_seqcount_latch_end(&c2n->seq);
}
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index d00c28aaa5be..d4705a348a80 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -723,7 +723,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
state->sp = task->thread.sp + sizeof(*frame);
state->bp = READ_ONCE_NOCHECK(frame->bp);
state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
- state->signal = (void *)state->ip == ret_from_fork;
+ state->signal = (void *)state->ip == ret_from_fork_asm;
}
if (get_stack_info((unsigned long *)state->sp, state->task,
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index feb8102a9ca7..68efd8cd8bf1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -193,29 +193,6 @@ SECTIONS
ORC_UNWIND_TABLE
- . = ALIGN(PAGE_SIZE);
- __vvar_page = .;
-
- .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
- /* work around gold bug 13023 */
- __vvar_beginning_hack = .;
-
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
- . = __vvar_beginning_hack + offset; \
- *(.vvar_ ## name)
-#include <asm/vvar.h>
-#undef EMIT_VVAR
-
- /*
- * Pad the rest of the page with zeros. Otherwise the loader
- * can leave garbage here.
- */
- . = __vvar_beginning_hack + PAGE_SIZE;
- } :data
-
- . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
-
/* Init code and data - will be freed after init */
. = ALIGN(PAGE_SIZE);
.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
@@ -531,3 +508,22 @@ INIT_PER_CPU(irq_stack_backing_store);
#endif
#endif /* CONFIG_X86_64 */
+
+/*
+ * The symbols below are referenced using relative relocations in the
+ * respective ELF notes. This produces build time constants that the
+ * linker will never mark as relocatable. (Using just ABSOLUTE() is not
+ * sufficient for that).
+ */
+#ifdef CONFIG_XEN
+#ifdef CONFIG_XEN_PV
+xen_elfnote_entry_value =
+ ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen);
+#endif
+xen_elfnote_hypercall_page_value =
+ ABSOLUTE(xen_elfnote_hypercall_page) + ABSOLUTE(hypercall_page);
+#endif
+#ifdef CONFIG_PVH
+xen_elfnote_phys32_entry_value =
+ ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
+#endif
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index fb854cf20ac3..92d4711fd1e4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -533,17 +533,12 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
- struct fd f;
- int ret;
+ CLASS(fd, f)(fd);
- f = fdget(fd);
- if (!fd_file(f))
+ if (fd_empty(f))
return -EBADF;
- ret = sev_issue_cmd_external_user(fd_file(f), id, data, error);
-
- fdput(f);
- return ret;
+ return sev_issue_cmd_external_user(fd_file(f), id, data, error);
}
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
@@ -2076,23 +2071,21 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_info *src_sev, *cg_cleanup_sev;
- struct fd f = fdget(source_fd);
+ CLASS(fd, f)(source_fd);
struct kvm *source_kvm;
bool charged = false;
int ret;
- if (!fd_file(f))
+ if (fd_empty(f))
return -EBADF;
- if (!file_is_kvm(fd_file(f))) {
- ret = -EBADF;
- goto out_fput;
- }
+ if (!file_is_kvm(fd_file(f)))
+ return -EBADF;
source_kvm = fd_file(f)->private_data;
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
- goto out_fput;
+ return ret;
if (kvm->arch.vm_type != source_kvm->arch.vm_type ||
sev_guest(kvm) || !sev_guest(source_kvm)) {
@@ -2139,8 +2132,6 @@ out_dst_cgroup:
cg_cleanup_sev->misc_cg = NULL;
out_unlock:
sev_unlock_two_vms(kvm, source_kvm);
-out_fput:
- fdput(f);
return ret;
}
@@ -2801,23 +2792,21 @@ failed:
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
{
- struct fd f = fdget(source_fd);
+ CLASS(fd, f)(source_fd);
struct kvm *source_kvm;
struct kvm_sev_info *source_sev, *mirror_sev;
int ret;
- if (!fd_file(f))
+ if (fd_empty(f))
return -EBADF;
- if (!file_is_kvm(fd_file(f))) {
- ret = -EBADF;
- goto e_source_fput;
- }
+ if (!file_is_kvm(fd_file(f)))
+ return -EBADF;
source_kvm = fd_file(f)->private_data;
ret = sev_lock_two_vms(kvm, source_kvm);
if (ret)
- goto e_source_fput;
+ return ret;
/*
* Mirrors of mirrors should work, but let's not get silly. Also
@@ -2860,8 +2849,6 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
e_unlock:
sev_unlock_two_vms(kvm, source_kvm);
-e_source_fput:
- fdput(f);
return ret;
}
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 622fe24da910..a909b817b9c0 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -263,13 +263,6 @@ static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.xen.timer_pending, 0);
}
-static void kvm_xen_init_timer(struct kvm_vcpu *vcpu)
-{
- hrtimer_init(&vcpu->arch.xen.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_HARD);
- vcpu->arch.xen.timer.function = xen_timer_callback;
-}
-
static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
@@ -1070,9 +1063,6 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
- if (!vcpu->arch.xen.timer.function)
- kvm_xen_init_timer(vcpu);
-
/* Stop the timer (if it's running) before changing the vector */
kvm_xen_stop_timer(vcpu);
vcpu->arch.xen.timer_virq = data->u.timer.port;
@@ -2235,6 +2225,8 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.xen.poll_evtchn = 0;
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
+ hrtimer_init(&vcpu->arch.xen.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+ vcpu->arch.xen.timer.function = xen_timer_callback;
kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm);
kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index eb503f53c319..101725c149c4 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -263,28 +263,33 @@ static void __init probe_page_size_mask(void)
}
/*
- * INVLPG may not properly flush Global entries
- * on these CPUs when PCIDs are enabled.
+ * INVLPG may not properly flush Global entries on
+ * these CPUs. New microcode fixes the issue.
*/
static const struct x86_cpu_id invlpg_miss_ids[] = {
- X86_MATCH_VFM(INTEL_ALDERLAKE, 0),
- X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0),
- X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0x42c),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, 0x118),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0x4117),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0x2e),
{}
};
static void setup_pcid(void)
{
+ const struct x86_cpu_id *invlpg_miss_match;
+
if (!IS_ENABLED(CONFIG_X86_64))
return;
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
- if (x86_match_cpu(invlpg_miss_ids)) {
+ invlpg_miss_match = x86_match_cpu(invlpg_miss_ids);
+
+ if (invlpg_miss_match &&
+ boot_cpu_data.microcode < invlpg_miss_match->driver_data) {
pr_info("Incomplete global flushes, disabling PCID");
setup_clear_cpu_cap(X86_FEATURE_PCID);
return;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 230f1dee4f09..e17e6e27b7ec 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -22,7 +22,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/memblock.h>
#include <linux/pgtable.h>
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 86a476a426c2..774f9677458f 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -311,59 +311,82 @@ static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool en
return 0;
}
-static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+int prepare_pte_enc(struct pte_enc_desc *d)
{
- pgprot_t old_prot, new_prot;
- unsigned long pfn, pa, size;
- pte_t new_pte;
+ pgprot_t old_prot;
- pfn = pg_level_to_pfn(level, kpte, &old_prot);
- if (!pfn)
- return;
+ d->pfn = pg_level_to_pfn(d->pte_level, d->kpte, &old_prot);
+ if (!d->pfn)
+ return 1;
- new_prot = old_prot;
- if (enc)
- pgprot_val(new_prot) |= _PAGE_ENC;
+ d->new_pgprot = old_prot;
+ if (d->encrypt)
+ pgprot_val(d->new_pgprot) |= _PAGE_ENC;
else
- pgprot_val(new_prot) &= ~_PAGE_ENC;
+ pgprot_val(d->new_pgprot) &= ~_PAGE_ENC;
/* If prot is same then do nothing. */
- if (pgprot_val(old_prot) == pgprot_val(new_prot))
- return;
+ if (pgprot_val(old_prot) == pgprot_val(d->new_pgprot))
+ return 1;
- pa = pfn << PAGE_SHIFT;
- size = page_level_size(level);
+ d->pa = d->pfn << PAGE_SHIFT;
+ d->size = page_level_size(d->pte_level);
/*
- * We are going to perform in-place en-/decryption and change the
- * physical page attribute from C=1 to C=0 or vice versa. Flush the
- * caches to ensure that data gets accessed with the correct C-bit.
+ * In-place en-/decryption and physical page attribute change
+ * from C=1 to C=0 or vice versa will be performed. Flush the
+ * caches to ensure that data gets accessed with the correct
+ * C-bit.
*/
- clflush_cache_range(__va(pa), size);
+ if (d->va)
+ clflush_cache_range(d->va, d->size);
+ else
+ clflush_cache_range(__va(d->pa), d->size);
+
+ return 0;
+}
+
+void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot)
+{
+ pte_t new_pte;
+
+ /* Change the page encryption mask. */
+ new_pte = pfn_pte(pfn, new_prot);
+ set_pte_atomic(kpte, new_pte);
+}
+
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+ struct pte_enc_desc d = {
+ .kpte = kpte,
+ .pte_level = level,
+ .encrypt = enc
+ };
+
+ if (prepare_pte_enc(&d))
+ return;
/* Encrypt/decrypt the contents in-place */
if (enc) {
- sme_early_encrypt(pa, size);
+ sme_early_encrypt(d.pa, d.size);
} else {
- sme_early_decrypt(pa, size);
+ sme_early_decrypt(d.pa, d.size);
/*
* ON SNP, the page state in the RMP table must happen
* before the page table updates.
*/
- early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
+ early_snp_set_memory_shared((unsigned long)__va(d.pa), d.pa, 1);
}
- /* Change the page encryption mask. */
- new_pte = pfn_pte(pfn, new_prot);
- set_pte_atomic(kpte, new_pte);
+ set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
/*
* If page is set encrypted in the page table, then update the RMP table to
* add this page as private.
*/
if (enc)
- early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
+ early_snp_set_memory_private((unsigned long)__va(d.pa), d.pa, 1);
}
static int __init early_set_memory_enc_dec(unsigned long vaddr,
@@ -467,6 +490,8 @@ void __init sme_early_init(void)
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
+ x86_platform.guest.enc_kexec_begin = snp_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = snp_kexec_finish;
/*
* AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index ac33b2263a43..e6c7686f443a 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -495,10 +495,10 @@ void __head sme_enable(struct boot_params *bp)
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
unsigned long me_mask;
- bool snp;
+ bool snp_en;
u64 msr;
- snp = snp_init(bp);
+ snp_en = snp_init(bp);
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
@@ -531,8 +531,11 @@ void __head sme_enable(struct boot_params *bp)
RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
- /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
- if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED))
+ /*
+ * Any discrepancies between the presence of a CC blob and SNP
+ * enablement abort the guest.
+ */
+ if (snp_en ^ !!(msr & MSR_AMD64_SEV_SNP_ENABLED))
snp_abort();
/* Check if memory encryption is enabled */
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a2cabb1c81e1..b8a6ffffb451 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -163,11 +163,6 @@ unsigned long get_mmap_base(int is_legacy)
return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- return NULL;
-}
-
/**
* mmap_address_hint_valid - Validate the address hint of mmap
* @addr: Address hint
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 86593d1b787d..b0d5a644fc84 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -568,7 +568,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
* mm_cpumask. The TLB shootdown code can figure out from
* cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
*/
- if (WARN_ON_ONCE(prev != &init_mm &&
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm &&
!cpumask_test_cpu(cpu, mm_cpumask(next))))
cpumask_set_cpu(cpu, mm_cpumask(next));
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..a43fc5af973d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,6 +325,22 @@ struct jit_context {
/* Number of bytes that will be skipped on tailcall */
#define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE)
+static void push_r9(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ EMIT2(0x41, 0x51); /* push r9 */
+ *pprog = prog;
+}
+
+static void pop_r9(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ EMIT2(0x41, 0x59); /* pop r9 */
+ *pprog = prog;
+}
+
static void push_r12(u8 **pprog)
{
u8 *prog = *pprog;
@@ -1404,6 +1420,24 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
*pprog = prog;
}
+static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
+{
+ u8 *prog = *pprog;
+
+ /* movabs r9, priv_frame_ptr */
+ emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32,
+ (u32) (__force long) priv_frame_ptr);
+
+#ifdef CONFIG_SMP
+ /* add <r9>, gs:[<off>] */
+ EMIT2(0x65, 0x4c);
+ EMIT3(0x03, 0x0c, 0x25);
+ EMIT((u32)(unsigned long)&this_cpu_off, 4);
+#endif
+
+ *pprog = prog;
+}
+
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
#define __LOAD_TCC_PTR(off) \
@@ -1412,6 +1446,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
#define LOAD_TAIL_CALL_CNT_PTR(stack) \
__LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ 8
+#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
@@ -1421,18 +1459,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ void __percpu *priv_frame_ptr = NULL;
u64 arena_vm_start, user_vm_start;
+ void __percpu *priv_stack_ptr;
int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
+ u32 stack_depth;
int err;
+ stack_depth = bpf_prog->aux->stack_depth;
+ priv_stack_ptr = bpf_prog->aux->priv_stack_ptr;
+ if (priv_stack_ptr) {
+ priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up(stack_depth, 8);
+ stack_depth = 0;
+ }
+
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
detect_reg_usage(insn, insn_cnt, callee_regs_used);
- emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ emit_prologue(&prog, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
/* Exception callback will clobber callee regs for its own use, and
@@ -1454,6 +1502,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_mov_imm64(&prog, X86_REG_R12,
arena_vm_start >> 32, (u32) arena_vm_start);
+ if (priv_frame_ptr)
+ emit_priv_frame_ptr(&prog, priv_frame_ptr);
+
ilen = prog - temp;
if (rw_image)
memcpy(rw_image + proglen, temp, ilen);
@@ -1473,6 +1524,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
u8 *func;
int nops;
+ if (priv_frame_ptr) {
+ if (src_reg == BPF_REG_FP)
+ src_reg = X86_REG_R9;
+
+ if (dst_reg == BPF_REG_FP)
+ dst_reg = X86_REG_R9;
+ }
+
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -2127,15 +2186,21 @@ populate_extable:
u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
- if (tail_call_reachable) {
- LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
+ if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
+ LOAD_TAIL_CALL_CNT_PTR(stack_depth);
ip += 7;
}
if (!imm32)
return -EINVAL;
+ if (priv_frame_ptr) {
+ push_r9(&prog);
+ ip += 2;
+ }
ip += x86_call_depth_emit_accounting(&prog, func, ip);
if (emit_call(&prog, func, ip))
return -EINVAL;
+ if (priv_frame_ptr)
+ pop_r9(&prog);
break;
}
@@ -2145,13 +2210,13 @@ populate_extable:
&bpf_prog->aux->poke_tab[imm32 - 1],
&prog, image + addrs[i - 1],
callee_regs_used,
- bpf_prog->aux->stack_depth,
+ stack_depth,
ctx);
else
emit_bpf_tail_call_indirect(bpf_prog,
&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth,
+ stack_depth,
image + addrs[i - 1],
ctx);
break;
@@ -3303,6 +3368,42 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
}
+static const char *bpf_get_prog_name(struct bpf_prog *prog)
+{
+ if (prog->aux->ksym.prog)
+ return prog->aux->ksym.name;
+ return prog->aux->name;
+}
+
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+ int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+ u64 *stack_ptr;
+
+ for_each_possible_cpu(cpu) {
+ stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+ stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+ }
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+ struct bpf_prog *prog)
+{
+ int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+ u64 *stack_ptr;
+
+ for_each_possible_cpu(cpu) {
+ stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+ if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
+ pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
+ bpf_get_prog_name(prog));
+ break;
+ }
+ }
+}
+
struct x64_jit_data {
struct bpf_binary_header *rw_header;
struct bpf_binary_header *header;
@@ -3320,7 +3421,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_binary_header *rw_header = NULL;
struct bpf_binary_header *header = NULL;
struct bpf_prog *tmp, *orig_prog = prog;
+ void __percpu *priv_stack_ptr = NULL;
struct x64_jit_data *jit_data;
+ int priv_stack_alloc_sz;
int proglen, oldproglen = 0;
struct jit_context ctx = {};
bool tmp_blinded = false;
@@ -3356,6 +3459,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
prog->aux->jit_data = jit_data;
}
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+ /* Allocate actual private stack size with verifier-calculated
+ * stack size plus two memory guards to protect overflow and
+ * underflow.
+ */
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL);
+ if (!priv_stack_ptr) {
+ prog = orig_prog;
+ goto out_priv_stack;
+ }
+
+ priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+ prog->aux->priv_stack_ptr = priv_stack_ptr;
+ }
addrs = jit_data->addrs;
if (addrs) {
ctx = jit_data->ctx;
@@ -3491,6 +3611,11 @@ out_image:
bpf_prog_fill_jited_linfo(prog, addrs + 1);
out_addrs:
kvfree(addrs);
+ if (!image && priv_stack_ptr) {
+ free_percpu(priv_stack_ptr);
+ prog->aux->priv_stack_ptr = NULL;
+ }
+out_priv_stack:
kfree(jit_data);
prog->aux->jit_data = NULL;
}
@@ -3529,6 +3654,8 @@ void bpf_jit_free(struct bpf_prog *prog)
if (prog->jited) {
struct x64_jit_data *jit_data = prog->aux->jit_data;
struct bpf_binary_header *hdr;
+ void __percpu *priv_stack_ptr;
+ int priv_stack_alloc_sz;
/*
* If we fail the final pass of JIT (from jit_subprogs),
@@ -3544,6 +3671,13 @@ void bpf_jit_free(struct bpf_prog *prog)
prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (priv_stack_ptr) {
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+ free_percpu(prog->aux->priv_stack_ptr);
+ }
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
}
@@ -3559,6 +3693,11 @@ bool bpf_jit_supports_exceptions(void)
return IS_ENABLED(CONFIG_UNWINDER_ORC);
}
+bool bpf_jit_supports_private_stack(void)
+{
+ return true;
+}
+
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
{
#if defined(CONFIG_UNWINDER_ORC)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 88a96816de9a..a7ff189421c3 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,14 +54,12 @@
#include <asm/uv/uv.h>
static unsigned long efi_systab_phys __initdata;
-static unsigned long prop_phys = EFI_INVALID_TABLE_ADDR;
static unsigned long uga_phys = EFI_INVALID_TABLE_ADDR;
static unsigned long efi_runtime, efi_nr_tables;
unsigned long efi_fw_vendor, efi_config_table;
static const efi_config_table_type_t arch_tables[] __initconst = {
- {EFI_PROPERTIES_TABLE_GUID, &prop_phys, "PROP" },
{UGA_IO_PROTOCOL_GUID, &uga_phys, "UGA" },
#ifdef CONFIG_X86_UV
{UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" },
@@ -82,7 +80,6 @@ static const unsigned long * const efi_tables[] = {
&efi_runtime,
&efi_config_table,
&efi.esrt,
- &prop_phys,
&efi_mem_attr_table,
#ifdef CONFIG_EFI_RCI2_TABLE
&rci2_table_phys,
@@ -502,22 +499,6 @@ void __init efi_init(void)
return;
}
- /* Parse the EFI Properties table if it exists */
- if (prop_phys != EFI_INVALID_TABLE_ADDR) {
- efi_properties_table_t *tbl;
-
- tbl = early_memremap_ro(prop_phys, sizeof(*tbl));
- if (tbl == NULL) {
- pr_err("Could not map Properties table!\n");
- } else {
- if (tbl->memory_protection_attribute &
- EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA)
- set_bit(EFI_NX_PE_DATA, &efi.flags);
-
- early_memunmap(tbl, sizeof(*tbl));
- }
- }
-
set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
efi_clean_memmap();
@@ -784,6 +765,7 @@ static void __init kexec_enter_virtual_mode(void)
efi_sync_low_kernel_mappings();
efi_native_runtime_setup();
+ efi_runtime_update_mappings();
#endif
}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 91d31ac422d6..ac57259a432b 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -412,51 +412,9 @@ static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *m
void __init efi_runtime_update_mappings(void)
{
- efi_memory_desc_t *md;
-
- /*
- * Use the EFI Memory Attribute Table for mapping permissions if it
- * exists, since it is intended to supersede EFI_PROPERTIES_TABLE.
- */
if (efi_enabled(EFI_MEM_ATTR)) {
efi_disable_ibt_for_runtime = false;
efi_memattr_apply_permissions(NULL, efi_update_mem_attr);
- return;
- }
-
- /*
- * EFI_MEMORY_ATTRIBUTES_TABLE is intended to replace
- * EFI_PROPERTIES_TABLE. So, use EFI_PROPERTIES_TABLE to update
- * permissions only if EFI_MEMORY_ATTRIBUTES_TABLE is not
- * published by the firmware. Even if we find a buggy implementation of
- * EFI_MEMORY_ATTRIBUTES_TABLE, don't fall back to
- * EFI_PROPERTIES_TABLE, because of the same reason.
- */
-
- if (!efi_enabled(EFI_NX_PE_DATA))
- return;
-
- for_each_efi_memory_desc(md) {
- unsigned long pf = 0;
-
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
-
- if (!(md->attribute & EFI_MEMORY_WB))
- pf |= _PAGE_PCD;
-
- if ((md->attribute & EFI_MEMORY_XP) ||
- (md->type == EFI_RUNTIME_SERVICES_DATA))
- pf |= _PAGE_NX;
-
- if (!(md->attribute & EFI_MEMORY_RO) &&
- (md->type != EFI_RUNTIME_SERVICES_CODE))
- pf |= _PAGE_RW;
-
- if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
- pf |= _PAGE_ENC;
-
- efi_update_mappings(md, pf);
}
}
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index f0cc00032751..846bf49f2508 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -656,8 +656,7 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
}
static const struct x86_cpu_id efi_capsule_quirk_ids[] = {
- X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000,
- &qrk_capsule_setup_info),
+ X86_MATCH_VFM(INTEL_QUARK_X1000, &qrk_capsule_setup_info),
{ }
};
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 27288d8d3f71..cd7e0c71adde 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -358,18 +358,18 @@ static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return ret;
}
- ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
- if (ret) {
- dev_err(&pdev->dev, "I/O memory remapping failed\n");
- return ret;
- }
-
pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL);
if (!pwr)
return -ENOMEM;
+ pwr->regs = pcim_iomap_region(pdev, 0, "intel_mid_pwr");
+ ret = PTR_ERR_OR_ZERO(pwr->regs);
+ if (ret) {
+ dev_err(&pdev->dev, "Could not request / ioremap I/O-Mem: %d\n", ret);
+ return ret;
+ }
+
pwr->dev = dev;
- pwr->regs = pcim_iomap_table(pdev)[0];
pwr->irq = pdev->irq;
mutex_init(&pwr->lock);
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index d3d456925b2a..ee25b032c0b3 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -569,7 +569,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
}
static const struct x86_cpu_id imr_ids[] __initconst = {
- X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL),
+ X86_MATCH_VFM(INTEL_QUARK_X1000, NULL),
{}
};
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 84ba715f44d1..657925b0f428 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -105,7 +105,7 @@ static void __init imr_self_test(void)
}
static const struct x86_cpu_id imr_ids[] __initconst = {
- X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL),
+ X86_MATCH_VFM(INTEL_QUARK_X1000, NULL),
{}
};
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index c5f3bbdbdcfe..5591a2d9cfe8 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -73,7 +73,7 @@ static struct platform_driver iris_driver = {
.name = "iris",
},
.probe = iris_probe,
- .remove_new = iris_remove,
+ .remove = iris_remove,
};
static struct resource iris_resources[] = {
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index 6a9c42de74e7..424eeae12759 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -159,7 +159,7 @@ static struct platform_driver cs5535_pms_driver = {
.name = "cs5535-pms",
},
.probe = xo1_pm_probe,
- .remove_new = xo1_pm_remove,
+ .remove = xo1_pm_remove,
};
static struct platform_driver cs5535_acpi_driver = {
@@ -167,7 +167,7 @@ static struct platform_driver cs5535_acpi_driver = {
.name = "olpc-xo1-pm-acpi",
},
.probe = xo1_pm_probe,
- .remove_new = xo1_pm_remove,
+ .remove = xo1_pm_remove,
};
static int __init xo1_pm_init(void)
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 46d42ff6e18a..ccb23c73cbe8 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -616,7 +616,7 @@ static struct platform_driver xo1_sci_driver = {
.dev_groups = lid_groups,
},
.probe = xo1_sci_probe,
- .remove_new = xo1_sci_remove,
+ .remove = xo1_sci_remove,
.suspend = xo1_sci_suspend,
.resume = xo1_sci_resume,
};
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 64fca49cd88f..4733a5f467b8 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -6,7 +6,9 @@
.code32
.text
+#ifdef CONFIG_X86_32
#define _pa(x) ((x) - __START_KERNEL_map)
+#endif
#define rva(x) ((x) - pvh_start_xen)
#include <linux/elfnote.h>
@@ -52,7 +54,7 @@
#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8)
#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8)
-SYM_CODE_START_LOCAL(pvh_start_xen)
+SYM_CODE_START(pvh_start_xen)
UNWIND_HINT_END_OF_STACK
cld
@@ -72,8 +74,7 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
movl $0, %esp
leal rva(gdt)(%ebp), %eax
- leal rva(gdt_start)(%ebp), %ecx
- movl %ecx, 2(%eax)
+ addl %eax, 2(%eax)
lgdt (%eax)
mov $PVH_DS_SEL,%eax
@@ -103,10 +104,23 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
btsl $_EFER_LME, %eax
wrmsr
+ /*
+ * Reuse the non-relocatable symbol emitted for the ELF note to
+ * subtract the build time physical address of pvh_start_xen() from
+ * its actual runtime address, without relying on absolute 32-bit ELF
+ * relocations, as these are not supported by the linker when running
+ * in -pie mode, and should be avoided in .head.text in general.
+ */
mov %ebp, %ebx
- subl $_pa(pvh_start_xen), %ebx /* offset */
+ subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx
jz .Lpagetable_done
+ /*
+ * Store the resulting load offset in phys_base. __pa() needs
+ * phys_base set to calculate the hypercall page in xen_pvh_init().
+ */
+ movl %ebx, rva(phys_base)(%ebp)
+
/* Fixup page-tables for relocation. */
leal rva(pvh_init_top_pgt)(%ebp), %edi
movl $PTRS_PER_PGD, %ecx
@@ -165,20 +179,12 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
xor %edx, %edx
wrmsr
- /*
- * Calculate load offset and store in phys_base. __pa() needs
- * phys_base set to calculate the hypercall page in xen_pvh_init().
- */
- movq %rbp, %rbx
- subq $_pa(pvh_start_xen), %rbx
- movq %rbx, phys_base(%rip)
- call xen_prepare_pvh
- /*
- * Clear phys_base. __startup_64 will *add* to its value,
- * so reset to 0.
- */
- xor %rbx, %rbx
- movq %rbx, phys_base(%rip)
+ /* Call xen_prepare_pvh() via the kernel virtual mapping */
+ leaq xen_prepare_pvh(%rip), %rax
+ subq phys_base(%rip), %rax
+ addq $__START_KERNEL_map, %rax
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rax
/* startup_64 expects boot_params in %rsi. */
lea pvh_bootparams(%rip), %rsi
@@ -217,8 +223,8 @@ SYM_CODE_END(pvh_start_xen)
.section ".init.data","aw"
.balign 8
SYM_DATA_START_LOCAL(gdt)
- .word gdt_end - gdt_start
- .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
+ .word gdt_end - gdt_start - 1
+ .long gdt_start - gdt
.word 0
SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
@@ -300,5 +306,5 @@ SYM_DATA_END(pvh_level2_kernel_pgt)
.long KERNEL_IMAGE_SIZE - 1)
#endif
- ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
- _ASM_PTR (pvh_start_xen - __START_KERNEL_map))
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry;
+ xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index c101bed61940..27441e5863b2 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -56,6 +56,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
[S_ABS] =
"^(xen_irq_disable_direct_reloc$|"
"xen_save_fl_direct_reloc$|"
+ "xen_elfnote_.+_offset$|"
"VDSO|"
"__kcfi_typeid_|"
"__crc_)",
@@ -89,7 +90,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"init_per_cpu__.*|"
"__end_rodata_hpage_align|"
#endif
- "__vvar_page|"
"_end)$"
};
diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile
index ef2a31bdcc70..eca6d71355fa 100644
--- a/arch/x86/virt/svm/Makefile
+++ b/arch/x86/virt/svm/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_KVM_AMD_SEV) += sev.o
+obj-$(CONFIG_CPU_SUP_AMD) += cmdline.o
diff --git a/arch/x86/virt/svm/cmdline.c b/arch/x86/virt/svm/cmdline.c
new file mode 100644
index 000000000000..affa2759fa20
--- /dev/null
+++ b/arch/x86/virt/svm/cmdline.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM-SEV command line parsing support
+ *
+ * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc.
+ *
+ * Author: Michael Roth <[email protected]>
+ */
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/cache.h>
+#include <linux/cpufeature.h>
+
+#include <asm/sev-common.h>
+
+struct sev_config sev_cfg __read_mostly;
+
+static int __init init_sev_config(char *str)
+{
+ char *s;
+
+ while ((s = strsep(&str, ","))) {
+ if (!strcmp(s, "debug")) {
+ sev_cfg.debug = true;
+ continue;
+ }
+
+ if (!strcmp(s, "nosnp")) {
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ continue;
+ } else {
+ goto warn;
+ }
+ }
+
+warn:
+ pr_info("SEV command-line option '%s' was not recognized\n", s);
+ }
+
+ return 1;
+}
+__setup("sev=", init_sev_config);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 758bcd47b72d..7f6c69dbb816 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -94,7 +94,8 @@ SYM_CODE_END(xen_cpu_bringup_again)
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map)
/* Map the p2m table to a 512GB-aligned user address. */
ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad (PUD_SIZE * PTRS_PER_PUD))
- ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
+ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .globl xen_elfnote_entry;
+ xen_elfnote_entry: _ASM_PTR xen_elfnote_entry_value - .)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables")
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
@@ -115,7 +116,8 @@ SYM_CODE_END(xen_cpu_bringup_again)
#else
# define FEATURES_DOM0 0
#endif
- ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
+ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .globl xen_elfnote_hypercall_page;
+ xen_elfnote_hypercall_page: _ASM_PTR xen_elfnote_hypercall_page_value - .)
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
.long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0)
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")