aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology.c34
-rw-r--r--arch/x86/mm/extable.c78
-rw-r--r--arch/x86/mm/fault.c10
-rw-r--r--arch/x86/mm/mem_encrypt.c55
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c40
-rw-r--r--arch/x86/mm/pat/memtype.c9
-rw-r--r--arch/x86/mm/pat/set_memory.c24
8 files changed, 161 insertions, 90 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c80febc44cd2..6ec103bedcf1 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -16,6 +16,7 @@ KASAN_SANITIZE_pgprot.o := n
KCSAN_SANITIZE := n
# Avoid recursion by not calling KMSAN hooks for CEA code.
KMSAN_SANITIZE_cpu_entry_area.o := n
+KMSAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_mem_encrypt.o = -pg
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index b3ca7d23e4b0..9332b36a1091 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -54,13 +54,11 @@ static __init int find_northbridge(void)
int __init amd_numa_init(void)
{
- u64 start = PFN_PHYS(0);
+ unsigned int numnodes, cores, apicid;
+ u64 prevbase, start = PFN_PHYS(0);
u64 end = PFN_PHYS(max_pfn);
- unsigned numnodes;
- u64 prevbase;
- int i, j, nb;
u32 nodeid, reg;
- unsigned int bits, cores, apicid_base;
+ int i, j, nb;
if (!early_pci_allowed())
return -EINVAL;
@@ -158,26 +156,18 @@ int __init amd_numa_init(void)
return -ENOENT;
/*
- * We seem to have valid NUMA configuration. Map apicids to nodes
- * using the coreid bits from early_identify_cpu.
+ * We seem to have valid NUMA configuration. Map apicids to nodes
+ * using the size of the core domain in the APIC space.
*/
- bits = boot_cpu_data.x86_coreid_bits;
- cores = 1 << bits;
- apicid_base = 0;
+ cores = topology_get_domain_size(TOPO_CORE_DOMAIN);
- /*
- * get boot-time SMP configuration:
- */
- early_get_smp_config();
+ apicid = boot_cpu_physical_apicid;
+ if (apicid > 0)
+ pr_info("BSP APIC ID: %02x\n", apicid);
- if (boot_cpu_physical_apicid > 0) {
- pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
- apicid_base = boot_cpu_physical_apicid;
+ for_each_node_mask(i, numa_nodes_parsed) {
+ for (j = 0; j < cores; j++, apicid++)
+ set_apicid_to_node(apicid, i);
}
-
- for_each_node_mask(i, numa_nodes_parsed)
- for (j = apicid_base; j < cores + apicid_base; j++)
- set_apicid_to_node((i << bits) + j, i);
-
return 0;
}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 271dcb2deabc..b522933bfa56 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -6,6 +6,7 @@
#include <xen/xen.h>
#include <asm/fpu/api.h>
+#include <asm/fred.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
@@ -223,6 +224,79 @@ static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup,
return ex_handler_uaccess(fixup, regs, trapnr, fault_address);
}
+#ifdef CONFIG_X86_FRED
+static bool ex_handler_eretu(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, unsigned long error_code)
+{
+ struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, orig_ax));
+ unsigned short ss = uregs->ss;
+ unsigned short cs = uregs->cs;
+
+ /*
+ * Move the NMI bit from the invalid stack frame, which caused ERETU
+ * to fault, to the fault handler's stack frame, thus to unblock NMI
+ * with the fault handler's ERETS instruction ASAP if NMI is blocked.
+ */
+ regs->fred_ss.nmi = uregs->fred_ss.nmi;
+
+ /*
+ * Sync event information to uregs, i.e., the ERETU return frame, but
+ * is it safe to write to the ERETU return frame which is just above
+ * current event stack frame?
+ *
+ * The RSP used by FRED to push a stack frame is not the value in %rsp,
+ * it is calculated from %rsp with the following 2 steps:
+ * 1) RSP = %rsp - (IA32_FRED_CONFIG & 0x1c0) // Reserve N*64 bytes
+ * 2) RSP = RSP & ~0x3f // Align to a 64-byte cache line
+ * when an event delivery doesn't trigger a stack level change.
+ *
+ * Here is an example with N*64 (N=1) bytes reserved:
+ *
+ * 64-byte cache line ==> ______________
+ * |___Reserved___|
+ * |__Event_data__|
+ * |_____SS_______|
+ * |_____RSP______|
+ * |_____FLAGS____|
+ * |_____CS_______|
+ * |_____IP_______|
+ * 64-byte cache line ==> |__Error_code__| <== ERETU return frame
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * |______________|
+ * 64-byte cache line ==> |______________| <== RSP after step 1) and 2)
+ * |___Reserved___|
+ * |__Event_data__|
+ * |_____SS_______|
+ * |_____RSP______|
+ * |_____FLAGS____|
+ * |_____CS_______|
+ * |_____IP_______|
+ * 64-byte cache line ==> |__Error_code__| <== ERETS return frame
+ *
+ * Thus a new FRED stack frame will always be pushed below a previous
+ * FRED stack frame ((N*64) bytes may be reserved between), and it is
+ * safe to write to a previous FRED stack frame as they never overlap.
+ */
+ fred_info(uregs)->edata = fred_event_data(regs);
+ uregs->ssx = regs->ssx;
+ uregs->fred_ss.ss = ss;
+ /* The NMI bit was moved away above */
+ uregs->fred_ss.nmi = 0;
+ uregs->csx = regs->csx;
+ uregs->fred_cs.sl = 0;
+ uregs->fred_cs.wfe = 0;
+ uregs->cs = cs;
+ uregs->orig_ax = error_code;
+
+ return ex_handler_default(fixup, regs);
+}
+#endif
+
int ex_get_fixup_type(unsigned long ip)
{
const struct exception_table_entry *e = search_exception_tables(ip);
@@ -300,6 +374,10 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
return ex_handler_ucopy_len(e, regs, trapnr, fault_addr, reg, imm);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(e, regs, fault_addr);
+#ifdef CONFIG_X86_FRED
+ case EX_TYPE_ERETU:
+ return ex_handler_eretu(e, regs, error_code);
+#endif
}
BUG();
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d6375b3c633b..cdb5045a0428 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -34,6 +34,8 @@
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
#include <asm/irq_stack.h>
+#include <asm/fred.h>
+#include <asm/sev.h> /* snp_dump_hva_rmpentry() */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -547,6 +549,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
!(error_code & X86_PF_PROT) ? "not-present page" :
(error_code & X86_PF_RSVD) ? "reserved bit violation" :
(error_code & X86_PF_PK) ? "protection keys violation" :
+ (error_code & X86_PF_RMP) ? "RMP violation" :
"permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
@@ -579,6 +582,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
}
dump_pagetable(address);
+
+ if (error_code & X86_PF_RMP)
+ snp_dump_hva_rmpentry(address);
}
static noinline void
@@ -1509,8 +1515,10 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
{
- unsigned long address = read_cr2();
irqentry_state_t state;
+ unsigned long address;
+
+ address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2();
prefetchw(&current->mm->mmap_lock);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index c290c55b632b..6f3b3e028718 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -14,6 +14,8 @@
#include <linux/mem_encrypt.h>
#include <linux/virtio_anchor.h>
+#include <asm/sev.h>
+
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{
@@ -42,38 +44,45 @@ bool force_dma_unencrypted(struct device *dev)
static void print_mem_encrypt_feature_info(void)
{
- pr_info("Memory Encryption Features active:");
-
- if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
- pr_cont(" Intel TDX\n");
- return;
- }
+ pr_info("Memory Encryption Features active: ");
- pr_cont(" AMD");
+ switch (cc_vendor) {
+ case CC_VENDOR_INTEL:
+ pr_cont("Intel TDX\n");
+ break;
+ case CC_VENDOR_AMD:
+ pr_cont("AMD");
- /* Secure Memory Encryption */
- if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
+ /* Secure Memory Encryption */
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
/*
* SME is mutually exclusive with any of the SEV
* features below.
- */
- pr_cont(" SME\n");
- return;
- }
+ */
+ pr_cont(" SME\n");
+ return;
+ }
- /* Secure Encrypted Virtualization */
- if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
- pr_cont(" SEV");
+ /* Secure Encrypted Virtualization */
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ pr_cont(" SEV");
+
+ /* Encrypted Register State */
+ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
+ pr_cont(" SEV-ES");
- /* Encrypted Register State */
- if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
- pr_cont(" SEV-ES");
+ /* Secure Nested Paging */
+ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ pr_cont(" SEV-SNP");
- /* Secure Nested Paging */
- if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
- pr_cont(" SEV-SNP");
+ pr_cont("\n");
- pr_cont("\n");
+ sev_show_status();
+
+ break;
+ default:
+ pr_cont("Unknown\n");
+ }
}
/* Architecture __weak replacement functions */
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index d73aeb16417f..0166ab1780cc 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
@@ -305,7 +304,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* instrumentation or checking boot_cpu_data in the cc_platform_has()
* function.
*/
- if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
+ if (!sme_get_me_mask() ||
+ RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
return;
/*
@@ -504,10 +504,9 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
void __init sme_enable(struct boot_params *bp)
{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+ const char *cmdline_ptr, *cmdline_arg, *cmdline_on;
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
- bool active_by_default;
unsigned long me_mask;
char buffer[16];
bool snp;
@@ -543,11 +542,11 @@ void __init sme_enable(struct boot_params *bp)
me_mask = 1UL << (ebx & 0x3f);
/* Check the SEV MSR whether SEV or SME is enabled */
- sev_status = __rdmsr(MSR_AMD64_SEV);
- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+ RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */
- if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED))
snp_abort();
/* Check if memory encryption is enabled */
@@ -573,7 +572,6 @@ void __init sme_enable(struct boot_params *bp)
return;
} else {
/* SEV state cannot be controlled by a command line option */
- sme_me_mask = me_mask;
goto out;
}
@@ -588,31 +586,17 @@ void __init sme_enable(struct boot_params *bp)
asm ("lea sme_cmdline_on(%%rip), %0"
: "=r" (cmdline_on)
: "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
- : "=r" (cmdline_off)
- : "p" (sme_cmdline_off));
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
- active_by_default = true;
- else
- active_by_default = false;
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
((u64)bp->ext_cmd_line_ptr << 32));
- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 ||
+ strncmp(buffer, cmdline_on, sizeof(buffer)))
return;
- if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
- sme_me_mask = me_mask;
- else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
- sme_me_mask = 0;
- else
- sme_me_mask = active_by_default ? me_mask : 0;
out:
- if (sme_me_mask) {
- physical_mask &= ~sme_me_mask;
- cc_vendor = CC_VENDOR_AMD;
- cc_set_mask(sme_me_mask);
- }
+ RIP_REL_REF(sme_me_mask) = me_mask;
+ physical_mask &= ~me_mask;
+ cc_vendor = CC_VENDOR_AMD;
+ cc_set_mask(me_mask);
}
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0904d7e8e126..0d72183b5dd0 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -240,6 +240,8 @@ void pat_cpu_init(void)
}
wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+
+ __flush_tlb_all();
}
/**
@@ -296,13 +298,8 @@ void __init pat_bp_init(void)
/*
* Xen PV doesn't allow to set PAT MSR, but all cache modes are
* supported.
- * When running as TDX guest setting the PAT MSR won't work either
- * due to the requirement to set CR0.CD when doing so. Rely on
- * firmware to have set the PAT MSR correctly.
*/
- if (pat_disabled ||
- cpu_feature_enabled(X86_FEATURE_XENPV) ||
- cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ if (pat_disabled || cpu_feature_enabled(X86_FEATURE_XENPV)) {
init_cache_modes(pat_msr_val);
return;
}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index e9b448d1b1b7..102880404046 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address)
* areas on 32-bit NUMA systems. The percpu areas can
* end up in this kind of memory, for instance.
*
- * This could be optimized, but it is only intended to be
- * used at initialization time, and keeping it
- * unoptimized should increase the testing coverage for
- * the more obscure platforms.
+ * Note that as long as the PTEs are well-formed with correct PFNs, this
+ * works without checking the PRESENT bit in the leaf PTE. This is unlike
+ * the similar vmalloc_to_page() and derivatives. Callers may depend on
+ * this behavior.
+ *
+ * This could be optimized, but it is only used in paths that are not perf
+ * sensitive, and keeping it unoptimized should increase the testing coverage
+ * for the more obscure platforms.
*/
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
@@ -2041,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn)
return rc;
}
-static int set_memory_p(unsigned long *addr, int numpages)
-{
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
-}
-
/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
{
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
- return set_memory_p(&addr, 1);
+ return set_memory_p(addr, 1);
}
EXPORT_SYMBOL_GPL(clear_mce_nospec);
#endif /* CONFIG_X86_64 */
@@ -2104,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages)
CPA_NO_CHECK_ALIAS, NULL);
}
+int set_memory_p(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
+}
+
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),