diff options
-rw-r--r-- | arch/x86/coco/tdx/tdx.c | 94 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/set_memory.h | 3 | ||||
-rw-r--r-- | arch/x86/mm/pat/set_memory.c | 42 |
4 files changed, 141 insertions, 3 deletions
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 979891e97d83..078e2bac2553 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -7,6 +7,7 @@ #include <linux/cpufeature.h> #include <linux/export.h> #include <linux/io.h> +#include <linux/kexec.h> #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> @@ -14,6 +15,7 @@ #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/pgtable.h> +#include <asm/set_memory.h> /* MMIO direction */ #define EPT_READ 0 @@ -831,6 +833,95 @@ static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages, return 0; } +/* Stop new private<->shared conversions */ +static void tdx_kexec_begin(void) +{ + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + /* + * Crash kernel reaches here with interrupts disabled: can't wait for + * conversions to finish. + * + * If race happened, just report and proceed. + */ + if (!set_memory_enc_stop_conversion()) + pr_warn("Failed to stop shared<->private conversions\n"); +} + +/* Walk direct mapping and convert all shared memory back to private */ +static void tdx_kexec_finish(void) +{ + unsigned long addr, end; + long found = 0, shared; + + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; + + lockdep_assert_irqs_disabled(); + + addr = PAGE_OFFSET; + end = PAGE_OFFSET + get_max_mapped(); + + while (addr < end) { + unsigned long size; + unsigned int level; + pte_t *pte; + + pte = lookup_address(addr, &level); + size = page_level_size(level); + + if (pte && pte_decrypted(*pte)) { + int pages = size / PAGE_SIZE; + + /* + * Touching memory with shared bit set triggers implicit + * conversion to shared. + * + * Make sure nobody touches the shared range from + * now on. + */ + set_pte(pte, __pte(0)); + + /* + * Memory encryption state persists across kexec. + * If tdx_enc_status_changed() fails in the first + * kernel, it leaves memory in an unknown state. + * + * If that memory remains shared, accessing it in the + * *next* kernel through a private mapping will result + * in an unrecoverable guest shutdown. + * + * The kdump kernel boot is not impacted as it uses + * a pre-reserved memory range that is always private. + * However, gathering crash information could lead to + * a crash if it accesses unconverted memory through + * a private mapping which is possible when accessing + * that memory through /proc/vmcore, for example. + * + * In all cases, print error info in order to leave + * enough bread crumbs for debugging. + */ + if (!tdx_enc_status_changed(addr, pages, true)) { + pr_err("Failed to unshare range %#lx-%#lx\n", + addr, addr + size); + } + + found += pages; + } + + addr += size; + } + + __flush_tlb_all(); + + shared = atomic_long_read(&nr_shared); + if (shared != found) { + pr_err("shared page accounting is off\n"); + pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found); + } +} + void __init tdx_early_init(void) { struct tdx_module_args args = { @@ -890,6 +981,9 @@ void __init tdx_early_init(void) x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; + x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 65b8e5bb902c..e39311a89bf4 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } +static inline bool pte_decrypted(pte_t pte) +{ + return cc_mkdec(pte_val(pte)) == pte_val(pte); +} + #define pmd_dirty pmd_dirty static inline bool pmd_dirty(pmd_t pmd) { diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 9aee31862b4a..4b2abce2e3e7 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); + +bool set_memory_enc_stop_conversion(void); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); + int set_memory_np_noalias(unsigned long addr, int numpages); int set_memory_nonglobal(unsigned long addr, int numpages); int set_memory_global(unsigned long addr, int numpages); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index a7a7a6c6a3fb..443a97e515c0 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2227,12 +2227,48 @@ vmm_fail: return ret; } +/* + * The lock serializes conversions between private and shared memory. + * + * It is taken for read on conversion. A write lock guarantees that no + * concurrent conversions are in progress. + */ +static DECLARE_RWSEM(mem_enc_lock); + +/* + * Stop new private<->shared conversions. + * + * Taking the exclusive mem_enc_lock waits for in-flight conversions to complete. + * The lock is not released to prevent new conversions from being started. + */ +bool set_memory_enc_stop_conversion(void) +{ + /* + * In a crash scenario, sleep is not allowed. Try to take the lock. + * Failure indicates that there is a race with the conversion. + */ + if (oops_in_progress) + return down_write_trylock(&mem_enc_lock); + + down_write(&mem_enc_lock); + + return true; +} + static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) { - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return __set_memory_enc_pgtable(addr, numpages, enc); + int ret = 0; - return 0; + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + if (!down_read_trylock(&mem_enc_lock)) + return -EBUSY; + + ret = __set_memory_enc_pgtable(addr, numpages, enc); + + up_read(&mem_enc_lock); + } + + return ret; } int set_memory_encrypted(unsigned long addr, int numpages) |