aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/coco/tdx/tdx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/coco/tdx/tdx.c')
-rw-r--r--arch/x86/coco/tdx/tdx.c121
1 files changed, 112 insertions, 9 deletions
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index c1cb90369915..078e2bac2553 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -7,6 +7,7 @@
#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kexec.h>
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
@@ -14,6 +15,7 @@
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
+#include <asm/set_memory.h>
/* MMIO direction */
#define EPT_READ 0
@@ -38,6 +40,8 @@
#define TDREPORT_SUBTYPE_0 0
+static atomic_long_t nr_shared;
+
/* Called from __tdx_hypercall() for unrecoverable failure */
noinstr void __noreturn __tdx_hypercall_failed(void)
{
@@ -798,28 +802,124 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
return true;
}
-static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
- bool enc)
+static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+ bool enc)
{
/*
* Only handle shared->private conversion here.
* See the comment in tdx_early_init().
*/
- if (enc)
- return tdx_enc_status_changed(vaddr, numpages, enc);
- return true;
+ if (enc && !tdx_enc_status_changed(vaddr, numpages, enc))
+ return -EIO;
+
+ return 0;
}
-static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
bool enc)
{
/*
* Only handle private->shared conversion here.
* See the comment in tdx_early_init().
*/
- if (!enc)
- return tdx_enc_status_changed(vaddr, numpages, enc);
- return true;
+ if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc))
+ return -EIO;
+
+ if (enc)
+ atomic_long_sub(numpages, &nr_shared);
+ else
+ atomic_long_add(numpages, &nr_shared);
+
+ return 0;
+}
+
+/* Stop new private<->shared conversions */
+static void tdx_kexec_begin(void)
+{
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ /*
+ * Crash kernel reaches here with interrupts disabled: can't wait for
+ * conversions to finish.
+ *
+ * If race happened, just report and proceed.
+ */
+ if (!set_memory_enc_stop_conversion())
+ pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+/* Walk direct mapping and convert all shared memory back to private */
+static void tdx_kexec_finish(void)
+{
+ unsigned long addr, end;
+ long found = 0, shared;
+
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ lockdep_assert_irqs_disabled();
+
+ addr = PAGE_OFFSET;
+ end = PAGE_OFFSET + get_max_mapped();
+
+ while (addr < end) {
+ unsigned long size;
+ unsigned int level;
+ pte_t *pte;
+
+ pte = lookup_address(addr, &level);
+ size = page_level_size(level);
+
+ if (pte && pte_decrypted(*pte)) {
+ int pages = size / PAGE_SIZE;
+
+ /*
+ * Touching memory with shared bit set triggers implicit
+ * conversion to shared.
+ *
+ * Make sure nobody touches the shared range from
+ * now on.
+ */
+ set_pte(pte, __pte(0));
+
+ /*
+ * Memory encryption state persists across kexec.
+ * If tdx_enc_status_changed() fails in the first
+ * kernel, it leaves memory in an unknown state.
+ *
+ * If that memory remains shared, accessing it in the
+ * *next* kernel through a private mapping will result
+ * in an unrecoverable guest shutdown.
+ *
+ * The kdump kernel boot is not impacted as it uses
+ * a pre-reserved memory range that is always private.
+ * However, gathering crash information could lead to
+ * a crash if it accesses unconverted memory through
+ * a private mapping which is possible when accessing
+ * that memory through /proc/vmcore, for example.
+ *
+ * In all cases, print error info in order to leave
+ * enough bread crumbs for debugging.
+ */
+ if (!tdx_enc_status_changed(addr, pages, true)) {
+ pr_err("Failed to unshare range %#lx-%#lx\n",
+ addr, addr + size);
+ }
+
+ found += pages;
+ }
+
+ addr += size;
+ }
+
+ __flush_tlb_all();
+
+ shared = atomic_long_read(&nr_shared);
+ if (shared != found) {
+ pr_err("shared page accounting is off\n");
+ pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
+ }
}
void __init tdx_early_init(void)
@@ -881,6 +981,9 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+ x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+
/*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled