Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--  arch/arm64/mm/Makefile       |   1
-rw-r--r--  arch/arm64/mm/copypage.c     |  27
-rw-r--r--  arch/arm64/mm/fault.c        |  40
-rw-r--r--  arch/arm64/mm/fixmap.c       |   9
-rw-r--r--  arch/arm64/mm/gcs.c          | 254
-rw-r--r--  arch/arm64/mm/hugetlbpage.c  |  21
-rw-r--r--  arch/arm64/mm/init.c         |  10
-rw-r--r--  arch/arm64/mm/mmap.c         |   9
-rw-r--r--  arch/arm64/mm/mmu.c          |  10
-rw-r--r--  arch/arm64/mm/pageattr.c     |  98
-rw-r--r--  arch/arm64/mm/proc.S         |  19
-rw-r--r--  arch/arm64/mm/ptdump.c       |   8
12 files changed, 479 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 2fc8c6dd0407..fc92170a8f37 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o
+obj-$(CONFIG_ARM64_GCS) += gcs.o
KASAN_SANITIZE_physaddr.o += n
obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index a7bb20055ce0..87b3f1a25535 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -18,15 +18,40 @@ void copy_highpage(struct page *to, struct page *from)
{
void *kto = page_address(to);
void *kfrom = page_address(from);
+ struct folio *src = page_folio(from);
+ struct folio *dst = page_folio(to);
+ unsigned int i, nr_pages;
copy_page(kto, kfrom);
if (kasan_hw_tags_enabled())
page_kasan_tag_reset(to);
- if (system_supports_mte() && page_mte_tagged(from)) {
+ if (!system_supports_mte())
+ return;
+
+ if (folio_test_hugetlb(src) &&
+ folio_test_hugetlb_mte_tagged(src)) {
+ if (!folio_try_hugetlb_mte_tagging(dst))
+ return;
+
+ /*
+ * Populate tags for all subpages.
+ *
+ * Don't assume the first page is head page since
+ * huge page copy may start from any subpage.
+ */
+ nr_pages = folio_nr_pages(src);
+ for (i = 0; i < nr_pages; i++) {
+ kfrom = page_address(folio_page(src, i));
+ kto = page_address(folio_page(dst, i));
+ mte_copy_page_tags(kto, kfrom);
+ }
+ folio_set_hugetlb_mte_tagged(dst);
+ } else if (page_mte_tagged(from)) {
/* It's a new page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(to));
+
mte_copy_page_tags(kto, kfrom);
set_page_mte_tagged(to);
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 8b281cf308b3..c2f89a678ac0 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -504,6 +504,14 @@ static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
false);
}
+static bool is_gcs_fault(unsigned long esr)
+{
+ if (!esr_is_data_abort(esr))
+ return false;
+
+ return ESR_ELx_ISS2(esr) & ESR_ELx_GCS;
+}
+
static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -518,6 +526,23 @@ static bool is_write_abort(unsigned long esr)
return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}
+static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr)
+{
+ if (!system_supports_gcs())
+ return false;
+
+ if (unlikely(is_gcs_fault(esr))) {
+ /* GCS accesses must be performed on a GCS page */
+ if (!(vma->vm_flags & VM_SHADOW_STACK))
+ return true;
+ } else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) {
+ /* Only GCS operations can write to a GCS page */
+ return esr_is_data_abort(esr) && is_write_abort(esr);
+ }
+
+ return false;
+}
+
static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
struct pt_regs *regs)
{
@@ -554,6 +579,14 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
/* It was exec fault */
vm_flags = VM_EXEC;
mm_flags |= FAULT_FLAG_INSTRUCTION;
+ } else if (is_gcs_fault(esr)) {
+ /*
+ * The GCS permission on a page implies both read and
+ * write so always handle any GCS fault as a write fault,
+ * we need to trigger CoW even for GCS reads.
+ */
+ vm_flags = VM_WRITE;
+ mm_flags |= FAULT_FLAG_WRITE;
} else if (is_write_abort(esr)) {
/* It was write fault */
vm_flags = VM_WRITE;
@@ -587,6 +620,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
if (!vma)
goto lock_mmap;
+ if (is_invalid_gcs_access(vma, esr)) {
+ vma_end_read(vma);
+ fault = 0;
+ si_code = SEGV_ACCERR;
+ goto bad_area;
+ }
+
if (!(vma->vm_flags & vm_flags)) {
vma_end_read(vma);
fault = 0;
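
An illustrative userspace check of the new fault handling (not part of the patch): a minimal sketch, assuming a GCS-capable CPU and kernel plus the map_shadow_stack() syscall added later in this series (see arch/arm64/mm/gcs.c below); the fallback syscall number is an assumption. A plain load from a shadow-stack page is permitted, while a plain store is rejected by is_invalid_gcs_access() and reported as SEGV_ACCERR.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_map_shadow_stack
#define __NR_map_shadow_stack 453	/* assumed syscall number */
#endif

static void segv_handler(int sig, siginfo_t *info, void *unused)
{
	/* Only GCS operations may write a GCS page: expect SEGV_ACCERR. */
	_exit(info->si_code == SEGV_ACCERR ? 0 : 1);
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction	= segv_handler,
		.sa_flags	= SA_SIGINFO,
	};
	unsigned long size = sysconf(_SC_PAGESIZE);
	unsigned long *gcs;
	long ret;

	sigaction(SIGSEGV, &sa, NULL);

	ret = syscall(__NR_map_shadow_stack, 0UL, size, 0U);
	if (ret == -1)
		return 1;
	gcs = (unsigned long *)ret;

	printf("plain read of GCS page: %#lx\n", gcs[0]);	/* permitted */
	gcs[0] = 1;	/* plain store: faults with SEGV_ACCERR */
	return 2;	/* not reached */
}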
diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c
index de1e09d986ad..c5c5425791da 100644
--- a/arch/arm64/mm/fixmap.c
+++ b/arch/arm64/mm/fixmap.c
@@ -47,7 +47,8 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr)
if (pmd_none(pmd)) {
ptep = bm_pte[BM_PTE_TABLE_IDX(addr)];
- __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, __pa_symbol(ptep),
+ PMD_TYPE_TABLE | PMD_TABLE_AF);
}
}
@@ -59,7 +60,8 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr,
pmd_t *pmdp;
if (pud_none(pud))
- __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);
+ __pud_populate(pudp, __pa_symbol(bm_pmd),
+ PUD_TYPE_TABLE | PUD_TABLE_AF);
pmdp = pmd_offset_kimg(pudp, addr);
do {
@@ -86,7 +88,8 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr,
}
if (p4d_none(p4d))
- __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);
+ __p4d_populate(p4dp, __pa_symbol(bm_pud),
+ P4D_TYPE_TABLE | P4D_TABLE_AF);
pudp = pud_offset_kimg(p4dp, addr);
early_fixmap_init_pmd(pudp, addr, end);
diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
new file mode 100644
index 000000000000..5c46ec527b1c
--- /dev/null
+++ b/arch/arm64/mm/gcs.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/cpufeature.h>
+#include <asm/gcs.h>
+#include <asm/page.h>
+
+static unsigned long alloc_gcs(unsigned long addr, unsigned long size)
+{
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+ struct mm_struct *mm = current->mm;
+ unsigned long mapped_addr, unused;
+
+ if (addr)
+ flags |= MAP_FIXED_NOREPLACE;
+
+ mmap_write_lock(mm);
+ mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
+ VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+ mmap_write_unlock(mm);
+
+ return mapped_addr;
+}
+
+static unsigned long gcs_size(unsigned long size)
+{
+ if (size)
+ return PAGE_ALIGN(size);
+
+ /* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */
+ size = PAGE_ALIGN(min_t(unsigned long long,
+ rlimit(RLIMIT_STACK) / 2, SZ_2G));
+ return max(PAGE_SIZE, size);
+}
+
+unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ const struct kernel_clone_args *args)
+{
+ unsigned long addr, size;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!task_gcs_el0_enabled(tsk))
+ return 0;
+
+ if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) {
+ tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+ return 0;
+ }
+
+ size = args->stack_size / 2;
+
+ size = gcs_size(size);
+ addr = alloc_gcs(0, size);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ tsk->thread.gcs_base = addr;
+ tsk->thread.gcs_size = size;
+ tsk->thread.gcspr_el0 = addr + size - sizeof(u64);
+
+ return addr;
+}
+
+SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
+{
+ unsigned long alloc_size;
+ unsigned long __user *cap_ptr;
+ unsigned long cap_val;
+ int ret = 0;
+ int cap_offset;
+
+ if (!system_supports_gcs())
+ return -EOPNOTSUPP;
+
+ if (flags & ~(SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER))
+ return -EINVAL;
+
+ if (!PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ if (size == 8 || !IS_ALIGNED(size, 8))
+ return -EINVAL;
+
+ /*
+ * An overflow would result in attempting to write the restore token
+ * to the wrong location. Not catastrophic, but just return the right
+ * error code and block it.
+ */
+ alloc_size = PAGE_ALIGN(size);
+ if (alloc_size < size)
+ return -EOVERFLOW;
+
+ addr = alloc_gcs(addr, alloc_size);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ /*
+ * Put a cap token at the end of the allocated region so it
+ * can be switched to.
+ */
+ if (flags & SHADOW_STACK_SET_TOKEN) {
+ /* Leave an extra empty frame as a top of stack marker? */
+ if (flags & SHADOW_STACK_SET_MARKER)
+ cap_offset = 2;
+ else
+ cap_offset = 1;
+
+ cap_ptr = (unsigned long __user *)(addr + size -
+ (cap_offset * sizeof(unsigned long)));
+ cap_val = GCS_CAP(cap_ptr);
+
+ put_user_gcs(cap_val, cap_ptr, &ret);
+ if (ret != 0) {
+ vm_munmap(addr, size);
+ return -EFAULT;
+ }
+
+ /*
+ * Ensure the new cap is ordered before standard
+ * memory accesses to the same location.
+ */
+ gcsb_dsync();
+ }
+
+ return addr;
+}
+
+/*
+ * Apply the GCS mode configured for the specified task to the
+ * hardware.
+ */
+void gcs_set_el0_mode(struct task_struct *task)
+{
+ u64 gcscre0_el1 = GCSCRE0_EL1_nTR;
+
+ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)
+ gcscre0_el1 |= GCSCRE0_EL1_RVCHKEN | GCSCRE0_EL1_PCRSEL;
+
+ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_WRITE)
+ gcscre0_el1 |= GCSCRE0_EL1_STREn;
+
+ if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_PUSH)
+ gcscre0_el1 |= GCSCRE0_EL1_PUSHMEn;
+
+ write_sysreg_s(gcscre0_el1, SYS_GCSCRE0_EL1);
+}
+
+void gcs_free(struct task_struct *task)
+{
+ if (!system_supports_gcs())
+ return;
+
+ /*
+ * When fork() with CLONE_VM fails, the child (tsk) already
+ * has a GCS allocated, and exit_thread() calls this function
+ * to free it. In this case the parent (current) and the
+ * child share the same mm struct.
+ */
+ if (!task->mm || task->mm != current->mm)
+ return;
+
+ if (task->thread.gcs_base)
+ vm_munmap(task->thread.gcs_base, task->thread.gcs_size);
+
+ task->thread.gcspr_el0 = 0;
+ task->thread.gcs_base = 0;
+ task->thread.gcs_size = 0;
+}
+
+int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg)
+{
+ unsigned long gcs, size;
+ int ret;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(task)))
+ return -EINVAL;
+
+ /* Reject unknown flags */
+ if (arg & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ ret = gcs_check_locked(task, arg);
+ if (ret != 0)
+ return ret;
+
+ /* If we are enabling GCS then make sure we have a stack */
+ if (arg & PR_SHADOW_STACK_ENABLE &&
+ !task_gcs_el0_enabled(task)) {
+ /* Do not allow GCS to be reenabled */
+ if (task->thread.gcs_base || task->thread.gcspr_el0)
+ return -EINVAL;
+
+ if (task != current)
+ return -EBUSY;
+
+ size = gcs_size(0);
+ gcs = alloc_gcs(0, size);
+ if (!gcs)
+ return -ENOMEM;
+
+ task->thread.gcspr_el0 = gcs + size - sizeof(u64);
+ task->thread.gcs_base = gcs;
+ task->thread.gcs_size = size;
+ if (task == current)
+ write_sysreg_s(task->thread.gcspr_el0,
+ SYS_GCSPR_EL0);
+ }
+
+ task->thread.gcs_el0_mode = arg;
+ if (task == current)
+ gcs_set_el0_mode(task);
+
+ return 0;
+}
+
+int arch_get_shadow_stack_status(struct task_struct *task,
+ unsigned long __user *arg)
+{
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(task)))
+ return -EINVAL;
+
+ return put_user(task->thread.gcs_el0_mode, arg);
+}
+
+int arch_lock_shadow_stack_status(struct task_struct *task,
+ unsigned long arg)
+{
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(task)))
+ return -EINVAL;
+
+ /*
+ * We support locking unknown bits so applications can prevent
+ * any changes in a future proof manner.
+ */
+ task->thread.gcs_el0_locked |= arg;
+
+ return 0;
+}
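
An illustrative use of the new syscall (not part of the patch): a minimal sketch, assuming __NR_map_shadow_stack and the uapi SHADOW_STACK_* flags are available (the fallback values below are assumptions). It maps a shadow stack with an end-of-stack marker and a cap token, then reads the token back with a normal load, which GCS pages permit.

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_map_shadow_stack
#define __NR_map_shadow_stack	453		/* assumed syscall number */
#endif
#ifndef SHADOW_STACK_SET_TOKEN
#define SHADOW_STACK_SET_TOKEN	(1UL << 0)	/* assumed flag value */
#endif
#ifndef SHADOW_STACK_SET_MARKER
#define SHADOW_STACK_SET_MARKER	(1UL << 1)	/* assumed flag value */
#endif

int main(void)
{
	unsigned long size = 4 * sysconf(_SC_PAGESIZE);
	unsigned long *stack;
	long ret;

	ret = syscall(__NR_map_shadow_stack, 0UL, size,
		      SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER);
	if (ret == -1) {
		perror("map_shadow_stack");
		return 1;
	}
	stack = (unsigned long *)ret;

	/* With the marker, the cap token sits two entries below the top. */
	printf("shadow stack at %p, cap token %#lx at %p\n",
	       (void *)stack, stack[size / sizeof(*stack) - 2],
	       (void *)&stack[size / sizeof(*stack) - 2]);
	return 0;
}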
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 5f1e2103888b..3215adf48a1b 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -361,14 +361,25 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
size_t pagesize = 1UL << shift;
- entry = pte_mkhuge(entry);
- if (pagesize == CONT_PTE_SIZE) {
- entry = pte_mkcont(entry);
- } else if (pagesize == CONT_PMD_SIZE) {
+ switch (pagesize) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ entry = pud_pte(pud_mkhuge(pte_pud(entry)));
+ break;
+#endif
+ case CONT_PMD_SIZE:
entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
- } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
+ fallthrough;
+ case PMD_SIZE:
+ entry = pmd_pte(pmd_mkhuge(pte_pmd(entry)));
+ break;
+ case CONT_PTE_SIZE:
+ entry = pte_mkcont(entry);
+ break;
+ default:
pr_warn("%s: unrecognized huge page size 0x%lx\n",
__func__, pagesize);
+ break;
}
return entry;
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 27a32ff15412..d21f67d67cf5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -41,6 +41,7 @@
#include <asm/kvm_host.h>
#include <asm/memory.h>
#include <asm/numa.h>
+#include <asm/rsi.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <linux/sizes.h>
@@ -366,8 +367,14 @@ void __init bootmem_init(void)
*/
void __init mem_init(void)
{
+ unsigned int flags = SWIOTLB_VERBOSE;
bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
+ if (is_realm_world()) {
+ swiotlb = true;
+ flags |= SWIOTLB_FORCE;
+ }
+
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) {
/*
* If no bouncing needed for ZONE_DMA, reduce the swiotlb
@@ -379,7 +386,8 @@ void __init mem_init(void)
swiotlb = true;
}
- swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
+ swiotlb_init(swiotlb, flags);
+ swiotlb_update_mem_attributes();
/* this will put all unused low memory onto the freelists */
memblock_free_all();
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 7e3ad97e27d8..07aeab8a7606 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -83,8 +83,15 @@ arch_initcall(adjust_protection_map);
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
- pteval_t prot = pgprot_val(protection_map[vm_flags &
-	(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
+ pteval_t prot;
+
+ /* Short circuit GCS to avoid bloating the table. */
+ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
+ prot = _PAGE_GCS_RO;
+ } else {
+ prot = pgprot_val(protection_map[vm_flags &
+	(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
+ }
if (vm_flags & VM_ARM64_BTI)
prot |= PTE_GP;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e55b02fbddc8..e2739b69e11b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -119,7 +119,7 @@ static phys_addr_t __init early_pgtable_alloc(int shift)
return phys;
}
-bool pgattr_change_is_safe(u64 old, u64 new)
+bool pgattr_change_is_safe(pteval_t old, pteval_t new)
{
/*
* The following mapping attributes may be updated in live
@@ -201,7 +201,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
BUG_ON(pmd_sect(pmd));
if (pmd_none(pmd)) {
- pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN;
+ pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
phys_addr_t pte_phys;
if (flags & NO_EXEC_MAPPINGS)
@@ -288,7 +288,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
*/
BUG_ON(pud_sect(pud));
if (pud_none(pud)) {
- pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN;
+ pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
phys_addr_t pmd_phys;
if (flags & NO_EXEC_MAPPINGS)
@@ -333,7 +333,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
pud_t *pudp;
if (p4d_none(p4d)) {
- p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
+ p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN | P4D_TABLE_AF;
phys_addr_t pud_phys;
if (flags & NO_EXEC_MAPPINGS)
@@ -391,7 +391,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
p4d_t *p4dp;
if (pgd_none(pgd)) {
- pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN;
+ pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN | PGD_TABLE_AF;
phys_addr_t p4d_phys;
if (flags & NO_EXEC_MAPPINGS)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0e270a1c51e6..6ae6ae806454 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -5,10 +5,12 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/mem_encrypt.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
+#include <asm/pgtable-prot.h>
#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/kfence.h>
@@ -23,14 +25,16 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
bool can_set_direct_map(void)
{
/*
- * rodata_full and DEBUG_PAGEALLOC require linear map to be
- * mapped at page granularity, so that it is possible to
+ * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear
+ * map to be mapped at page granularity, so that it is possible to
* protect/unprotect single pages.
*
* KFENCE pool requires page-granular mapping if initialized late.
+ *
+ * Realms need to make pages shared/protected at page granularity.
*/
return rodata_full || debug_pagealloc_enabled() ||
- arm64_kfence_can_set_direct_map();
+ arm64_kfence_can_set_direct_map() || is_realm_world();
}
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
@@ -60,7 +64,13 @@ static int __change_memory_common(unsigned long start, unsigned long size,
ret = apply_to_page_range(&init_mm, start, size, change_page_range,
&data);
- flush_tlb_kernel_range(start, start + size);
+ /*
+ * If the memory is being made valid without changing any other bits
+ * then a TLBI isn't required as a non-valid entry cannot be cached in
+ * the TLB.
+ */
+ if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask))
+ flush_tlb_kernel_range(start, start + size);
return ret;
}
@@ -192,6 +202,86 @@ int set_direct_map_default_noflush(struct page *page)
PAGE_SIZE, change_page_range, &data);
}
+static int __set_memory_enc_dec(unsigned long addr,
+ int numpages,
+ bool encrypt)
+{
+ unsigned long set_prot = 0, clear_prot = 0;
+ phys_addr_t start, end;
+ int ret;
+
+ if (!is_realm_world())
+ return 0;
+
+ if (!__is_lm_address(addr))
+ return -EINVAL;
+
+ start = __virt_to_phys(addr);
+ end = start + numpages * PAGE_SIZE;
+
+ if (encrypt)
+ clear_prot = PROT_NS_SHARED;
+ else
+ set_prot = PROT_NS_SHARED;
+
+ /*
+ * Break the mapping before we make any changes to avoid stale TLB
+ * entries or Synchronous External Aborts caused by RIPAS_EMPTY
+ */
+ ret = __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(set_prot),
+ __pgprot(clear_prot | PTE_VALID));
+
+ if (ret)
+ return ret;
+
+ if (encrypt)
+ ret = rsi_set_memory_range_protected(start, end);
+ else
+ ret = rsi_set_memory_range_shared(start, end);
+
+ if (ret)
+ return ret;
+
+ return __change_memory_common(addr, PAGE_SIZE * numpages,
+ __pgprot(PTE_VALID),
+ __pgprot(0));
+}
+
+static int realm_set_memory_encrypted(unsigned long addr, int numpages)
+{
+ int ret = __set_memory_enc_dec(addr, numpages, true);
+
+ /*
+ * If the request to change state fails, then the only sensible cause
+ * of action for the caller is to leak the memory
+ */
+ WARN(ret, "Failed to encrypt memory, %d pages will be leaked",
+ numpages);
+
+ return ret;
+}
+
+static int realm_set_memory_decrypted(unsigned long addr, int numpages)
+{
+ int ret = __set_memory_enc_dec(addr, numpages, false);
+
+ WARN(ret, "Failed to decrypt memory, %d pages will be leaked",
+ numpages);
+
+ return ret;
+}
+
+static const struct arm64_mem_crypt_ops realm_crypt_ops = {
+ .encrypt = realm_set_memory_encrypted,
+ .decrypt = realm_set_memory_decrypted,
+};
+
+int realm_register_memory_enc_ops(void)
+{
+ return arm64_mem_crypt_ops_register(&realm_crypt_ops);
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
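
An illustrative caller (not part of the patch): a minimal sketch of how a driver in a Realm guest might share a page with the host. The generic set_memory_decrypted()/set_memory_encrypted() helpers dispatch to the ops registered by realm_register_memory_enc_ops() above, and the error handling mirrors the "leak on failure" rule in the WARN()s. The example_* function names are hypothetical.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/set_memory.h>

static struct page *example_alloc_shared_page(void)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (!page)
		return NULL;

	/* Transition the page to shared with the host; may sleep. */
	if (set_memory_decrypted((unsigned long)page_address(page), 1)) {
		/* Page state is unknown on failure, so leak rather than free. */
		return NULL;
	}

	return page;
}

static void example_free_shared_page(struct page *page)
{
	/* Return the page to protected state before freeing it. */
	if (set_memory_encrypted((unsigned long)page_address(page), 1))
		return;		/* again, leak on failure */

	__free_page(page);
}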
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 8abdc7fed321..b8edc5765441 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -465,10 +465,12 @@ SYM_FUNC_START(__cpu_setup)
*/
mair .req x17
tcr .req x16
+ tcr2 .req x15
mov_q mair, MAIR_EL1_SET
mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
+ mov tcr2, xzr
tcr_clear_errata_bits tcr, x9, x5
@@ -493,9 +495,14 @@ alternative_else_nop_endif
* via capabilities.
*/
mrs x9, ID_AA64MMFR1_EL1
- and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK
+ ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4
cbz x9, 1f
orr tcr, tcr, #TCR_HA // hardware Access flag update
+#ifdef CONFIG_ARM64_HAFT
+ cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
+ b.lt 1f
+ orr tcr2, tcr2, TCR2_EL1x_HAFT
+#endif /* CONFIG_ARM64_HAFT */
1:
#endif /* CONFIG_ARM64_HW_AFDBM */
msr mair_el1, mair
@@ -525,11 +532,16 @@ alternative_else_nop_endif
#undef PTE_MAYBE_NG
#undef PTE_MAYBE_SHARED
- mov x0, TCR2_EL1x_PIE
- msr REG_TCR2_EL1, x0
+ orr tcr2, tcr2, TCR2_EL1x_PIE
.Lskip_indirection:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4
+ cbz x1, 1f
+ msr REG_TCR2_EL1, tcr2
+1:
+
/*
* Prepare SCTLR
*/
@@ -538,4 +550,5 @@ alternative_else_nop_endif
.unreq mair
.unreq tcr
+ .unreq tcr2
SYM_FUNC_END(__cpu_setup)
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 264c5f9b97d8..688fbe0271ca 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -80,10 +80,10 @@ static const struct ptdump_prot_bits pte_bits[] = {
.set = "CON",
.clear = " ",
}, {
- .mask = PTE_TABLE_BIT,
- .val = PTE_TABLE_BIT,
- .set = " ",
- .clear = "BLK",
+ .mask = PTE_TABLE_BIT | PTE_VALID,
+ .val = PTE_VALID,
+ .set = "BLK",
+ .clear = " ",
}, {
.mask = PTE_UXN,
.val = PTE_UXN,