Diffstat (limited to 'arch/s390/mm')
-rw-r--r--  arch/s390/mm/extmem.c   |  14
-rw-r--r--  arch/s390/mm/fault.c    | 209
-rw-r--r--  arch/s390/mm/gmap.c     | 157
-rw-r--r--  arch/s390/mm/pageattr.c |  16
-rw-r--r--  arch/s390/mm/pgalloc.c  |   4
-rw-r--r--  arch/s390/mm/pgtable.c  |   2
6 files changed, 161 insertions, 241 deletions
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 282fefe107a2..4692136c0af1 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -28,6 +28,7 @@
 #include <asm/extmem.h>
 #include <asm/cpcmd.h>
 #include <asm/setup.h>
+#include <asm/asm.h>
 
 #define DCSS_PURGESEG	0x08
 #define DCSS_LOADSHRX	0x20
@@ -134,20 +135,21 @@ dcss_diag(int *func, void *parameter,
 	   unsigned long *ret1, unsigned long *ret2)
 {
 	unsigned long rx, ry;
-	int rc;
+	int cc;
 
 	rx = virt_to_phys(parameter);
 	ry = (unsigned long) *func;
 
 	diag_stat_inc(DIAG_STAT_X064);
 	asm volatile(
-		"	diag	%0,%1,0x64\n"
-		"	ipm	%2\n"
-		"	srl	%2,28\n"
-		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+		"	diag	%[rx],%[ry],0x64\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [rx] "+d" (rx), [ry] "+d" (ry)
+		:
+		: CC_CLOBBER);
 	*ret1 = rx;
 	*ret2 = ry;
-	return rc;
+	return CC_TRANSFORM(cc);
 }
 
 static inline int
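Note: the dcss_diag() hunk above replaces an open-coded "ipm"/"srl" pair with the CC_IPM()/CC_OUT()/CC_TRANSFORM()/CC_CLOBBER helpers from <asm/asm.h>. For reference, the removed pattern behaves like the sketch below: "ipm" inserts the PSW condition code into bits 28-31 of a register, and the logical shift right by 28 turns that into a plain 0-3 value. This is a hedged illustration derived from the old code, not the helpers' actual definitions (with compiler flag-output support the helpers can avoid the ipm/srl pair entirely); the function name is made up for the example.

/*
 * Illustration of the pattern the CC_* helpers replace, based on the
 * removed dcss_diag() code above. The name diag64_cc is hypothetical.
 */
static inline int diag64_cc(unsigned long *rx, unsigned long *ry)
{
	int cc;

	asm volatile(
		"	diag	%[rx],%[ry],0x64\n"
		"	ipm	%[cc]\n"	/* PSW cc -> bits 28-31 */
		"	srl	%[cc],28\n"	/* shift down to value 0-3 */
		: [cc] "=d" (cc), [rx] "+d" (*rx), [ry] "+d" (*ry)
		:
		: "cc");
	return cc;	/* condition code, 0-3 */
}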
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ad8b0d6b77ea..646326fa0fad 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -46,12 +46,6 @@
 #include <asm/uv.h>
 #include "../kernel/entry.h"
 
-enum fault_type {
-	KERNEL_FAULT,
-	USER_FAULT,
-	GMAP_FAULT,
-};
-
 static DEFINE_STATIC_KEY_FALSE(have_store_indication);
 
 static int __init fault_init(void)
@@ -65,28 +59,15 @@ early_initcall(fault_init);
 /*
  * Find out which address space caused the exception.
  */
-static enum fault_type get_fault_type(struct pt_regs *regs)
+static bool is_kernel_fault(struct pt_regs *regs)
 {
 	union teid teid = { .val = regs->int_parm_long };
-	struct gmap *gmap;
 
-	if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
-		if (user_mode(regs))
-			return USER_FAULT;
-		if (!IS_ENABLED(CONFIG_PGSTE))
-			return KERNEL_FAULT;
-		gmap = (struct gmap *)get_lowcore()->gmap;
-		if (gmap && gmap->asce == regs->cr1)
-			return GMAP_FAULT;
-		return KERNEL_FAULT;
-	}
+	if (user_mode(regs))
+		return false;
 	if (teid.as == PSW_BITS_AS_SECONDARY)
-		return USER_FAULT;
-	/* Access register mode, not used in the kernel */
-	if (teid.as == PSW_BITS_AS_ACCREG)
-		return USER_FAULT;
-	/* Home space -> access via kernel ASCE */
-	return KERNEL_FAULT;
+		return false;
+	return true;
 }
 
 static unsigned long get_fault_address(struct pt_regs *regs)
@@ -147,7 +128,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
 			goto out;
 		table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
 	}
-	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
+	table += (address & _PAGE_INDEX) >> PAGE_SHIFT;
 	if (get_kernel_nofault(entry, table))
 		goto bad;
 	pr_cont("P:%016lx ", entry);
@@ -181,21 +162,12 @@ static void dump_fault_info(struct pt_regs *regs)
 		break;
 	}
 	pr_cont("mode while using ");
-	switch (get_fault_type(regs)) {
-	case USER_FAULT:
-		asce = get_lowcore()->user_asce.val;
-		pr_cont("user ");
-		break;
-	case GMAP_FAULT:
-		asce = ((struct gmap *)get_lowcore()->gmap)->asce;
-		pr_cont("gmap ");
-		break;
-	case KERNEL_FAULT:
+	if (is_kernel_fault(regs)) {
 		asce = get_lowcore()->kernel_asce.val;
 		pr_cont("kernel ");
-		break;
-	default:
-		unreachable();
+	} else {
+		asce = get_lowcore()->user_asce.val;
+		pr_cont("user ");
 	}
 	pr_cont("ASCE.\n");
 	dump_pagetable(asce, get_fault_address(regs));
@@ -230,7 +202,6 @@ static void do_sigsegv(struct pt_regs *regs, int si_code)
 
 static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
 {
-	enum fault_type fault_type;
 	unsigned long address;
 	bool is_write;
 
@@ -241,17 +212,15 @@ static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
 	}
 	if (fixup_exception(regs))
 		return;
-	fault_type = get_fault_type(regs);
-	if (fault_type == KERNEL_FAULT) {
+	if (is_kernel_fault(regs)) {
 		address = get_fault_address(regs);
 		is_write = fault_is_write(regs);
 		if (kfence_handle_page_fault(address, is_write, regs))
 			return;
-	}
-	if (fault_type == KERNEL_FAULT)
 		pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
-	else
+	} else {
 		pr_alert("Unable to handle kernel paging request in virtual user address space\n");
+	}
 	dump_fault_info(regs);
 	die(regs, "Oops");
 }
@@ -285,9 +254,7 @@ static void do_exception(struct pt_regs *regs, int access)
 	struct vm_area_struct *vma;
 	unsigned long address;
 	struct mm_struct *mm;
-	enum fault_type type;
 	unsigned int flags;
-	struct gmap *gmap;
 	vm_fault_t fault;
 	bool is_write;
 
@@ -301,16 +268,8 @@ static void do_exception(struct pt_regs *regs, int access)
 	mm = current->mm;
 	address = get_fault_address(regs);
 	is_write = fault_is_write(regs);
-	type = get_fault_type(regs);
-	switch (type) {
-	case KERNEL_FAULT:
+	if (is_kernel_fault(regs) || faulthandler_disabled() || !mm)
 		return handle_fault_error_nolock(regs, 0);
-	case USER_FAULT:
-	case GMAP_FAULT:
-		if (faulthandler_disabled() || !mm)
-			return handle_fault_error_nolock(regs, 0);
-		break;
-	}
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_DEFAULT;
 	if (user_mode(regs))
@@ -334,14 +293,11 @@ static void do_exception(struct pt_regs *regs, int access)
 		vma_end_read(vma);
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
-		if (unlikely(fault & VM_FAULT_ERROR))
-			goto error;
-		return;
+		goto done;
 	}
 	count_vm_vma_lock_event(VMA_LOCK_RETRY);
 	if (fault & VM_FAULT_MAJOR)
 		flags |= FAULT_FLAG_TRIED;
-
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
@@ -349,81 +305,29 @@ lock_mmap:
-	mmap_read_lock(mm);
-	gmap = NULL;
-	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
-		gmap = (struct gmap *)get_lowcore()->gmap;
-		current->thread.gmap_addr = address;
-		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
-		current->thread.gmap_int_code = regs->int_code & 0xffff;
-		address = __gmap_translate(gmap, address);
-		if (address == -EFAULT)
-			return handle_fault_error(regs, SEGV_MAPERR);
-		if (gmap->pfault_enabled)
-			flags |= FAULT_FLAG_RETRY_NOWAIT;
-	}
 retry:
-	vma = find_vma(mm, address);
+	vma = lock_mm_and_find_vma(mm, address, regs);
 	if (!vma)
-		return handle_fault_error(regs, SEGV_MAPERR);
-	if (unlikely(vma->vm_start > address)) {
-		if (!(vma->vm_flags & VM_GROWSDOWN))
-			return handle_fault_error(regs, SEGV_MAPERR);
-		vma = expand_stack(mm, address);
-		if (!vma)
-			return handle_fault_error_nolock(regs, SEGV_MAPERR);
-	}
+		return handle_fault_error_nolock(regs, SEGV_MAPERR);
 	if (unlikely(!(vma->vm_flags & access)))
 		return handle_fault_error(regs, SEGV_ACCERR);
 	fault = handle_mm_fault(vma, address, flags, regs);
 	if (fault_signal_pending(fault, regs)) {
-		if (flags & FAULT_FLAG_RETRY_NOWAIT)
-			mmap_read_unlock(mm);
 		if (!user_mode(regs))
 			handle_fault_error_nolock(regs, 0);
 		return;
 	}
 	/* The fault is fully completed (including releasing mmap lock) */
-	if (fault & VM_FAULT_COMPLETED) {
-		if (gmap) {
-			mmap_read_lock(mm);
-			goto gmap;
-		}
+	if (fault & VM_FAULT_COMPLETED)
 		return;
-	}
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		mmap_read_unlock(mm);
-		goto error;
-	}
 	if (fault & VM_FAULT_RETRY) {
-		if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
-			/*
-			 * FAULT_FLAG_RETRY_NOWAIT has been set,
-			 * mmap_lock has not been released
-			 */
-			current->thread.gmap_pfault = 1;
-			return handle_fault_error(regs, 0);
-		}
-		flags &= ~FAULT_FLAG_RETRY_NOWAIT;
 		flags |= FAULT_FLAG_TRIED;
-		mmap_read_lock(mm);
 		goto retry;
 	}
-gmap:
-	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
-		address = __gmap_link(gmap, current->thread.gmap_addr,
-				      address);
-		if (address == -EFAULT)
-			return handle_fault_error(regs, SEGV_MAPERR);
-		if (address == -ENOMEM) {
-			fault = VM_FAULT_OOM;
-			mmap_read_unlock(mm);
-			goto error;
-		}
-	}
 	mmap_read_unlock(mm);
-	return;
-error:
+done:
+	if (!(fault & VM_FAULT_ERROR))
+		return;
 	if (fault & VM_FAULT_OOM) {
 		if (!user_mode(regs))
 			handle_fault_error_nolock(regs, 0);
@@ -496,7 +400,6 @@ void do_secure_storage_access(struct pt_regs *regs)
 	struct folio_walk fw;
 	struct mm_struct *mm;
 	struct folio *folio;
-	struct gmap *gmap;
 	int rc;
 
 	/*
@@ -521,17 +424,15 @@
 		 */
 		panic("Unexpected PGM 0x3d with TEID bit 61=0");
 	}
-	switch (get_fault_type(regs)) {
-	case GMAP_FAULT:
-		mm = current->mm;
-		gmap = (struct gmap *)get_lowcore()->gmap;
-		mmap_read_lock(mm);
-		addr = __gmap_translate(gmap, addr);
-		mmap_read_unlock(mm);
-		if (IS_ERR_VALUE(addr))
-			return handle_fault_error_nolock(regs, SEGV_MAPERR);
-		fallthrough;
-	case USER_FAULT:
+	if (is_kernel_fault(regs)) {
+		folio = phys_to_folio(addr);
+		if (unlikely(!folio_try_get(folio)))
+			return;
+		rc = arch_make_folio_accessible(folio);
+		folio_put(folio);
+		if (rc)
+			BUG();
+	} else {
 		mm = current->mm;
 		mmap_read_lock(mm);
 		vma = find_vma(mm, addr);
@@ -540,7 +441,7 @@
 		folio = folio_walk_start(&fw, vma, addr, 0);
 		if (!folio) {
 			mmap_read_unlock(mm);
-			break;
+			return;
 		}
 		/* arch_make_folio_accessible() needs a raised refcount. */
 		folio_get(folio);
@@ -550,56 +451,8 @@
 		if (rc)
 			send_sig(SIGSEGV, current, 0);
 		mmap_read_unlock(mm);
-		break;
-	case KERNEL_FAULT:
-		folio = phys_to_folio(addr);
-		if (unlikely(!folio_try_get(folio)))
-			break;
-		rc = arch_make_folio_accessible(folio);
-		folio_put(folio);
-		if (rc)
-			BUG();
-		break;
-	default:
-		unreachable();
 	}
 }
 NOKPROBE_SYMBOL(do_secure_storage_access);
 
-void do_non_secure_storage_access(struct pt_regs *regs)
-{
-	struct gmap *gmap = (struct gmap *)get_lowcore()->gmap;
-	unsigned long gaddr = get_fault_address(regs);
-
-	if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT))
-		return handle_fault_error_nolock(regs, SEGV_MAPERR);
-	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
-		send_sig(SIGSEGV, current, 0);
-}
-NOKPROBE_SYMBOL(do_non_secure_storage_access);
-
-void do_secure_storage_violation(struct pt_regs *regs)
-{
-	struct gmap *gmap = (struct gmap *)get_lowcore()->gmap;
-	unsigned long gaddr = get_fault_address(regs);
-
-	/*
-	 * If the VM has been rebooted, its address space might still contain
-	 * secure pages from the previous boot.
-	 * Clear the page so it can be reused.
-	 */
-	if (!gmap_destroy_page(gmap, gaddr))
-		return;
-	/*
-	 * Either KVM messed up the secure guest mapping or the same
-	 * page is mapped into multiple secure guests.
-	 *
-	 * This exception is only triggered when a guest 2 is running
-	 * and can therefore never occur in kernel context.
-	 */
-	pr_warn_ratelimited("Secure storage violation in task: %s, pid %d\n",
-			    current->comm, current->pid);
-	send_sig(SIGSEGV, current, 0);
-}
-
 #endif /* CONFIG_PGSTE */
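Note: in the do_exception() hunks above, the open-coded mmap_read_lock()/find_vma()/expand_stack() sequence is replaced by lock_mm_and_find_vma(), whose NULL result means "no lock held". Below is a simplified sketch of the contract the caller relies on; the real helper lives in mm/memory.c (behind CONFIG_LOCK_MM_AND_FIND_VMA) and is more careful about lock acquisition, so this approximation mirrors the removed s390 code rather than the generic implementation.

/*
 * Simplified sketch of the lock_mm_and_find_vma() contract: on success
 * the mmap_lock is held for reading and the VMA covers @addr; on
 * failure NULL is returned with the lock dropped, which is why
 * do_exception() can go straight to
 * handle_fault_error_nolock(regs, SEGV_MAPERR).
 */
static struct vm_area_struct *lock_mm_and_find_vma_sketch(struct mm_struct *mm,
							   unsigned long addr)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	if (vma && vma->vm_start <= addr)
		return vma;			/* common case */
	if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
		mmap_read_unlock(mm);
		return NULL;			/* no usable mapping */
	}
	/* expand_stack() returns with the lock dropped on failure */
	return expand_stack(mm, addr);
}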
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index eb0b51a36be0..643e47bfaddc 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -281,37 +281,6 @@ void gmap_remove(struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_remove);
 
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
-{
-	get_lowcore()->gmap = (unsigned long)gmap;
-}
-EXPORT_SYMBOL_GPL(gmap_enable);
-
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
-{
-	get_lowcore()->gmap = 0UL;
-}
-EXPORT_SYMBOL_GPL(gmap_disable);
-
-/**
- * gmap_get_enabled - get a pointer to the currently enabled gmap
- *
- * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
- */
-struct gmap *gmap_get_enabled(void)
-{
-	return (struct gmap *)get_lowcore()->gmap;
-}
-EXPORT_SYMBOL_GPL(gmap_get_enabled);
-
 /*
  * gmap_alloc_table is assumed to be called with mmap_lock held
  */
@@ -637,44 +606,124 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 }
 
 /**
- * gmap_fault - resolve a fault on a guest address
+ * fixup_user_fault_nowait - manually resolve a user page fault without waiting
+ * @mm: mm_struct of target mm
+ * @address: user address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ * @unlocked: did we unlock the mmap_lock while retrying
+ *
+ * This function behaves similarly to fixup_user_fault(), but it guarantees
+ * that the fault will be resolved without waiting. The function might drop
+ * and re-acquire the mm lock, in which case @unlocked will be set to true.
+ *
+ * The guarantee is that the fault is handled without waiting, but the
+ * function itself might sleep, due to the lock.
+ *
+ * Context: Needs to be called with mm->mmap_lock held in read mode, and will
+ * return with the lock held in read mode; @unlocked will indicate whether
+ * the lock has been dropped and re-acquired. This is the same behaviour as
+ * fixup_user_fault().
+ *
+ * Return: 0 on success, -EAGAIN if the fault cannot be resolved without
+ * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of
+ * memory.
+ */
+static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address,
+				   unsigned int fault_flags, bool *unlocked)
+{
+	struct vm_area_struct *vma;
+	unsigned int test_flags;
+	vm_fault_t fault;
+	int rc;
+
+	fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+	test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ;
+
+	vma = find_vma(mm, address);
+	if (unlikely(!vma || address < vma->vm_start))
+		return -EFAULT;
+	if (unlikely(!(vma->vm_flags & test_flags)))
+		return -EFAULT;
+
+	fault = handle_mm_fault(vma, address, fault_flags, NULL);
+	/* the mm lock has been dropped, take it again */
+	if (fault & VM_FAULT_COMPLETED) {
+		*unlocked = true;
+		mmap_read_lock(mm);
+		return 0;
+	}
+	/* the mm lock has not been dropped */
+	if (fault & VM_FAULT_ERROR) {
+		rc = vm_fault_to_errno(fault, 0);
+		BUG_ON(!rc);
+		return rc;
+	}
+	/* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */
+	if (fault & VM_FAULT_RETRY)
+		return -EAGAIN;
+	/* nothing needed to be done and the mm lock has not been dropped */
+	return 0;
+}
+
+/**
+ * __gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
+ * Context: Needs to be called with mm->mmap_lock held in read mode. Might
+ * drop and re-acquire the lock. Will always return with the lock held.
 */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
-	       unsigned int fault_flags)
+static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
 {
 	unsigned long vmaddr;
-	int rc;
 	bool unlocked;
-
-	mmap_read_lock(gmap->mm);
+	int rc = 0;
 
 retry:
 	unlocked = false;
+
 	vmaddr = __gmap_translate(gmap, gaddr);
-	if (IS_ERR_VALUE(vmaddr)) {
-		rc = vmaddr;
-		goto out_up;
-	}
-	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
-			     &unlocked)) {
-		rc = -EFAULT;
-		goto out_up;
-	}
+	if (IS_ERR_VALUE(vmaddr))
+		return vmaddr;
+
+	if (fault_flags & FAULT_FLAG_RETRY_NOWAIT)
+		rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked);
+	else
+		rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked);
+	if (rc)
+		return rc;
 	/*
 	 * In the case that fixup_user_fault unlocked the mmap_lock during
-	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
+	 * fault-in, redo __gmap_translate() to avoid racing with a
+	 * map/unmap_segment.
+	 * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(),
+	 * and __gmap_link() must all be called atomically in one go; if the
+	 * lock had been dropped in between, a retry is needed.
 	 */
 	if (unlocked)
 		goto retry;
-	rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
+
+	return __gmap_link(gmap, gaddr, vmaddr);
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the
+ * vm address is already mapped to a different guest segment, and -EAGAIN if
+ * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed
+ * immediately.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
+{
+	int rc;
+
+	mmap_read_lock(gmap->mm);
+	rc = __gmap_fault(gmap, gaddr, fault_flags);
 	mmap_read_unlock(gmap->mm);
 	return rc;
 }
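Note: gmap_fault() now documents -EAGAIN for the FAULT_FLAG_RETRY_NOWAIT case. The sketch below is a hypothetical caller (not part of this patch) showing how a pfault-style user could attempt a non-waiting resolution first and only fall back to a blocking fault when necessary; the function name is made up for illustration.

/* Hypothetical caller -- illustration only, not from this patch. */
static int resolve_guest_fault(struct gmap *gmap, unsigned long gaddr,
			       bool write)
{
	unsigned int flags = write ? FAULT_FLAG_WRITE : 0;
	int rc;

	/* First try without sleeping inside handle_mm_fault(). */
	rc = gmap_fault(gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
	if (rc == -EAGAIN) {
		/*
		 * Not resolvable immediately: notify the guest (pfault)
		 * or, as here, simply fall back to a blocking fault.
		 */
		rc = gmap_fault(gmap, gaddr, flags);
	}
	return rc;	/* 0 on success, -EFAULT or -ENOMEM otherwise */
}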
@@ -851,7 +900,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
 		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
-		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
+		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
 	}
 	return table;
 }
@@ -1317,7 +1366,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
 	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
 	if (!table || *table & _PAGE_INVALID)
 		return;
-	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
+	gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
 	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
 }
@@ -1335,7 +1384,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
-	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
+	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
 		pgt[i] = _PAGE_INVALID;
 }
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 5f805ad42d4c..4a0f422cfeb6 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -12,6 +12,7 @@
 #include <asm/pgalloc.h>
 #include <asm/kfence.h>
 #include <asm/page.h>
+#include <asm/asm.h>
 #include <asm/set_memory.h>
 
 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
@@ -406,6 +407,21 @@ int set_direct_map_default_noflush(struct page *page)
 	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
 }
 
+bool kernel_page_present(struct page *page)
+{
+	unsigned long addr;
+	unsigned int cc;
+
+	addr = (unsigned long)page_address(page);
+	asm volatile(
+		"	lra	%[addr],0(%[addr])\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [addr] "+a" (addr)
+		:
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc) == 0;
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 
 static void ipte_range(pte_t *pte, unsigned long address, int nr)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index f691e0fb66a2..58696a0c4e4a 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -278,7 +278,7 @@ static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
 	return (next - 1) < (end - 1) ? next : end;			\
 }
 
-BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
+BASE_ADDR_END_FUNC(page, PAGE_SIZE)
 BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
 BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
 BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
@@ -302,7 +302,7 @@ static int base_page_walk(unsigned long *origin, unsigned long addr,
 	if (!alloc)
 		return 0;
 	pte = origin;
-	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+	pte += (addr & _PAGE_INDEX) >> PAGE_SHIFT;
 	do {
 		next = base_page_addr_end(addr, end);
 		*pte = base_lra(addr);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2c944bafb030..cea5dba80468 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -525,7 +525,7 @@ static inline void pudp_idte_global(struct mm_struct *mm,
 	else
 		/*
 		 * Invalid bit position is the same for pmd and pud, so we can
-		 * re-use _pmd_csp() here
+		 * reuse _pmd_csp() here
 		 */
 		__pmdp_csp((pmd_t *) pudp);
 }
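Note: the new kernel_page_present() in pageattr.c relies on LRA (Load Real Address) setting condition code 0 only when the given kernel virtual address currently translates; pages that DEBUG_PAGEALLOC or KFENCE have removed from the direct map therefore report false. A small usage sketch, illustrative only, using the generic kernel_page_present() API:

/* Illustration only: check that a freshly allocated page is mapped. */
static void direct_map_check(void)
{
	struct page *page = alloc_page(GFP_KERNEL);

	if (!page)
		return;
	/* LRA translates the page's kernel virtual address -> cc 0. */
	WARN_ON(!kernel_page_present(page));
	__free_page(page);
}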