diff options
Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r-- | mm/userfaultfd.c | 84 |
1 files changed, 55 insertions, 29 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 07d3befc80e4..650ab6cfd5f4 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, pte_t _dst_pte, *dst_pte; bool writable = dst_vma->vm_flags & VM_WRITE; bool vm_shared = dst_vma->vm_flags & VM_SHARED; - bool page_in_cache = page->mapping; + bool page_in_cache = page_mapping(page); spinlock_t *ptl; struct inode *inode; pgoff_t offset, max_off; @@ -157,11 +157,28 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, if (!page) goto out; - page_kaddr = kmap_atomic(page); + page_kaddr = kmap_local_page(page); + /* + * The read mmap_lock is held here. Despite the + * mmap_lock being read recursive a deadlock is still + * possible if a writer has taken a lock. For example: + * + * process A thread 1 takes read lock on own mmap_lock + * process A thread 2 calls mmap, blocks taking write lock + * process B thread 1 takes page fault, read lock on own mmap lock + * process B thread 2 calls mmap, blocks taking write lock + * process A thread 1 blocks taking read lock on process B + * process B thread 1 blocks taking read lock on process A + * + * Disable page faults to prevent potential deadlock + * and retry the copy outside the mmap_lock. + */ + pagefault_disable(); ret = copy_from_user(page_kaddr, (const void __user *) src_addr, PAGE_SIZE); - kunmap_atomic(page_kaddr); + pagefault_enable(); + kunmap_local(page_kaddr); /* fallback to copy_from_user outside mmap_lock */ if (unlikely(ret)) { @@ -243,20 +260,22 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, { struct inode *inode = file_inode(dst_vma->vm_file); pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); + struct folio *folio; struct page *page; int ret; - ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC); - /* Our caller expects us to return -EFAULT if we failed to find page. */ + ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC); + /* Our caller expects us to return -EFAULT if we failed to find folio */ if (ret == -ENOENT) ret = -EFAULT; if (ret) goto out; - if (!page) { + if (!folio) { ret = -EFAULT; goto out; } + page = folio_file_page(folio, pgoff); if (PageHWPoison(page)) { ret = -EIO; goto out_release; @@ -267,13 +286,13 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, if (ret) goto out_release; - unlock_page(page); + folio_unlock(folio); ret = 0; out: return ret; out_release: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); goto out; } @@ -377,30 +396,30 @@ retry: BUG_ON(dst_addr >= dst_start + len); /* - * Serialize via i_mmap_rwsem and hugetlb_fault_mutex. - * i_mmap_rwsem ensures the dst_pte remains valid even + * Serialize via vma_lock and hugetlb_fault_mutex. + * vma_lock ensures the dst_pte remains valid even * in the case of shared pmds. fault mutex prevents * races with other faulting threads. */ - mapping = dst_vma->vm_file->f_mapping; - i_mmap_lock_read(mapping); idx = linear_page_index(dst_vma, dst_addr); + mapping = dst_vma->vm_file->f_mapping; hash = hugetlb_fault_mutex_hash(mapping, idx); mutex_lock(&hugetlb_fault_mutex_table[hash]); + hugetlb_vma_lock_read(dst_vma); err = -ENOMEM; dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize); if (!dst_pte) { + hugetlb_vma_unlock_read(dst_vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); - i_mmap_unlock_read(mapping); goto out_unlock; } if (mode != MCOPY_ATOMIC_CONTINUE && !huge_pte_none_mostly(huge_ptep_get(dst_pte))) { err = -EEXIST; + hugetlb_vma_unlock_read(dst_vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); - i_mmap_unlock_read(mapping); goto out_unlock; } @@ -408,8 +427,8 @@ retry: dst_addr, src_addr, mode, &page, wp_copy); + hugetlb_vma_unlock_read(dst_vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); - i_mmap_unlock_read(mapping); cond_resched(); @@ -644,11 +663,11 @@ retry: mmap_read_unlock(dst_mm); BUG_ON(!page); - page_kaddr = kmap(page); + page_kaddr = kmap_local_page(page); err = copy_from_user(page_kaddr, (const void __user *) src_addr, PAGE_SIZE); - kunmap(page); + kunmap_local(page_kaddr); if (unlikely(err)) { err = -EFAULT; goto out; @@ -703,14 +722,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start, mmap_changing, 0); } +void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, + unsigned long start, unsigned long len, bool enable_wp) +{ + struct mmu_gather tlb; + pgprot_t newprot; + + if (enable_wp) + newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); + else + newprot = vm_get_page_prot(dst_vma->vm_flags); + + tlb_gather_mmu(&tlb, dst_mm); + change_protection(&tlb, dst_vma, start, start + len, newprot, + enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); + tlb_finish_mmu(&tlb); +} + int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing) { struct vm_area_struct *dst_vma; unsigned long page_mask; - struct mmu_gather tlb; - pgprot_t newprot; int err; /* @@ -750,15 +784,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, goto out_unlock; } - if (enable_wp) - newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE)); - else - newprot = vm_get_page_prot(dst_vma->vm_flags); - - tlb_gather_mmu(&tlb, dst_mm); - change_protection(&tlb, dst_vma, start, start + len, newprot, - enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE); - tlb_finish_mmu(&tlb); + uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); err = 0; out_unlock: |