aboutsummaryrefslogtreecommitdiff
path: root/mm/userfaultfd.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/userfaultfd.c')
-rw-r--r--mm/userfaultfd.c84
1 files changed, 55 insertions, 29 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 07d3befc80e4..650ab6cfd5f4 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
pte_t _dst_pte, *dst_pte;
bool writable = dst_vma->vm_flags & VM_WRITE;
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
- bool page_in_cache = page->mapping;
+ bool page_in_cache = page_mapping(page);
spinlock_t *ptl;
struct inode *inode;
pgoff_t offset, max_off;
@@ -157,11 +157,28 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!page)
goto out;
- page_kaddr = kmap_atomic(page);
+ page_kaddr = kmap_local_page(page);
+ /*
+ * The read mmap_lock is held here. Despite the
+ * mmap_lock being read recursive a deadlock is still
+ * possible if a writer has taken a lock. For example:
+ *
+ * process A thread 1 takes read lock on own mmap_lock
+ * process A thread 2 calls mmap, blocks taking write lock
+ * process B thread 1 takes page fault, read lock on own mmap lock
+ * process B thread 2 calls mmap, blocks taking write lock
+ * process A thread 1 blocks taking read lock on process B
+ * process B thread 1 blocks taking read lock on process A
+ *
+ * Disable page faults to prevent potential deadlock
+ * and retry the copy outside the mmap_lock.
+ */
+ pagefault_disable();
ret = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
- kunmap_atomic(page_kaddr);
+ pagefault_enable();
+ kunmap_local(page_kaddr);
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
@@ -243,20 +260,22 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
{
struct inode *inode = file_inode(dst_vma->vm_file);
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+ struct folio *folio;
struct page *page;
int ret;
- ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
- /* Our caller expects us to return -EFAULT if we failed to find page. */
+ ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
+ /* Our caller expects us to return -EFAULT if we failed to find folio */
if (ret == -ENOENT)
ret = -EFAULT;
if (ret)
goto out;
- if (!page) {
+ if (!folio) {
ret = -EFAULT;
goto out;
}
+ page = folio_file_page(folio, pgoff);
if (PageHWPoison(page)) {
ret = -EIO;
goto out_release;
@@ -267,13 +286,13 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
if (ret)
goto out_release;
- unlock_page(page);
+ folio_unlock(folio);
ret = 0;
out:
return ret;
out_release:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto out;
}
@@ -377,30 +396,30 @@ retry:
BUG_ON(dst_addr >= dst_start + len);
/*
- * Serialize via i_mmap_rwsem and hugetlb_fault_mutex.
- * i_mmap_rwsem ensures the dst_pte remains valid even
+ * Serialize via vma_lock and hugetlb_fault_mutex.
+ * vma_lock ensures the dst_pte remains valid even
* in the case of shared pmds. fault mutex prevents
* races with other faulting threads.
*/
- mapping = dst_vma->vm_file->f_mapping;
- i_mmap_lock_read(mapping);
idx = linear_page_index(dst_vma, dst_addr);
+ mapping = dst_vma->vm_file->f_mapping;
hash = hugetlb_fault_mutex_hash(mapping, idx);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ hugetlb_vma_lock_read(dst_vma);
err = -ENOMEM;
dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
if (!dst_pte) {
+ hugetlb_vma_unlock_read(dst_vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- i_mmap_unlock_read(mapping);
goto out_unlock;
}
if (mode != MCOPY_ATOMIC_CONTINUE &&
!huge_pte_none_mostly(huge_ptep_get(dst_pte))) {
err = -EEXIST;
+ hugetlb_vma_unlock_read(dst_vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- i_mmap_unlock_read(mapping);
goto out_unlock;
}
@@ -408,8 +427,8 @@ retry:
dst_addr, src_addr, mode, &page,
wp_copy);
+ hugetlb_vma_unlock_read(dst_vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- i_mmap_unlock_read(mapping);
cond_resched();
@@ -644,11 +663,11 @@ retry:
mmap_read_unlock(dst_mm);
BUG_ON(!page);
- page_kaddr = kmap(page);
+ page_kaddr = kmap_local_page(page);
err = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
- kunmap(page);
+ kunmap_local(page_kaddr);
if (unlikely(err)) {
err = -EFAULT;
goto out;
@@ -703,14 +722,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
mmap_changing, 0);
}
+void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
+ unsigned long start, unsigned long len, bool enable_wp)
+{
+ struct mmu_gather tlb;
+ pgprot_t newprot;
+
+ if (enable_wp)
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
+ else
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
+
+ tlb_gather_mmu(&tlb, dst_mm);
+ change_protection(&tlb, dst_vma, start, start + len, newprot,
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
+ tlb_finish_mmu(&tlb);
+}
+
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
struct vm_area_struct *dst_vma;
unsigned long page_mask;
- struct mmu_gather tlb;
- pgprot_t newprot;
int err;
/*
@@ -750,15 +784,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
goto out_unlock;
}
- if (enable_wp)
- newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
- else
- newprot = vm_get_page_prot(dst_vma->vm_flags);
-
- tlb_gather_mmu(&tlb, dst_mm);
- change_protection(&tlb, dst_vma, start, start + len, newprot,
- enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
- tlb_finish_mmu(&tlb);
+ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
err = 0;
out_unlock: