author    | Peter Xu <[email protected]>          | 2021-05-04 18:33:13 -0700
committer | Linus Torvalds <[email protected]> | 2021-05-05 11:27:20 -0700
commit    | 6dfeaff93be1a4cab4fb48dad7df326d05059a99 (patch)
tree      | 5f85f4d0f448ced021673c0c08c692cc681586df
parent    | 537cf30bba241ae88d5f4b0b6a5e66271b394852 (diff)
hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp
Huge pmd sharing for hugetlbfs is racy with userfaultfd-wp because
userfaultfd-wp always operates on pgtable entries, so those entries
cannot be shared between processes.

Walk the hugetlb range and unshare all such mappings, if any exist,
right before UFFDIO_REGISTER succeeds and returns to userspace.

This pairs with want_pmd_share() in the hugetlb code so that huge pmd
sharing is completely disabled for any userfaultfd-wp registered range.
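For illustration, the unshare path added by this patch is reached when
userspace registers a shared hugetlb range in write-protect mode. Below is a
minimal sketch, assuming a kernel that accepts UFFDIO_REGISTER_MODE_WP on
hugetlbfs mappings; the 2MB mapping length is an arbitrary example value.

/*
 * Minimal sketch (not part of this patch): register a shared hugetlb
 * mapping with userfaultfd in write-protect mode.  Assumes the kernel
 * accepts UFFDIO_REGISTER_MODE_WP on hugetlbfs.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

#define LEN (2UL << 20)		/* one 2MB hugepage, example value */

int main(void)
{
	struct uffdio_api api = { .api = UFFD_API };
	struct uffdio_register reg;
	void *addr;
	int uffd;

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api) == -1) {
		perror("userfaultfd");
		exit(1);
	}

	/* Shared hugetlb mapping: the case where huge pmds may be shared */
	addr = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	memset(&reg, 0, sizeof(reg));
	reg.range.start = (unsigned long)addr;
	reg.range.len = LEN;
	reg.mode = UFFDIO_REGISTER_MODE_WP;

	/*
	 * On success, the kernel has walked [start, start + len) and
	 * unshared any shared huge pmds before this ioctl returned.
	 */
	if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1)
		perror("UFFDIO_REGISTER");

	munmap(addr, LEN);
	close(uffd);
	return 0;
}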
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Adam Ruprecht <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Alexey Dobriyan <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Cannon Matthews <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Chinwen Chang <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: "Dr . David Alan Gilbert" <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: Lokesh Gidra <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: "Michal Koutn" <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Mina Almasry <[email protected]>
Cc: Nicholas Piggin <[email protected]>
Cc: Oliver Upton <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: Shawn Anastasio <[email protected]>
Cc: Steven Price <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r-- | fs/userfaultfd.c        |  4
-rw-r--r-- | include/linux/hugetlb.h |  3
-rw-r--r-- | mm/hugetlb.c            | 51

3 files changed, 58 insertions(+), 0 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0be8cdd4425a..e5ce3b4e6c3d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -15,6 +15,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
 #include <linux/mm.h>
+#include <linux/mmu_notifier.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
@@ -1449,6 +1450,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		vma->vm_flags = new_flags;
 		vma->vm_userfaultfd_ctx.ctx = ctx;
 
+		if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
+			hugetlb_unshare_all_pmds(vma);
+
 	skip:
 		prev = vma;
 		start = vma->vm_end;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e43668144664..0f5813522224 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -188,6 +188,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 bool is_hugetlb_entry_migration(pte_t pte);
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
 
 #else /* !CONFIG_HUGETLB_PAGE */
 
@@ -369,6 +370,8 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 	return 0;
 }
 
+static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 
 /*
  * hugepages at page global directory. If arch support
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3868f3126534..e86d3abcc300 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5691,6 +5691,57 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 	}
 }
 
+/*
+ * This function will unconditionally remove all the shared pmd pgtable entries
+ * within the specific vma for a hugetlbfs memory range.
+ */
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long sz = huge_page_size(h);
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_notifier_range range;
+	unsigned long address, start, end;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return;
+
+	start = ALIGN(vma->vm_start, PUD_SIZE);
+	end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+
+	if (start >= end)
+		return;
+
+	/*
+	 * No need to call adjust_range_if_pmd_sharing_possible(), because
+	 * we have already done the PUD_SIZE alignment.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+				start, end);
+	mmu_notifier_invalidate_range_start(&range);
+	i_mmap_lock_write(vma->vm_file->f_mapping);
+	for (address = start; address < end; address += PUD_SIZE) {
+		unsigned long tmp = address;
+
+		ptep = huge_pte_offset(mm, address, sz);
+		if (!ptep)
+			continue;
+		ptl = huge_pte_lock(h, mm, ptep);
+		/* We don't want 'address' to be changed */
+		huge_pmd_unshare(mm, vma, &tmp, ptep);
+		spin_unlock(ptl);
+	}
+	flush_hugetlb_tlb_range(vma, start, end);
+	i_mmap_unlock_write(vma->vm_file->f_mapping);
+	/*
+	 * No need to call mmu_notifier_invalidate_range(), see
+	 * Documentation/vm/mmu_notifier.rst.
+	 */
+	mmu_notifier_invalidate_range_end(&range);
+}
+
 #ifdef CONFIG_CMA
 static bool cma_reserve_called __initdata;
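Why the PUD_SIZE alignment in hugetlb_unshare_all_pmds() suffices: huge pmd
sharing only ever operates on whole, PUD-aligned regions, so only the
PUD-aligned subrange fully covered by the vma can contain shared pmd pages.
The standalone sketch below walks through the arithmetic; the simplified
ALIGN macros and the 1 GiB x86-64 PUD_SIZE are assumptions for illustration,
not taken from the patch.

#include <stdio.h>

#define PUD_SIZE	(1UL << 30)		/* assumption: x86-64's 1 GiB */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* round up */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))	/* round down */

int main(void)
{
	/* Example vma spanning 2 GiB, not PUD-aligned at either end */
	unsigned long vm_start = 0x50000000UL, vm_end = 0xD0000000UL;
	unsigned long start = ALIGN(vm_start, PUD_SIZE);	/* 0x80000000 */
	unsigned long end = ALIGN_DOWN(vm_end, PUD_SIZE);	/* 0xc0000000 */

	printf("scan [%#lx, %#lx)\n", start, end);
	/* Mirrors the kernel loop above: a single step at 0x80000000 */
	for (unsigned long addr = start; addr < end; addr += PUD_SIZE)
		printf("would check pud at %#lx\n", addr);
	return 0;
}

If the vma covers no full, aligned PUD region, start >= end and the function
returns before taking any locks, which is also why no
adjust_range_if_pmd_sharing_possible() call is needed.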