diff options
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- | mm/khugepaged.c | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b538c3d48386..653dbb1ff05c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -4,7 +4,6 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/sched/mm.h> -#include <linux/sched/coredump.h> #include <linux/mmu_notifier.h> #include <linux/rmap.h> #include <linux/swap.h> @@ -20,6 +19,7 @@ #include <linux/rcupdate_wait.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> +#include <linux/dax.h> #include <linux/ksm.h> #include <asm/tlb.h> @@ -416,9 +416,11 @@ static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm) static bool hugepage_pmd_enabled(void) { /* - * We cover both the anon and the file-backed case here; file-backed + * We cover the anon, shmem and the file-backed case here; file-backed * hugepages, when configured in, are determined by the global control. * Anon pmd-sized hugepages are determined by the pmd-size control. + * Shmem pmd-sized hugepages are also determined by its pmd-size control, + * except when the global shmem_huge is set to SHMEM_HUGE_DENY. */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && hugepage_global_enabled()) @@ -430,6 +432,8 @@ static bool hugepage_pmd_enabled(void) if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) && hugepage_global_enabled()) return true; + if (IS_ENABLED(CONFIG_SHMEM) && shmem_hpage_pmd_enabled()) + return true; return false; } @@ -1011,7 +1015,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, }; if (!pte++) { - pte = pte_offset_map_nolock(mm, pmd, address, &ptl); + /* + * Here the ptl is only used to check pte_same() in + * do_swap_page(), so readonly version is enough. + */ + pte = pte_offset_map_ro_nolock(mm, pmd, address, &ptl); if (!pte) { mmap_read_unlock(mm); result = SCAN_PMD_NULL; @@ -1601,7 +1609,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED)) pml = pmd_lock(mm, pmd); - start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl); + start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl); if (!start_pte) /* mmap_lock + page lock should prevent this */ goto abort; if (!pml) @@ -1609,6 +1617,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, else if (ptl != pml) spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); + if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) + goto abort; + /* step 2: clear page table and adjust rmap */ for (i = 0, addr = haddr, pte = start_pte; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) { @@ -1641,7 +1652,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, nr_ptes++; } - pte_unmap(start_pte); if (!pml) spin_unlock(ptl); @@ -1654,14 +1664,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove empty page table */ if (!pml) { pml = pmd_lock(mm, pmd); - if (ptl != pml) + if (ptl != pml) { spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); + if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) { + flush_tlb_mm(mm); + goto unlock; + } + } } pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd); pmdp_get_lockless_sync(); + pte_unmap_unlock(start_pte, ptl); if (ptl != pml) - spin_unlock(ptl); - spin_unlock(pml); + spin_unlock(pml); mmu_notifier_invalidate_range_end(&range); @@ -1681,6 +1696,7 @@ abort: folio_ref_sub(folio, nr_ptes); add_mm_counter(mm, mm_counter_file(folio), -nr_ptes); } +unlock: if (start_pte) pte_unmap_unlock(start_pte, ptl); if (pml && pml != ptl) @@ -1822,6 +1838,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, if (result != SCAN_SUCCEED) goto out; + mapping_set_update(&xas, mapping); + __folio_set_locked(new_folio); if (is_shmem) __folio_set_swapbacked(new_folio); |