aboutsummaryrefslogtreecommitdiff
path: root/mm/khugepaged.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- mm/khugepaged.c 34
1 files changed, 26 insertions, 8 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b538c3d48386..653dbb1ff05c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -4,7 +4,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
-#include <linux/sched/coredump.h>
#include <linux/mmu_notifier.h>
#include <linux/rmap.h>
#include <linux/swap.h>
@@ -20,6 +19,7 @@
#include <linux/rcupdate_wait.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
+#include <linux/dax.h>
#include <linux/ksm.h>
#include <asm/tlb.h>
@@ -416,9 +416,11 @@ static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
static bool hugepage_pmd_enabled(void)
{
/*
- * We cover both the anon and the file-backed case here; file-backed
+ * We cover the anon, shmem and file-backed cases here; file-backed
* hugepages, when configured in, are determined by the global control.
* Anon pmd-sized hugepages are determined by the pmd-size control.
+ * Shmem pmd-sized hugepages are also determined by shmem's own pmd-size
+ * control, except when the global shmem_huge is set to SHMEM_HUGE_DENY.
*/
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
hugepage_global_enabled())
@@ -430,6 +432,8 @@ static bool hugepage_pmd_enabled(void)
if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
hugepage_global_enabled())
return true;
+ if (IS_ENABLED(CONFIG_SHMEM) && shmem_hpage_pmd_enabled())
+ return true;
return false;
}
@@ -1011,7 +1015,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
};
if (!pte++) {
- pte = pte_offset_map_nolock(mm, pmd, address, &ptl);
+ /*
+ * Here the ptl is only used to check pte_same() in
+ * do_swap_page(), so the read-only version is enough.
+ */
+ pte = pte_offset_map_ro_nolock(mm, pmd, address, &ptl);
if (!pte) {
mmap_read_unlock(mm);
result = SCAN_PMD_NULL;
@@ -1601,7 +1609,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
pml = pmd_lock(mm, pmd);
- start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
+ start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl);
if (!start_pte) /* mmap_lock + page lock should prevent this */
goto abort;
if (!pml)
@@ -1609,6 +1617,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
else if (ptl != pml)
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+ if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd))))
+ goto abort;
+
/* step 2: clear page table and adjust rmap */
for (i = 0, addr = haddr, pte = start_pte;
i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
@@ -1641,7 +1652,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
nr_ptes++;
}
- pte_unmap(start_pte);
if (!pml)
spin_unlock(ptl);
@@ -1654,14 +1664,19 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
/* step 4: remove empty page table */
if (!pml) {
pml = pmd_lock(mm, pmd);
- if (ptl != pml)
+ if (ptl != pml) {
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+ if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
+ flush_tlb_mm(mm);
+ goto unlock;
+ }
+ }
}
pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
pmdp_get_lockless_sync();
+ pte_unmap_unlock(start_pte, ptl);
if (ptl != pml)
- spin_unlock(ptl);
- spin_unlock(pml);
+ spin_unlock(pml);
mmu_notifier_invalidate_range_end(&range);
@@ -1681,6 +1696,7 @@ abort:
folio_ref_sub(folio, nr_ptes);
add_mm_counter(mm, mm_counter_file(folio), -nr_ptes);
}
+unlock:
if (start_pte)
pte_unmap_unlock(start_pte, ptl);
if (pml && pml != ptl)
@@ -1822,6 +1838,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
if (result != SCAN_SUCCEED)
goto out;
+ mapping_set_update(&xas, mapping);
+
__folio_set_locked(new_folio);
if (is_shmem)
__folio_set_swapbacked(new_folio);