Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig          |  3
-rw-r--r-- | mm/compaction.c     | 43
-rw-r--r-- | mm/filemap.c        |  3
-rw-r--r-- | mm/huge_memory.c    | 18
-rw-r--r-- | mm/init-mm.c        |  3
-rw-r--r-- | mm/kasan/generic.c  | 10
-rw-r--r-- | mm/khugepaged.c     |  1
-rw-r--r-- | mm/memblock.c       |  5
-rw-r--r-- | mm/memcontrol.c     | 29
-rw-r--r-- | mm/memory-failure.c |  2
-rw-r--r-- | mm/memory-tiers.c   | 12
-rw-r--r-- | mm/memory.c         |  2
-rw-r--r-- | mm/memory_hotplug.c |  8
-rw-r--r-- | mm/migrate.c        |  2
-rw-r--r-- | mm/mm_init.c        |  6
-rw-r--r-- | mm/mmap.c           |  6
-rw-r--r-- | mm/page-writeback.c |  2
-rw-r--r-- | mm/page_alloc.c     |  7
-rw-r--r-- | mm/percpu.c         |  8
-rw-r--r-- | mm/readahead.c      |  4
-rw-r--r-- | mm/shmem.c          |  1
-rw-r--r-- | mm/swapfile.c       |  1
-rw-r--r-- | mm/userfaultfd.c    | 21
23 files changed, 146 insertions, 51 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 1902cfe4cc4f..ffc3a2ba3a8c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1258,6 +1258,9 @@ config LOCK_MM_AND_FIND_VMA
         bool
         depends on !STACK_GROWSUP
 
+config IOMMU_MM_DATA
+        bool
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/compaction.c b/mm/compaction.c
index 27ada42924d5..4add68d40e8d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -882,6 +882,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
         /* Time to isolate some pages for migration */
         for (; low_pfn < end_pfn; low_pfn++) {
+                bool is_dirty, is_unevictable;
 
                 if (skip_on_failure && low_pfn >= next_skip_pfn) {
                         /*
@@ -1079,8 +1080,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                 if (!folio_test_lru(folio))
                         goto isolate_fail_put;
 
+                is_unevictable = folio_test_unevictable(folio);
+
                 /* Compaction might skip unevictable pages but CMA takes them */
-                if (!(mode & ISOLATE_UNEVICTABLE) && folio_test_unevictable(folio))
+                if (!(mode & ISOLATE_UNEVICTABLE) && is_unevictable)
                         goto isolate_fail_put;
 
                 /*
@@ -1092,26 +1095,42 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                 if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_writeback(folio))
                         goto isolate_fail_put;
 
-                if ((mode & ISOLATE_ASYNC_MIGRATE) && folio_test_dirty(folio)) {
-                        bool migrate_dirty;
+                is_dirty = folio_test_dirty(folio);
+
+                if (((mode & ISOLATE_ASYNC_MIGRATE) && is_dirty) ||
+                    (mapping && is_unevictable)) {
+                        bool migrate_dirty = true;
+                        bool is_unmovable;
 
                         /*
                          * Only folios without mappings or that have
-                         * a ->migrate_folio callback are possible to
-                         * migrate without blocking. However, we may
-                         * be racing with truncation, which can free
-                         * the mapping. Truncation holds the folio lock
-                         * until after the folio is removed from the page
-                         * cache so holding it ourselves is sufficient.
+                         * a ->migrate_folio callback are possible to migrate
+                         * without blocking.
+                         *
+                         * Folios from unmovable mappings are not migratable.
+                         *
+                         * However, we can be racing with truncation, which can
+                         * free the mapping that we need to check. Truncation
+                         * holds the folio lock until after the folio is removed
+                         * from the page so holding it ourselves is sufficient.
+                         *
+                         * To avoid locking the folio just to check unmovable,
+                         * assume every unmovable folio is also unevictable,
+                         * which is a cheaper test.  If our assumption goes
+                         * wrong, it's not a correctness bug, just potentially
+                         * wasted cycles.
                          */
                         if (!folio_trylock(folio))
                                 goto isolate_fail_put;
 
                         mapping = folio_mapping(folio);
-                        migrate_dirty = !mapping ||
-                                        mapping->a_ops->migrate_folio;
+                        if ((mode & ISOLATE_ASYNC_MIGRATE) && is_dirty) {
+                                migrate_dirty = !mapping ||
+                                                mapping->a_ops->migrate_folio;
+                        }
+                        is_unmovable = mapping && mapping_unmovable(mapping);
                         folio_unlock(folio);
-                        if (!migrate_dirty)
+                        if (!migrate_dirty || is_unmovable)
                                 goto isolate_fail_put;
                 }
diff --git a/mm/filemap.c b/mm/filemap.c
index c8dafe70d4cc..750e779c23db 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -45,6 +45,7 @@
 #include <linux/migrate.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/splice.h>
+#include <linux/rcupdate_wait.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -2687,6 +2688,7 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count)
 
         return filemap_write_and_wait_range(mapping, pos, end);
 }
+EXPORT_SYMBOL_GPL(kiocb_write_and_wait);
 
 int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
 {
@@ -2714,6 +2716,7 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
         return invalidate_inode_pages2_range(mapping,
                                              pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(kiocb_invalidate_pages);
 
 /**
  * generic_file_read_iter - generic filesystem read routine
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 94ef5c02b459..94c958f7ebb5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -37,6 +37,7 @@
 #include <linux/page_owner.h>
 #include <linux/sched/sysctl.h>
 #include <linux/memory-tiers.h>
+#include <linux/compat.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -809,7 +810,10 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
 {
         loff_t off_end = off + len;
         loff_t off_align = round_up(off, size);
-        unsigned long len_pad, ret;
+        unsigned long len_pad, ret, off_sub;
+
+        if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
+                return 0;
 
         if (off_end <= off_align || (off_end - off_align) < size)
                 return 0;
@@ -835,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
         if (ret == addr)
                 return addr;
 
-        ret += (off - ret) & (size - 1);
+        off_sub = (off - ret) & (size - 1);
+
+        if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown &&
+            !off_sub)
+                return ret + size;
+
+        ret += off_sub;
         return ret;
 }
 
@@ -2437,7 +2447,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 page = pmd_page(old_pmd);
                 folio = page_folio(page);
                 if (!folio_test_dirty(folio) && pmd_dirty(old_pmd))
-                        folio_set_dirty(folio);
+                        folio_mark_dirty(folio);
                 if (!folio_test_referenced(folio) && pmd_young(old_pmd))
                         folio_set_referenced(folio);
                 folio_remove_rmap_pmd(folio, page, vma);
@@ -3563,7 +3573,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
         }
 
         if (pmd_dirty(pmdval))
-                folio_set_dirty(folio);
+                folio_mark_dirty(folio);
         if (pmd_write(pmdval))
                 entry = make_writable_migration_entry(page_to_pfn(page));
         else if (anon_exclusive)
diff --git a/mm/init-mm.c b/mm/init-mm.c
index cfd367822cdd..24c809379274 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -44,9 +44,6 @@ struct mm_struct init_mm = {
 #endif
         .user_ns        = &init_user_ns,
         .cpu_bitmap     = CPU_BITS_NONE,
-#ifdef CONFIG_IOMMU_SVA
-        .pasid          = IOMMU_PASID_INVALID,
-#endif
         INIT_MM_CONTEXT(init_mm)
 };
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 24c13dfb1e94..df6627f62402 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -487,6 +487,7 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
         __memset(alloc_meta, 0, sizeof(*alloc_meta));
 
         /*
+         * Prepare the lock for saving auxiliary stack traces.
          * Temporarily disable KASAN bug reporting to allow instrumented
          * raw_spin_lock_init to access aux_lock, which resides inside
          * of a redzone.
@@ -510,8 +511,13 @@ static void release_alloc_meta(struct kasan_alloc_meta *meta)
         stack_depot_put(meta->aux_stack[0]);
         stack_depot_put(meta->aux_stack[1]);
 
-        /* Zero out alloc meta to mark it as invalid. */
-        __memset(meta, 0, sizeof(*meta));
+        /*
+         * Zero out alloc meta to mark it as invalid but keep aux_lock
+         * initialized to avoid having to reinitialize it when another object
+         * is allocated in the same slot.
+         */
+        __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track));
+        __memset(meta->aux_stack, 0, sizeof(meta->aux_stack));
 }
 
 static void release_free_meta(const void *object, struct kasan_free_meta *meta)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3defe6713ef1..2b219acb528e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -17,6 +17,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/page_idle.h>
 #include <linux/page_table_check.h>
+#include <linux/rcupdate_wait.h>
 #include <linux/swapops.h>
 #include <linux/shmem_fs.h>
 #include <linux/ksm.h>
diff --git a/mm/memblock.c b/mm/memblock.c
index 8c194d8afeec..4dcb2ee35eca 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1885,7 +1885,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
         int mid = memblock_search(type, PFN_PHYS(pfn));
 
         if (mid == -1)
-                return -1;
+                return NUMA_NO_NODE;
 
         *start_pfn = PFN_DOWN(type->regions[mid].base);
         *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
@@ -2176,6 +2176,9 @@ static void __init memmap_init_reserved_pages(void)
                 start = region->base;
                 end = start + region->size;
 
+                if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES)
+                        nid = early_pfn_to_nid(PFN_DOWN(start));
+
                 reserve_bootmem_region(start, end, nid);
         }
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e4c8735e7c85..46d8d02114cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2623,8 +2623,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
 }
 
 /*
- * Scheduled by try_charge() to be executed from the userland return path
- * and reclaims memory over the high limit.
+ * Reclaims memory over the high limit. Called directly from
+ * try_charge() (context permitting), as well as from the userland
+ * return path where reclaim is always able to block.
  */
 void mem_cgroup_handle_over_high(gfp_t gfp_mask)
 {
@@ -2644,6 +2645,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask)
 
 retry_reclaim:
         /*
+         * Bail if the task is already exiting. Unlike memory.max,
+         * memory.high enforcement isn't as strict, and there is no
+         * OOM killer involved, which means the excess could already
+         * be much bigger (and still growing) than it could for
+         * memory.max; the dying task could get stuck in fruitless
+         * reclaim for a long time, which isn't desirable.
+         */
+        if (task_is_dying())
+                goto out;
+
+        /*
          * The allocating task should reclaim at least the batch size, but for
          * subsequent retries we only want to do what's necessary to prevent oom
          * or breaching resource isolation.
@@ -2693,6 +2705,9 @@ retry_reclaim:
         }
 
         /*
+         * Reclaim didn't manage to push usage below the limit, slow
+         * this allocating task down.
+         *
          * If we exit early, we're guaranteed to die (since
          * schedule_timeout_killable sets TASK_KILLABLE). This means we don't
          * need to account for any ill-begotten jiffies to pay them off later.
@@ -2887,11 +2902,17 @@ done_restock:
                 }
         } while ((memcg = parent_mem_cgroup(memcg)));
 
+        /*
+         * Reclaim is set up above to be called from the userland
+         * return path. But also attempt synchronous reclaim to avoid
+         * excessive overrun while the task is still inside the
+         * kernel. If this is successful, the return path will see it
+         * when it rechecks the overage and simply bail out.
+         */
         if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
             !(current->flags & PF_MEMALLOC) &&
-            gfpflags_allow_blocking(gfp_mask)) {
+            gfpflags_allow_blocking(gfp_mask))
                 mem_cgroup_handle_over_high(gfp_mask);
-        }
         return 0;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4f9b61f4a668..636280d04008 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p,
         int count = page_count(p) - 1;
 
         if (extra_pins)
-                count -= 1;
+                count -= folio_nr_pages(page_folio(p));
 
         if (count > 0) {
                 pr_err("%#lx: %s still referenced by %d users\n",
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 8d5291add2bc..5462d9e3c84c 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -109,7 +109,7 @@ static struct demotion_nodes *node_demotion __read_mostly;
 static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms);
 
 static bool default_dram_perf_error;
-static struct node_hmem_attrs default_dram_perf;
+static struct access_coordinate default_dram_perf;
 static int default_dram_perf_ref_nid = NUMA_NO_NODE;
 static const char *default_dram_perf_ref_source;
 
@@ -601,15 +601,15 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
 }
 EXPORT_SYMBOL_GPL(clear_node_memory_type);
 
-static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix)
+static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix)
 {
         pr_info(
 "%sread_latency: %u, write_latency: %u, read_bandwidth: %u, write_bandwidth: %u\n",
-                prefix, attrs->read_latency, attrs->write_latency,
-                attrs->read_bandwidth, attrs->write_bandwidth);
+                prefix, coord->read_latency, coord->write_latency,
+                coord->read_bandwidth, coord->write_bandwidth);
 }
 
-int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf,
+int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
                              const char *source)
 {
         int rc = 0;
@@ -666,7 +666,7 @@ out:
         return rc;
 }
 
-int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist)
+int mt_perf_to_adistance(struct access_coordinate *perf, int *adist)
 {
         if (default_dram_perf_error)
                 return -EIO;
diff --git a/mm/memory.c b/mm/memory.c
index 7e1f4849463a..89bcae0b224d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                         delay_rmap = 0;
                         if (!folio_test_anon(folio)) {
                                 if (pte_dirty(ptent)) {
-                                        folio_set_dirty(folio);
+                                        folio_mark_dirty(folio);
                                         if (tlb_delay_rmap(tlb)) {
                                                 delay_rmap = 1;
                                                 force_flush = 1;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b3c0ff52bb72..21890994c1d3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -101,9 +101,11 @@ static int set_memmap_mode(const char *val, const struct kernel_param *kp)
 
 static int get_memmap_mode(char *buffer, const struct kernel_param *kp)
 {
-        if (*((int *)kp->arg) == MEMMAP_ON_MEMORY_FORCE)
-                return sprintf(buffer, "force\n");
-        return param_get_bool(buffer, kp);
+        int mode = *((int *)kp->arg);
+
+        if (mode == MEMMAP_ON_MEMORY_FORCE)
+                return sprintf(buffer, "force\n");
+        return sprintf(buffer, "%c\n", mode ? 'Y' : 'N');
 }
 
 static const struct kernel_param_ops memmap_mode_ops = {
diff --git a/mm/migrate.c b/mm/migrate.c
index bde8273cf15b..cc9f2bcd73b4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -962,6 +962,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
 
                 if (!mapping)
                         rc = migrate_folio(mapping, dst, src, mode);
+                else if (mapping_unmovable(mapping))
+                        rc = -EOPNOTSUPP;
                 else if (mapping->a_ops->migrate_folio)
                         /*
                          * Most folios have a mapping and most filesystems
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 89dc29f1e6c6..2c19f5515e36 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -26,6 +26,7 @@
 #include <linux/pgtable.h>
 #include <linux/swap.h>
 #include <linux/cma.h>
+#include <linux/crash_dump.h>
 #include "internal.h"
 #include "slab.h"
 #include "shuffle.h"
@@ -381,6 +382,11 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                         goto out;
                 }
 
+                if (is_kdump_kernel()) {
+                        pr_warn("The system is under kdump, ignore kernelcore=mirror.\n");
+                        goto out;
+                }
+
                 for_each_mem_region(r) {
                         if (memblock_is_mirror(r))
                                 continue;
diff --git a/mm/mmap.c b/mm/mmap.c
index b78e83d351d2..d89770eaab6b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                 /*
                  * mmap_region() will call shmem_zero_setup() to create a file,
                  * so use shmem's get_unmapped_area in case it can be huge.
-                 * do_mmap() will clear pgoff, so match alignment.
                  */
-                pgoff = 0;
                 get_area = shmem_get_unmapped_area;
         } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
                 /* Ensures that larger anonymous mappings are THP aligned. */
                 get_area = thp_get_unmapped_area;
         }
 
+        /* Always treat pgoff as zero for anonymous memory. */
+        if (!file)
+                pgoff = 0;
+
         addr = get_area(file, addr, len, pgoff, flags);
         if (IS_ERR_VALUE(addr))
                 return addr;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index cd4e4ae77c40..02147b61712b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
          */
         dtc->wb_thresh = __wb_calc_thresh(dtc);
         dtc->wb_bg_thresh = dtc->thresh ?
-                div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
+                div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
 
         /*
          * In order to avoid the stacked BDI deadlock we need
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a01baf0454f8..150d4f23b010 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -916,6 +916,9 @@ static inline bool page_expected_state(struct page *page,
 #ifdef CONFIG_MEMCG
                         page->memcg_data |
 #endif
+#ifdef CONFIG_PAGE_POOL
+                        ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) |
+#endif
                         (page->flags & check_flags)))
                 return false;
 
@@ -942,6 +945,10 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
         if (unlikely(page->memcg_data))
                 bad_reason = "page still charged to cgroup";
 #endif
+#ifdef CONFIG_PAGE_POOL
+        if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE))
+                bad_reason = "page_pool leak";
+#endif
         return bad_reason;
 }
diff --git a/mm/percpu.c b/mm/percpu.c
index 7b97d31df767..4e11fc1e6def 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3333,13 +3333,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t
                 if (rc < 0)
                         panic("failed to map percpu area, err=%d\n", rc);
 
-                /*
-                 * FIXME: Archs with virtual cache should flush local
-                 * cache for the linear mapping here - something
-                 * equivalent to flush_cache_vmap() on the local cpu.
-                 * flush_cache_vmap() can't be used as most supporting
-                 * data structures are not set up yet.
-                 */
+                flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size);
 
                 /* copy static data */
                 memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
diff --git a/mm/readahead.c b/mm/readahead.c
index 23620c57c122..2648ec4f0494 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 
         if (!folio)
                 return -ENOMEM;
-        mark = round_up(mark, 1UL << order);
+        mark = round_down(mark, 1UL << order);
         if (index == mark)
                 folio_set_readahead(folio);
         err = filemap_add_folio(ractl->mapping, folio, index, gfp);
@@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
          * It's the expected callback index, assume sequential access.
          * Ramp up sizes, and push forward the readahead window.
          */
-        expected = round_up(ra->start + ra->size - ra->async_size,
+        expected = round_down(ra->start + ra->size - ra->async_size,
                         1UL << order);
         if (index == expected || index == (ra->start + ra->size)) {
                 ra->start += ra->size;
diff --git a/mm/shmem.c b/mm/shmem.c
index 928aa2304932..d7c84ff62186 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -79,6 +79,7 @@ static struct vfsmount *shm_mnt __ro_after_init;
 #include <linux/rmap.h>
 #include <linux/uuid.h>
 #include <linux/quotaops.h>
+#include <linux/rcupdate_wait.h>
 
 #include <linux/uaccess.h>
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 3eec686484ef..556ff7347d5f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -42,6 +42,7 @@
 #include <linux/completion.h>
 #include <linux/suspend.h>
 #include <linux/zswap.h>
+#include <linux/plist.h>
 
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 216ab4c8621f..75fcf1f783bc 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
                                               unsigned long dst_start,
                                               unsigned long src_start,
                                               unsigned long len,
+                                              atomic_t *mmap_changing,
                                               uffd_flags_t flags)
 {
         struct mm_struct *dst_mm = dst_vma->vm_mm;
@@ -472,6 +473,15 @@ retry:
                                 goto out;
                         }
                         mmap_read_lock(dst_mm);
+                        /*
+                         * If memory mappings are changing because of non-cooperative
+                         * operation (e.g. mremap) running in parallel, bail out and
+                         * request the user to retry later
+                         */
+                        if (mmap_changing && atomic_read(mmap_changing)) {
+                                err = -EAGAIN;
+                                break;
+                        }
 
                         dst_vma = NULL;
                         goto retry;
@@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
                                     unsigned long dst_start,
                                     unsigned long src_start,
                                     unsigned long len,
+                                    atomic_t *mmap_changing,
                                     uffd_flags_t flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
@@ -622,8 +633,8 @@ retry:
          * If this is a HUGETLB vma, pass off to appropriate routine
          */
         if (is_vm_hugetlb_page(dst_vma))
-                return mfill_atomic_hugetlb(dst_vma, dst_start,
-                                            src_start, len, flags);
+                return mfill_atomic_hugetlb(dst_vma, dst_start, src_start,
+                                            len, mmap_changing, flags);
 
         if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                 goto out_unlock;
@@ -1393,6 +1404,12 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
                         err = -ENOENT;
                         break;
                 }
+                /* Avoid moving zeropages for now */
+                if (is_huge_zero_pmd(*src_pmd)) {
+                        spin_unlock(ptl);
+                        err = -EBUSY;
+                        break;
+                }
 
                 /* Check if we can move the pmd without splitting it. */
                 if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||