Diffstat (limited to 'mm')
-rw-r--r--  mm/debug_vm_pgtable.c | 12
-rw-r--r--  mm/filemap.c          |  3
-rw-r--r--  mm/gup.c              |  2
-rw-r--r--  mm/huge_memory.c      | 11
-rw-r--r--  mm/internal.h         |  4
-rw-r--r--  mm/kasan/init.c       | 18
-rw-r--r--  mm/kasan/kasan.h      |  6
-rw-r--r--  mm/kfence/core.c      |  5
-rw-r--r--  mm/kmemleak.c         |  5
-rw-r--r--  mm/ksm.c              |  4
-rw-r--r--  mm/memcontrol.c       | 22
-rw-r--r--  mm/memfd.c            |  2
-rw-r--r--  mm/memory-failure.c   | 37
-rw-r--r--  mm/memory.c           |  5
-rw-r--r--  mm/migrate.c          |  2
-rw-r--r--  mm/migrate_device.c   |  2
-rw-r--r--  mm/mmap.c             | 14
-rw-r--r--  mm/mprotect.c         |  2
-rw-r--r--  mm/nommu.c            |  4
-rw-r--r--  mm/page_alloc.c       | 21
-rw-r--r--  mm/pagewalk.c         |  2
-rw-r--r--  mm/percpu.c           | 69
-rw-r--r--  mm/userfaultfd.c      |  2
-rw-r--r--  mm/util.c             |  6
-rw-r--r--  mm/vmalloc.c          | 26
25 files changed, 155 insertions, 131 deletions
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index d61eaa075c75..48e329ea5ba3 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -109,10 +109,10 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx) WARN_ON(!pte_same(pte, pte)); WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte)))); WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte)))); - WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte)))); + WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma))); WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte)))); WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte)))); - WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte)))); + WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma)))); WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte)))); WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte)))); } @@ -156,7 +156,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args) pte = pte_mkclean(pte); set_pte_at(args->mm, args->vaddr, args->ptep, pte); flush_dcache_page(page); - pte = pte_mkwrite(pte); + pte = pte_mkwrite(pte, args->vma); pte = pte_mkdirty(pte); ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1); pte = ptep_get(args->ptep); @@ -202,10 +202,10 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) WARN_ON(!pmd_same(pmd, pmd)); WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd)))); WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd)))); - WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd)))); + WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma))); WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd)))); WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd)))); - WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd)))); + WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma)))); WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd)))); WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd)))); /* @@ -256,7 +256,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) pmd = pmd_mkclean(pmd); set_pmd_at(args->mm, vaddr, args->pmdp, pmd); flush_dcache_page(page); - pmd = pmd_mkwrite(pmd); + pmd = pmd_mkwrite(pmd, args->vma); pmd = pmd_mkdirty(pmd); pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1); pmd = READ_ONCE(*args->pmdp); diff --git a/mm/filemap.c b/mm/filemap.c index bf6219d9aaac..582f5317ff71 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -121,9 +121,6 @@ * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->block_dirty_folio) - * - * ->i_mmap_rwsem - * ->tasklist_lock (memory_failure, collect_procs_ao) */ static void page_cache_delete(struct address_space *mapping, @@ -1051,7 +1051,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) !writable_file_mapping_allowed(vma, gup_flags)) return -EFAULT; - if (!(vm_flags & VM_WRITE)) { + if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. 
*/ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fcafd9b69665..064fbd90822b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -551,7 +551,7 @@ __setup("transparent_hugepage=", setup_transparent_hugepage); pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE)) - pmd = pmd_mkwrite(pmd); + pmd = pmd_mkwrite(pmd, vma); return pmd; } @@ -1566,7 +1566,7 @@ out_map: pmd = pmd_modify(oldpmd, vma->vm_page_prot); pmd = pmd_mkyoung(pmd); if (writable) - pmd = pmd_mkwrite(pmd); + pmd = pmd_mkwrite(pmd, vma); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); @@ -1675,6 +1675,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, */ orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd, tlb->fullmm); + arch_check_zapped_pmd(vma, orig_pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); if (vma_is_special_huge(vma)) { if (arch_needs_pgtable_deposit()) @@ -1919,7 +1920,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, /* See change_pte_range(). */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) && can_change_pmd_writable(vma, addr, entry)) - entry = pmd_mkwrite(entry); + entry = pmd_mkwrite(entry, vma); ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); @@ -2233,7 +2234,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } else { entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot)); if (write) - entry = pte_mkwrite(entry); + entry = pte_mkwrite(entry, vma); if (anon_exclusive) SetPageAnonExclusive(page + i); if (!young) @@ -3265,7 +3266,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); if (is_writable_migration_entry(entry)) - pmde = pmd_mkwrite(pmde); + pmde = pmd_mkwrite(pmde, vma); if (pmd_swp_uffd_wp(*pvmw->pmd)) pmde = pmd_mkuffd_wp(pmde); if (!is_migration_entry_young(entry)) diff --git a/mm/internal.h b/mm/internal.h index d1d4bf4e63c0..30cf724ddbce 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -556,14 +556,14 @@ static inline bool is_exec_mapping(vm_flags_t flags) } /* - * Stack area - automatically grows in one direction + * Stack area (including shadow stacks) * * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous: * do_mmap() forbids all other combinations. 
*/ static inline bool is_stack_mapping(vm_flags_t flags) { - return (flags & VM_STACK) == VM_STACK; + return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK); } /* diff --git a/mm/kasan/init.c b/mm/kasan/init.c index dcfec277e839..89895f38f722 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, return 0; } +void __weak __meminit pmd_init(void *addr) +{ +} + static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end) { @@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, if (!p) return -ENOMEM; } else { - pud_populate(&init_mm, pud, - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); + pmd_init(p); + pud_populate(&init_mm, pud, p); } } zero_pmd_populate(pud, addr, next); @@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, return 0; } +void __weak __meminit pud_init(void *addr) +{ +} + static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end) { @@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, if (!p) return -ENOMEM; } else { - p4d_populate(&init_mm, p4d, - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); + pud_init(p); + p4d_populate(&init_mm, p4d, p); } } zero_pud_populate(p4d, addr, next); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 2e973b36fe07..f70e3d7a602e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -291,16 +291,22 @@ struct kasan_stack_ring { #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +#ifndef __HAVE_ARCH_SHADOW_MAP static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT); } +#endif static __always_inline bool addr_has_metadata(const void *addr) { +#ifdef __HAVE_ARCH_SHADOW_MAP + return (kasan_mem_to_shadow((void *)addr) != NULL); +#else return (kasan_reset_tag(addr) >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +#endif } /** diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 96fd0411f5c5..3872528d0963 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h) */ static unsigned long kfence_init_pool(void) { - unsigned long addr = (unsigned long)__kfence_pool; + unsigned long addr; struct page *pages; int i; if (!arch_kfence_init_pool()) - return addr; + return (unsigned long)__kfence_pool; + addr = (unsigned long)__kfence_pool; pages = virt_to_page(__kfence_pool); /* diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 2918150e31bd..54c2c90d3abc 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1584,6 +1584,9 @@ static void kmemleak_scan(void) for (pfn = start_pfn; pfn < end_pfn; pfn++) { struct page *page = pfn_to_online_page(pfn); + if (!(pfn & 63)) + cond_resched(); + if (!page) continue; @@ -1594,8 +1597,6 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); - if (!(pfn & 63)) - cond_resched(); } } put_online_mems(); @@ -2925,7 +2925,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill, struct anon_vma *av = rmap_item->anon_vma; anon_vma_lock_read(av); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { struct anon_vma_chain *vmac; unsigned long addr; @@ -2944,7 +2944,7 @@ void collect_procs_ksm(struct page *page, struct list_head 
*to_kill, } } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); anon_vma_unlock_read(av); } } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b29b850cf399..a4d3282493b6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5326,7 +5326,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue); memcg->deferred_split_queue.split_queue_len = 0; #endif - idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); lru_gen_init_memcg(memcg); return memcg; fail: @@ -5398,14 +5397,27 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) if (alloc_shrinker_info(memcg)) goto offline_kmem; - /* Online state pins memcg ID, memcg ID pins CSS */ - refcount_set(&memcg->id.ref, 1); - css_get(css); - if (unlikely(mem_cgroup_is_root(memcg))) queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME); lru_gen_online_memcg(memcg); + + /* Online state pins memcg ID, memcg ID pins CSS */ + refcount_set(&memcg->id.ref, 1); + css_get(css); + + /* + * Ensure mem_cgroup_from_id() works once we're fully online. + * + * We could do this earlier and require callers to filter with + * css_tryget_online(). But right now there are no users that + * need earlier access, and the workingset code relies on the + * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So + * publish it here at the end of onlining. This matches the + * regular ID destruction during offlining. + */ + idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + return 0; offline_kmem: memcg_offline_kmem(memcg); diff --git a/mm/memfd.c b/mm/memfd.c index 1cad1904fc26..2dba2cb6f0d0 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create, return -EINVAL; if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) { - pr_info_ratelimited( + pr_warn_once( "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n", current->comm, task_pid_nr(current)); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 881c35ef1daa..4d6e43c88489 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -547,8 +547,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, * on behalf of the thread group. Return task_struct of the (first found) * dedicated thread if found, and return NULL otherwise. * - * We already hold read_lock(&tasklist_lock) in the caller, so we don't - * have to call rcu_read_lock/unlock() in this function. + * We already hold rcu lock in the caller, so we don't have to call + * rcu_read_lock/unlock() in this function. 
*/ static struct task_struct *find_early_kill_thread(struct task_struct *tsk) { @@ -609,7 +609,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, return; pgoff = page_to_pgoff(page); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { struct anon_vma_chain *vmac; struct task_struct *t = task_early_kill(tsk, force_early); @@ -626,7 +626,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, add_to_kill_anon_file(t, page, vma, to_kill); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); anon_vma_unlock_read(av); } @@ -642,7 +642,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, pgoff_t pgoff; i_mmap_lock_read(mapping); - read_lock(&tasklist_lock); + rcu_read_lock(); pgoff = page_to_pgoff(page); for_each_process(tsk) { struct task_struct *t = task_early_kill(tsk, force_early); @@ -662,7 +662,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, add_to_kill_anon_file(t, page, vma, to_kill); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); i_mmap_unlock_read(mapping); } @@ -685,7 +685,7 @@ static void collect_procs_fsdax(struct page *page, struct task_struct *tsk; i_mmap_lock_read(mapping); - read_lock(&tasklist_lock); + rcu_read_lock(); for_each_process(tsk) { struct task_struct *t = task_early_kill(tsk, true); @@ -696,7 +696,7 @@ static void collect_procs_fsdax(struct page *page, add_to_kill_fsdax(t, page, vma, to_kill, pgoff); } } - read_unlock(&tasklist_lock); + rcu_read_unlock(); i_mmap_unlock_read(mapping); } #endif /* CONFIG_FS_DAX */ @@ -717,7 +717,7 @@ static void collect_procs(struct page *page, struct list_head *tokill, collect_procs_file(page, tokill, force_early); } -struct hwp_walk { +struct hwpoison_walk { struct to_kill tk; unsigned long pfn; int flags; @@ -752,7 +752,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, #ifdef CONFIG_TRANSPARENT_HUGEPAGE static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, - struct hwp_walk *hwp) + struct hwpoison_walk *hwp) { pmd_t pmd = *pmdp; unsigned long pfn; @@ -770,7 +770,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, } #else static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, - struct hwp_walk *hwp) + struct hwpoison_walk *hwp) { return 0; } @@ -779,7 +779,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct hwp_walk *hwp = walk->private; + struct hwpoison_walk *hwp = walk->private; int ret = 0; pte_t *ptep, *mapped_pte; spinlock_t *ptl; @@ -813,7 +813,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct hwp_walk *hwp = walk->private; + struct hwpoison_walk *hwp = walk->private; pte_t pte = huge_ptep_get(ptep); struct hstate *h = hstate_vma(walk->vma); @@ -824,7 +824,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, #define hwpoison_hugetlb_range NULL #endif -static const struct mm_walk_ops hwp_walk_ops = { +static const struct mm_walk_ops hwpoison_walk_ops = { .pmd_entry = hwpoison_pte_range, .hugetlb_entry = hwpoison_hugetlb_range, .walk_lock = PGWALK_RDLOCK, @@ -847,7 +847,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, int flags) { int ret; - struct hwp_walk priv = { + struct hwpoison_walk priv = { .pfn = pfn, 
}; priv.tk.tsk = p; @@ -856,7 +856,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, return -EFAULT; mmap_read_lock(p->mm); - ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, + ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops, (void *)&priv); if (ret == 1 && priv.tk.addr) kill_proc(&priv.tk, pfn, flags); @@ -1562,7 +1562,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, * Here we are interested only in user-mapped pages, so skip any * other types of pages. */ - if (PageReserved(p) || PageSlab(p) || PageTable(p)) + if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p)) return true; if (!(PageLRU(hpage) || PageHuge(p))) return true; @@ -2533,7 +2533,8 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } - if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio)) + if (folio_test_slab(folio) || PageTable(&folio->page) || + folio_test_reserved(folio) || PageOffline(&folio->page)) goto unlock_mutex; /* diff --git a/mm/memory.c b/mm/memory.c index 405a483d2fd1..6c264d2f969c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1430,6 +1430,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, continue; ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); + arch_check_zapped_pte(vma, ptent); tlb_remove_tlb_entry(tlb, pte, addr); zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent); @@ -4124,7 +4125,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) entry = mk_pte(&folio->page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) - entry = pte_mkwrite(pte_mkdirty(entry)); + entry = pte_mkwrite(pte_mkdirty(entry), vma); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); @@ -4842,7 +4843,7 @@ out_map: pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_mkyoung(pte); if (writable) - pte = pte_mkwrite(pte); + pte = pte_mkwrite(pte, vma); ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); pte_unmap_unlock(vmf->pte, vmf->ptl); diff --git a/mm/migrate.c b/mm/migrate.c index 78c9bd505b62..b7fa020003f3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -220,7 +220,7 @@ static bool remove_migration_pte(struct folio *folio, if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) pte = pte_mkdirty(pte); if (is_writable_migration_entry(entry)) - pte = pte_mkwrite(pte); + pte = pte_mkwrite(pte, vma); else if (pte_swp_uffd_wp(old_pte)) pte = pte_mkuffd_wp(pte); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index d69131adc51c..8ac1f79f754a 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -624,7 +624,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, } entry = mk_pte(page, vma->vm_page_prot); if (vma->vm_flags & VM_WRITE) - entry = pte_mkwrite(pte_mkdirty(entry)); + entry = pte_mkwrite(pte_mkdirty(entry), vma); } ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); diff --git a/mm/mmap.c b/mm/mmap.c index 514ced13c65c..b56a7f0c9f85 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1182,11 +1182,11 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode, */ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long pgoff, - unsigned long *populate, struct list_head *uf) + unsigned long flags, vm_flags_t vm_flags, + unsigned long pgoff, unsigned long *populate, + struct list_head *uf) { 
struct mm_struct *mm = current->mm; - vm_flags_t vm_flags; int pkey = 0; *populate = 0; @@ -1246,7 +1246,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ - vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | + vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_LOCKED) @@ -1564,7 +1564,7 @@ retry: gap = mas.index; gap += (info->align_offset - gap) & info->align_mask; tmp = mas_next(&mas, ULONG_MAX); - if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */ + if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */ if (vm_start_gap(tmp) < gap + length - 1) { low_limit = tmp->vm_end; mas_reset(&mas); @@ -1616,7 +1616,7 @@ retry: gap -= (gap - info->align_offset) & info->align_mask; gap_end = mas.last; tmp = mas_next(&mas, ULONG_MAX); - if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */ + if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */ if (vm_start_gap(tmp) <= gap_end) { high_limit = vm_start_gap(tmp); mas_reset(&mas); @@ -2998,7 +2998,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, file = get_file(vma->vm_file); ret = do_mmap(vma->vm_file, start, size, - prot, flags, pgoff, &populate, NULL); + prot, flags, 0, pgoff, &populate, NULL); fput(file); out: mmap_write_unlock(mm); diff --git a/mm/mprotect.c b/mm/mprotect.c index 130db91d3a8c..b94fbb45d5c7 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -185,7 +185,7 @@ static long change_pte_range(struct mmu_gather *tlb, if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent) && can_change_pte_writable(vma, addr, ptent)) - ptent = pte_mkwrite(ptent); + ptent = pte_mkwrite(ptent, vma); ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); if (pte_needs_flush(oldpte, ptent)) diff --git a/mm/nommu.c b/mm/nommu.c index 8dba41cfc44d..7f9e9e5a0e12 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1016,6 +1016,7 @@ unsigned long do_mmap(struct file *file, unsigned long len, unsigned long prot, unsigned long flags, + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf) @@ -1023,7 +1024,6 @@ unsigned long do_mmap(struct file *file, struct vm_area_struct *vma; struct vm_region *region; struct rb_node *rb; - vm_flags_t vm_flags; unsigned long capabilities, result; int ret; VMA_ITERATOR(vmi, current->mm, 0); @@ -1043,7 +1043,7 @@ unsigned long do_mmap(struct file *file, /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ - vm_flags = determine_vm_flags(file, prot, flags, capabilities); + vm_flags |= determine_vm_flags(file, prot, flags, capabilities); /* we're going to need to record the mapping */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 452459836b71..0c5be12f9336 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2641,12 +2641,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, do { page = NULL; spin_lock_irqsave(&zone->lock, flags); - /* - * order-0 request can reach here when the pcplist is skipped - * due to non-CMA allocation context. HIGHATOMIC area is - * reserved for high-order atomic allocation, so order-0 - * request should skip it. 
- */ if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { @@ -2780,17 +2774,10 @@ struct page *rmqueue(struct zone *preferred_zone, WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); if (likely(pcp_allowed_order(order))) { - /* - * MIGRATE_MOVABLE pcplist could have the pages on CMA area and - * we need to skip it when CMA area isn't allowed. - */ - if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA || - migratetype != MIGRATE_MOVABLE) { - page = rmqueue_pcplist(preferred_zone, zone, order, - migratetype, alloc_flags); - if (likely(page)) - goto out; - } + page = rmqueue_pcplist(preferred_zone, zone, order, + migratetype, alloc_flags); + if (likely(page)) + goto out; } page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags, diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 9b2d23fbf4d3..b7d7e4fcfad7 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -58,7 +58,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte = pte_offset_map(pmd, addr); if (pte) { err = walk_pte_range_inner(pte, addr, end, walk); - if (walk->mm != &init_mm) + if (walk->mm != &init_mm && addr < TASK_SIZE) pte_unmap(pte); } } else { diff --git a/mm/percpu.c b/mm/percpu.c index 28e07ede46f6..a7665de8485f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1890,13 +1890,15 @@ fail_unlock: fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); - if (!is_atomic && do_warn && warn_limit) { + if (do_warn && warn_limit) { pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", size, align, is_atomic, err); - dump_stack(); + if (!is_atomic) + dump_stack(); if (!--warn_limit) pr_info("limit reached, disable warning\n"); } + if (is_atomic) { /* see the flag handling in pcpu_balance_workfn() */ pcpu_atomic_alloc_failed = true; @@ -2581,14 +2583,12 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, { size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; size_t static_size, dyn_size; - struct pcpu_chunk *chunk; unsigned long *group_offsets; size_t *group_sizes; unsigned long *unit_off; unsigned int cpu; int *unit_map; int group, unit, i; - int map_size; unsigned long tmp_addr; size_t alloc_size; @@ -2615,7 +2615,6 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE)); PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE); - PCPU_SETUP_BUG_ON(!ai->dyn_size); PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE)); PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) || IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE))); @@ -2698,7 +2697,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_atom_size = ai->atom_size; - pcpu_chunk_struct_size = struct_size(chunk, populated, + pcpu_chunk_struct_size = struct_size((struct pcpu_chunk *)0, populated, BITS_TO_LONGS(pcpu_unit_pages)); pcpu_stats_save_ai(ai); @@ -2735,29 +2734,23 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, dyn_size = ai->dyn_size - (static_size - ai->static_size); /* - * Initialize first chunk. - * If the reserved_size is non-zero, this initializes the reserved - * chunk. If the reserved_size is zero, the reserved chunk is NULL - * and the dynamic region is initialized here. 
The first chunk, - * pcpu_first_chunk, will always point to the chunk that serves - * the dynamic region. + * Initialize first chunk: + * This chunk is broken up into 3 parts: + * < static | [reserved] | dynamic > + * - static - there is no backing chunk because these allocations can + * never be freed. + * - reserved (pcpu_reserved_chunk) - exists primarily to serve + * allocations from module load. + * - dynamic (pcpu_first_chunk) - serves the dynamic part of the first + * chunk. */ tmp_addr = (unsigned long)base_addr + static_size; - map_size = ai->reserved_size ?: dyn_size; - chunk = pcpu_alloc_first_chunk(tmp_addr, map_size); - - /* init dynamic chunk if necessary */ - if (ai->reserved_size) { - pcpu_reserved_chunk = chunk; + if (ai->reserved_size) + pcpu_reserved_chunk = pcpu_alloc_first_chunk(tmp_addr, + ai->reserved_size); + tmp_addr = (unsigned long)base_addr + static_size + ai->reserved_size; + pcpu_first_chunk = pcpu_alloc_first_chunk(tmp_addr, dyn_size); - tmp_addr = (unsigned long)base_addr + static_size + - ai->reserved_size; - map_size = dyn_size; - chunk = pcpu_alloc_first_chunk(tmp_addr, map_size); - } - - /* link the first chunk in */ - pcpu_first_chunk = chunk; pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -3189,32 +3182,26 @@ void __init __weak pcpu_populate_pte(unsigned long addr) pmd_t *pmd; if (pgd_none(*pgd)) { - p4d_t *new; - - new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE); - if (!new) + p4d = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE); + if (!p4d) goto err_alloc; - pgd_populate(&init_mm, pgd, new); + pgd_populate(&init_mm, pgd, p4d); } p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - if (!new) + pud = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); + if (!pud) goto err_alloc; - p4d_populate(&init_mm, p4d, new); + p4d_populate(&init_mm, p4d, pud); } pud = pud_offset(p4d, addr); if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); - if (!new) + pmd = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); + if (!pmd) goto err_alloc; - pud_populate(&init_mm, pud, new); + pud_populate(&init_mm, pud, pmd); } pmd = pmd_offset(pud, addr); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 0fc69efa4f1f..96d9eae5c7cc 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -86,7 +86,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd, if (page_in_cache && !vm_shared) writable = false; if (writable) - _dst_pte = pte_mkwrite(_dst_pte); + _dst_pte = pte_mkwrite(_dst_pte, dst_vma); if (flags & MFILL_ATOMIC_WP) _dst_pte = pte_mkuffd_wp(_dst_pte); diff --git a/mm/util.c b/mm/util.c index 4ed8b9b5273c..8cbbfd3a3d59 100644 --- a/mm/util.c +++ b/mm/util.c @@ -543,7 +543,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, if (!ret) { if (mmap_write_lock_killable(mm)) return -EINTR; - ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate, + ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate, &uf); mmap_write_unlock(mm); userfaultfd_unmap_complete(mm, &uf); @@ -1068,7 +1068,9 @@ void mem_dump_obj(void *object) if (vmalloc_dump_obj(object)) return; - if (virt_addr_valid(object)) + if (is_vmalloc_addr(object)) + type = "vmalloc memory"; + else if (virt_addr_valid(object)) type = "non-slab/vmalloc memory"; else if (object == NULL) type = "NULL pointer"; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 228a4a5312f2..ef8599d394fd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c 
@@ -4278,14 +4278,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 #ifdef CONFIG_PRINTK
 bool vmalloc_dump_obj(void *object)
 {
-	struct vm_struct *vm;
 	void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+	const void *caller;
+	struct vm_struct *vm;
+	struct vmap_area *va;
+	unsigned long addr;
+	unsigned int nr_pages;
 
-	vm = find_vm_area(objp);
-	if (!vm)
+	if (!spin_trylock(&vmap_area_lock))
+		return false;
+	va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+	if (!va) {
+		spin_unlock(&vmap_area_lock);
 		return false;
+	}
+
+	vm = va->vm;
+	if (!vm) {
+		spin_unlock(&vmap_area_lock);
+		return false;
+	}
+	addr = (unsigned long)vm->addr;
+	caller = vm->caller;
+	nr_pages = vm->nr_pages;
+	spin_unlock(&vmap_area_lock);
 	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
-		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+		nr_pages, addr, caller);
 	return true;
 }
 #endif
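
Most of the churn in this series comes from pte_mkwrite()/pmd_mkwrite() growing a struct vm_area_struct * argument, so an architecture can choose a shadow-stack PTE encoding when the VMA is a shadow stack. A minimal sketch of the resulting shape, assuming an arch helper pte_mkwrite_shstk() along the lines of x86's; the generic fallback simply ignores the VMA and keeps the old behaviour via pte_mkwrite_novma():

#include <linux/mm.h>
#include <linux/pgtable.h>

/*
 * Sketch only, not the exact upstream code.  With the VMA available, a
 * shadow-stack architecture can emit the Write=0,Dirty=1 encoding for
 * VM_SHADOW_STACK mappings; everything else gets an ordinary writable
 * PTE.  pte_mkwrite_shstk() is assumed here to be an arch helper.
 */
static pte_t sketch_pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_SHADOW_STACK)
		return pte_mkwrite_shstk(pte);	/* shadow-stack encoding */

	return pte_mkwrite_novma(pte);		/* regular Write=1 PTE */
}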
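
Related to that, do_mmap() in both mm/mmap.c and mm/nommu.c now takes an explicit vm_flags argument that is OR-ed into the flags computed from prot and flags; the in-tree callers touched here (vm_mmap_pgoff(), remap_file_pages()) pass 0. A hedged sketch of the kind of caller the new argument is for, using a hypothetical sketch_alloc_shstk() wrapper that maps a shadow stack:

#include <linux/mm.h>
#include <linux/mman.h>

/*
 * Illustrative sketch: the extra vm_flags argument lets a caller inject
 * VMA bits that have no PROT_x / MAP_x encoding, such as VM_SHADOW_STACK.
 * The wrapper name and flag choice are for illustration only.
 */
static unsigned long sketch_alloc_shstk(unsigned long size)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, unused;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	addr = do_mmap(NULL, 0, size, PROT_READ,
		       MAP_ANONYMOUS | MAP_PRIVATE,
		       VM_SHADOW_STACK | VM_WRITE,	/* injected VMA flags */
		       0, &unused, NULL);

	mmap_write_unlock(mm);
	return addr;
}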
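
The mm/memory-failure.c and mm/ksm.c hunks switch the collect_procs_*() walks from read_lock(&tasklist_lock) to rcu_read_lock(): the task list is RCU-protected, so for_each_process() only needs an RCU read-side critical section, which avoids holding a contended global rwlock while poison handling scans every process. A small sketch of the pattern, assuming the caller pins a task before using it outside the critical section:

#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>

/*
 * Sketch of the RCU-based task-list walk: the critical section must
 * cover the whole for_each_process() loop, and a task must be pinned
 * with get_task_struct() before it is used after rcu_read_unlock().
 */
static struct task_struct *sketch_find_task(pid_t nr)
{
	struct task_struct *tsk, *found = NULL;

	rcu_read_lock();
	for_each_process(tsk) {
		if (task_pid_nr(tsk) == nr) {
			get_task_struct(tsk);	/* pin before leaving RCU */
			found = tsk;
			break;
		}
	}
	rcu_read_unlock();

	return found;	/* caller drops the reference with put_task_struct() */
}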