Diffstat (limited to 'mm')
-rw-r--r--  mm/debug_vm_pgtable.c  |  12
-rw-r--r--  mm/filemap.c           |   3
-rw-r--r--  mm/gup.c               |   2
-rw-r--r--  mm/huge_memory.c       |  11
-rw-r--r--  mm/internal.h          |   4
-rw-r--r--  mm/kasan/init.c        |  18
-rw-r--r--  mm/kasan/kasan.h       |   6
-rw-r--r--  mm/kfence/core.c       |   5
-rw-r--r--  mm/kmemleak.c          |   5
-rw-r--r--  mm/ksm.c               |   4
-rw-r--r--  mm/memcontrol.c        |  22
-rw-r--r--  mm/memfd.c             |   2
-rw-r--r--  mm/memory-failure.c    |  37
-rw-r--r--  mm/memory.c            |   5
-rw-r--r--  mm/migrate.c           |   2
-rw-r--r--  mm/migrate_device.c    |   2
-rw-r--r--  mm/mmap.c              |  14
-rw-r--r--  mm/mprotect.c          |   2
-rw-r--r--  mm/nommu.c             |   4
-rw-r--r--  mm/page_alloc.c        |  21
-rw-r--r--  mm/pagewalk.c          |   2
-rw-r--r--  mm/percpu.c            |  69
-rw-r--r--  mm/userfaultfd.c       |   2
-rw-r--r--  mm/util.c              |   6
-rw-r--r--  mm/vmalloc.c           |  26
25 files changed, 155 insertions(+), 131 deletions(-)
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index d61eaa075c75..48e329ea5ba3 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -109,10 +109,10 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
WARN_ON(!pte_same(pte, pte));
WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
- WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
+ WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma)));
WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
- WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
+ WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma))));
WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
}
@@ -156,7 +156,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
pte = pte_mkclean(pte);
set_pte_at(args->mm, args->vaddr, args->ptep, pte);
flush_dcache_page(page);
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, args->vma);
pte = pte_mkdirty(pte);
ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
pte = ptep_get(args->ptep);
@@ -202,10 +202,10 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
WARN_ON(!pmd_same(pmd, pmd));
WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
- WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
+ WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma)));
WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
- WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
+ WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma))));
WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
/*
@@ -256,7 +256,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
pmd = pmd_mkclean(pmd);
set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
flush_dcache_page(page);
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, args->vma);
pmd = pmd_mkdirty(pmd);
pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
pmd = READ_ONCE(*args->pmdp);
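
The pte_mkwrite()/pmd_mkwrite() conversions above reflect the new calling convention: the helpers take the VMA so an architecture can decide per mapping what "writable" means. A minimal sketch of the arch side, assuming x86-style shadow stack semantics (the pte_mkwrite_shstk()/pte_mkwrite_novma() helper names are written from memory, not quoted from this tree):

	pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
	{
		/* Shadow stack PTEs are Write=0,Dirty=1 rather than Write=1. */
		if (vma->vm_flags & VM_SHADOW_STACK)
			return pte_mkwrite_shstk(pte);

		/* All other mappings keep the historical behaviour. */
		return pte_mkwrite_novma(pte);
	}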
diff --git a/mm/filemap.c b/mm/filemap.c
index bf6219d9aaac..582f5317ff71 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,9 +121,6 @@
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->block_dirty_folio)
- *
- * ->i_mmap_rwsem
- * ->tasklist_lock (memory_failure, collect_procs_ao)
*/
static void page_cache_delete(struct address_space *mapping,
diff --git a/mm/gup.c b/mm/gup.c
index 948f3b454b00..2f8a2d89fde1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1051,7 +1051,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
!writable_file_mapping_allowed(vma, gup_flags))
return -EFAULT;
- if (!(vm_flags & VM_WRITE)) {
+ if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
if (!(gup_flags & FOLL_FORCE))
return -EFAULT;
/* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
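
For GUP, a shadow stack VMA is treated like a read-only one: ordinary write pins fail and only a forced (ptrace-style) access gets through. Restated as a standalone predicate for clarity (a paraphrase of the check above, not a kernel API):

	static bool gup_write_allowed(vm_flags_t vm_flags, unsigned int gup_flags)
	{
		/*
		 * Shadow stack memory is CPU-writable, but must not be
		 * written through normal GUP; require FOLL_FORCE.
		 */
		if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK))
			return gup_flags & FOLL_FORCE;

		return true;
	}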
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fcafd9b69665..064fbd90822b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -551,7 +551,7 @@ __setup("transparent_hugepage=", setup_transparent_hugepage);
pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, vma);
return pmd;
}
@@ -1566,7 +1566,7 @@ out_map:
pmd = pmd_modify(oldpmd, vma->vm_page_prot);
pmd = pmd_mkyoung(pmd);
if (writable)
- pmd = pmd_mkwrite(pmd);
+ pmd = pmd_mkwrite(pmd, vma);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
@@ -1675,6 +1675,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd,
tlb->fullmm);
+ arch_check_zapped_pmd(vma, orig_pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (vma_is_special_huge(vma)) {
if (arch_needs_pgtable_deposit())
@@ -1919,7 +1920,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
/* See change_pte_range(). */
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
can_change_pmd_writable(vma, addr, entry))
- entry = pmd_mkwrite(entry);
+ entry = pmd_mkwrite(entry, vma);
ret = HPAGE_PMD_NR;
set_pmd_at(mm, addr, pmd, entry);
@@ -2233,7 +2234,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
} else {
entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
if (write)
- entry = pte_mkwrite(entry);
+ entry = pte_mkwrite(entry, vma);
if (anon_exclusive)
SetPageAnonExclusive(page + i);
if (!young)
@@ -3265,7 +3266,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
if (is_writable_migration_entry(entry))
- pmde = pmd_mkwrite(pmde);
+ pmde = pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_mkuffd_wp(pmde);
if (!is_migration_entry_young(entry))
diff --git a/mm/internal.h b/mm/internal.h
index d1d4bf4e63c0..30cf724ddbce 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -556,14 +556,14 @@ static inline bool is_exec_mapping(vm_flags_t flags)
}
/*
- * Stack area - automatically grows in one direction
+ * Stack area (including shadow stacks)
*
* VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
* do_mmap() forbids all other combinations.
*/
static inline bool is_stack_mapping(vm_flags_t flags)
{
- return (flags & VM_STACK) == VM_STACK;
+ return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}
/*
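
With this change shadow stack mappings are charged to the stack counters. For reference, the predicate feeds the per-mm accounting roughly as follows (simplified from mm/mmap.c's vm_stat_account(), quoted from memory):

	void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
	{
		WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) + npages);

		if (is_exec_mapping(flags))
			mm->exec_vm += npages;
		else if (is_stack_mapping(flags))
			mm->stack_vm += npages;	/* now includes VM_SHADOW_STACK */
		else if (is_data_mapping(flags))
			mm->data_vm += npages;
	}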
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index dcfec277e839..89895f38f722 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
return 0;
}
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end)
{
@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
if (!p)
return -ENOMEM;
} else {
- pud_populate(&init_mm, pud,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pmd_init(p);
+ pud_populate(&init_mm, pud, p);
}
}
zero_pmd_populate(pud, addr, next);
@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
return 0;
}
+void __weak __meminit pud_init(void *addr)
+{
+}
+
static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
if (!p)
return -ENOMEM;
} else {
- p4d_populate(&init_mm, p4d,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pud_init(p);
+ p4d_populate(&init_mm, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
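
The new __weak pmd_init()/pud_init() hooks let an architecture initialize a freshly allocated page-table page before it is linked into the tree, which matters where an "empty" entry is not all-zero. A hypothetical override (the invalid_pte_table symbol is illustrative, loosely modeled on MIPS-style page tables):

	void __meminit pmd_init(void *addr)
	{
		pmd_t *pmd = addr;
		int i;

		/* Point each slot at a shared dummy PTE table instead of 0. */
		for (i = 0; i < PTRS_PER_PMD; i++)
			pmd[i] = __pmd((unsigned long)invalid_pte_table);
	}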
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 2e973b36fe07..f70e3d7a602e 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -291,16 +291,22 @@ struct kasan_stack_ring {
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+#ifndef __HAVE_ARCH_SHADOW_MAP
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
<< KASAN_SHADOW_SCALE_SHIFT);
}
+#endif
static __always_inline bool addr_has_metadata(const void *addr)
{
+#ifdef __HAVE_ARCH_SHADOW_MAP
+ return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
return (kasan_reset_tag(addr) >=
kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
}
/**
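
Under __HAVE_ARCH_SHADOW_MAP the architecture supplies its own kasan_mem_to_shadow() that may return NULL for addresses with no shadow, so addr_has_metadata() reduces to a NULL check. A hedged sketch of what the arch side might look like (both helper names below are hypothetical):

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		unsigned long maddr = (unsigned long)addr;

		/* Only some VA regions carry shadow; the rest have none. */
		if (!kasan_addr_has_shadow_region(maddr))
			return NULL;

		return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) +
				kasan_shadow_region_offset(maddr));
	}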
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 96fd0411f5c5..3872528d0963 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
*/
static unsigned long kfence_init_pool(void)
{
- unsigned long addr = (unsigned long)__kfence_pool;
+ unsigned long addr;
struct page *pages;
int i;
if (!arch_kfence_init_pool())
- return addr;
+ return (unsigned long)__kfence_pool;
+ addr = (unsigned long)__kfence_pool;
pages = virt_to_page(__kfence_pool);
/*
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 2918150e31bd..54c2c90d3abc 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1584,6 +1584,9 @@ static void kmemleak_scan(void)
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
struct page *page = pfn_to_online_page(pfn);
+ if (!(pfn & 63))
+ cond_resched();
+
if (!page)
continue;
@@ -1594,8 +1597,6 @@ static void kmemleak_scan(void)
if (page_count(page) == 0)
continue;
scan_block(page, page + 1, NULL);
- if (!(pfn & 63))
- cond_resched();
}
}
put_online_mems();
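
Hoisting cond_resched() above the skip checks matters because a zone can contain long runs of pfns that all hit "continue" (offline holes, zero-refcount pages); with the yield at the top of the loop the scan cannot monopolize the CPU across such runs. The resulting loop shape, as a sketch:

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		struct page *page = pfn_to_online_page(pfn);

		if (!(pfn & 63))
			cond_resched();	/* yield even on skipped iterations */

		if (!page)
			continue;

		/* ... page checks, then scan_block(page, page + 1, NULL) ... */
	}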
diff --git a/mm/ksm.c b/mm/ksm.c
index 8d6aee05421d..981af9c72e7a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2925,7 +2925,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill,
struct anon_vma *av = rmap_item->anon_vma;
anon_vma_lock_read(av);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct anon_vma_chain *vmac;
unsigned long addr;
@@ -2944,7 +2944,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill,
}
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
}
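
Iterating the task list only needs RCU, not tasklist_lock; this and the matching memory-failure changes below drop the read lock so that poison handling no longer serializes against fork/exit. The generic pattern, as a sketch:

	rcu_read_lock();
	for_each_process(tsk) {
		/*
		 * Nothing here may sleep. If a task must be used after
		 * the loop, pin it first.
		 */
		if (task_is_interesting(tsk)) {	/* hypothetical filter */
			get_task_struct(tsk);
			break;
		}
	}
	rcu_read_unlock();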
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b29b850cf399..a4d3282493b6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5326,7 +5326,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
memcg->deferred_split_queue.split_queue_len = 0;
#endif
- idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
lru_gen_init_memcg(memcg);
return memcg;
fail:
@@ -5398,14 +5397,27 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (alloc_shrinker_info(memcg))
goto offline_kmem;
- /* Online state pins memcg ID, memcg ID pins CSS */
- refcount_set(&memcg->id.ref, 1);
- css_get(css);
-
if (unlikely(mem_cgroup_is_root(memcg)))
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
FLUSH_TIME);
lru_gen_online_memcg(memcg);
+
+ /* Online state pins memcg ID, memcg ID pins CSS */
+ refcount_set(&memcg->id.ref, 1);
+ css_get(css);
+
+ /*
+ * Ensure mem_cgroup_from_id() works once we're fully online.
+ *
+ * We could do this earlier and require callers to filter with
+ * css_tryget_online(). But right now there are no users that
+ * need earlier access, and the workingset code relies on the
+ * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So
+ * publish it here at the end of onlining. This matches the
+ * regular ID destruction during offlining.
+ */
+ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+
return 0;
offline_kmem:
memcg_offline_kmem(memcg);
diff --git a/mm/memfd.c b/mm/memfd.c
index 1cad1904fc26..2dba2cb6f0d0 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create,
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_info_ratelimited(
+ pr_warn_once(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 881c35ef1daa..4d6e43c88489 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -547,8 +547,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* on behalf of the thread group. Return task_struct of the (first found)
* dedicated thread if found, and return NULL otherwise.
*
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
*/
static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
{
@@ -609,7 +609,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
return;
pgoff = page_to_pgoff(page);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -626,7 +626,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
@@ -642,7 +642,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
pgoff_t pgoff;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
pgoff = page_to_pgoff(page);
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -662,7 +662,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
@@ -685,7 +685,7 @@ static void collect_procs_fsdax(struct page *page,
struct task_struct *tsk;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, true);
@@ -696,7 +696,7 @@ static void collect_procs_fsdax(struct page *page,
add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */
@@ -717,7 +717,7 @@ static void collect_procs(struct page *page, struct list_head *tokill,
collect_procs_file(page, tokill, force_early);
}
-struct hwp_walk {
+struct hwpoison_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
@@ -752,7 +752,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
@@ -770,7 +770,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
return 0;
}
@@ -779,7 +779,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
int ret = 0;
pte_t *ptep, *mapped_pte;
spinlock_t *ptl;
@@ -813,7 +813,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);
@@ -824,7 +824,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
#define hwpoison_hugetlb_range NULL
#endif
-static const struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwpoison_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
.walk_lock = PGWALK_RDLOCK,
@@ -847,7 +847,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
int flags)
{
int ret;
- struct hwp_walk priv = {
+ struct hwpoison_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;
@@ -856,7 +856,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
return -EFAULT;
mmap_read_lock(p->mm);
- ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+ ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
@@ -1562,7 +1562,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* Here we are interested only in user-mapped pages, so skip any
* other types of pages.
*/
- if (PageReserved(p) || PageSlab(p) || PageTable(p))
+ if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
return true;
if (!(PageLRU(hpage) || PageHuge(p)))
return true;
@@ -2533,7 +2533,8 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ if (folio_test_slab(folio) || PageTable(&folio->page) ||
+ folio_test_reserved(folio) || PageOffline(&folio->page))
goto unlock_mutex;
/*
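
Beyond the s/hwp_walk/hwpoison_walk/ rename and the tasklist_lock-to-RCU conversion, kill_accessing_process() is a standard walk_page_range() user; its shape, with hypothetical callback names, is roughly:

	static const struct mm_walk_ops example_walk_ops = {
		.pmd_entry	= example_pmd_entry,		/* hypothetical */
		.hugetlb_entry	= example_hugetlb_entry,	/* hypothetical */
		.walk_lock	= PGWALK_RDLOCK,	/* take mmap lock for read */
	};

	mmap_read_lock(mm);
	ret = walk_page_range(mm, 0, TASK_SIZE, &example_walk_ops, &priv);
	mmap_read_unlock(mm);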
diff --git a/mm/memory.c b/mm/memory.c
index 405a483d2fd1..6c264d2f969c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1430,6 +1430,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
+ arch_check_zapped_pte(vma, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
zap_install_uffd_wp_if_needed(vma, addr, pte, details,
ptent);
@@ -4124,7 +4125,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
entry = mk_pte(&folio->page, vma->vm_page_prot);
entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
+ entry = pte_mkwrite(pte_mkdirty(entry), vma);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
@@ -4842,7 +4843,7 @@ out_map:
pte = pte_modify(old_pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (writable)
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
pte_unmap_unlock(vmf->pte, vmf->ptl);
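
arch_check_zapped_pte() gives the architecture a hook to sanity-check the PTE that was just cleared. The generic fallback is expected to be an empty inline; on x86 the idea is to warn if a shadow-stack PTE shows up in a non-shadow-stack VMA. A sketch of both halves, written from memory of the series rather than verbatim:

	/* Generic no-op when the architecture does not override it. */
	static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
						 pte_t pte)
	{
	}

	/*
	 * x86-style override: shadow-stack PTEs belong only in
	 * shadow-stack VMAs.
	 */
	void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte)
	{
		VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
				pte_shstk(pte));
	}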
diff --git a/mm/migrate.c b/mm/migrate.c
index 78c9bd505b62..b7fa020003f3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,7 +220,7 @@ static bool remove_migration_pte(struct folio *folio,
if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
pte = pte_mkdirty(pte);
if (is_writable_migration_entry(entry))
- pte = pte_mkwrite(pte);
+ pte = pte_mkwrite(pte, vma);
else if (pte_swp_uffd_wp(old_pte))
pte = pte_mkuffd_wp(pte);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index d69131adc51c..8ac1f79f754a 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -624,7 +624,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
}
entry = mk_pte(page, vma->vm_page_prot);
if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry));
+ entry = pte_mkwrite(pte_mkdirty(entry), vma);
}
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
diff --git a/mm/mmap.c b/mm/mmap.c
index 514ced13c65c..b56a7f0c9f85 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1182,11 +1182,11 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
*/
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long pgoff,
- unsigned long *populate, struct list_head *uf)
+ unsigned long flags, vm_flags_t vm_flags,
+ unsigned long pgoff, unsigned long *populate,
+ struct list_head *uf)
{
struct mm_struct *mm = current->mm;
- vm_flags_t vm_flags;
int pkey = 0;
*populate = 0;
@@ -1246,7 +1246,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
* to. we assume access permissions have been handled by the open
* of the memory object, so we don't do any here.
*/
- vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+ vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
if (flags & MAP_LOCKED)
@@ -1564,7 +1564,7 @@ retry:
gap = mas.index;
gap += (info->align_offset - gap) & info->align_mask;
tmp = mas_next(&mas, ULONG_MAX);
- if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
if (vm_start_gap(tmp) < gap + length - 1) {
low_limit = tmp->vm_end;
mas_reset(&mas);
@@ -1616,7 +1616,7 @@ retry:
gap -= (gap - info->align_offset) & info->align_mask;
gap_end = mas.last;
tmp = mas_next(&mas, ULONG_MAX);
- if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
if (vm_start_gap(tmp) <= gap_end) {
high_limit = vm_start_gap(tmp);
mas_reset(&mas);
@@ -2998,7 +2998,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
file = get_file(vma->vm_file);
ret = do_mmap(vma->vm_file, start, size,
- prot, flags, pgoff, &populate, NULL);
+ prot, flags, 0, pgoff, &populate, NULL);
fput(file);
out:
mmap_write_unlock(mm);
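
The new vm_flags parameter lets a caller seed VMA flags that cannot be derived from prot/flags; every existing caller passes 0 (see remap_file_pages above and vm_mmap_pgoff in mm/util.c below). A caller that does need it, such as an x86 map_shadow_stack() implementation, would look roughly like this (a sketch, not the exact upstream code):

	mmap_write_lock(mm);
	addr = do_mmap(NULL, addr, size, PROT_READ,
		       MAP_ANONYMOUS | MAP_PRIVATE,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);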
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 130db91d3a8c..b94fbb45d5c7 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -185,7 +185,7 @@ static long change_pte_range(struct mmu_gather *tlb,
if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) &&
!pte_write(ptent) &&
can_change_pte_writable(vma, addr, ptent))
- ptent = pte_mkwrite(ptent);
+ ptent = pte_mkwrite(ptent, vma);
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
if (pte_needs_flush(oldpte, ptent))
diff --git a/mm/nommu.c b/mm/nommu.c
index 8dba41cfc44d..7f9e9e5a0e12 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1016,6 +1016,7 @@ unsigned long do_mmap(struct file *file,
unsigned long len,
unsigned long prot,
unsigned long flags,
+ vm_flags_t vm_flags,
unsigned long pgoff,
unsigned long *populate,
struct list_head *uf)
@@ -1023,7 +1024,6 @@ unsigned long do_mmap(struct file *file,
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
- vm_flags_t vm_flags;
unsigned long capabilities, result;
int ret;
VMA_ITERATOR(vmi, current->mm, 0);
@@ -1043,7 +1043,7 @@ unsigned long do_mmap(struct file *file,
/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
- vm_flags = determine_vm_flags(file, prot, flags, capabilities);
+ vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
/* we're going to need to record the mapping */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 452459836b71..0c5be12f9336 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2641,12 +2641,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
do {
page = NULL;
spin_lock_irqsave(&zone->lock, flags);
- /*
- * order-0 request can reach here when the pcplist is skipped
- * due to non-CMA allocation context. HIGHATOMIC area is
- * reserved for high-order atomic allocation, so order-0
- * request should skip it.
- */
if (alloc_flags & ALLOC_HIGHATOMIC)
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) {
@@ -2780,17 +2774,10 @@ struct page *rmqueue(struct zone *preferred_zone,
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
if (likely(pcp_allowed_order(order))) {
- /*
- * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
- * we need to skip it when CMA area isn't allowed.
- */
- if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
- migratetype != MIGRATE_MOVABLE) {
- page = rmqueue_pcplist(preferred_zone, zone, order,
- migratetype, alloc_flags);
- if (likely(page))
- goto out;
- }
+ page = rmqueue_pcplist(preferred_zone, zone, order,
+ migratetype, alloc_flags);
+ if (likely(page))
+ goto out;
}
page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9b2d23fbf4d3..b7d7e4fcfad7 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -58,7 +58,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte = pte_offset_map(pmd, addr);
if (pte) {
err = walk_pte_range_inner(pte, addr, end, walk);
- if (walk->mm != &init_mm)
+ if (walk->mm != &init_mm && addr < TASK_SIZE)
pte_unmap(pte);
}
} else {
diff --git a/mm/percpu.c b/mm/percpu.c
index 28e07ede46f6..a7665de8485f 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1890,13 +1890,15 @@ fail_unlock:
fail:
trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
- if (!is_atomic && do_warn && warn_limit) {
+ if (do_warn && warn_limit) {
pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
size, align, is_atomic, err);
- dump_stack();
+ if (!is_atomic)
+ dump_stack();
if (!--warn_limit)
pr_info("limit reached, disable warning\n");
}
+
if (is_atomic) {
/* see the flag handling in pcpu_balance_workfn() */
pcpu_atomic_alloc_failed = true;
@@ -2581,14 +2583,12 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
{
size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
size_t static_size, dyn_size;
- struct pcpu_chunk *chunk;
unsigned long *group_offsets;
size_t *group_sizes;
unsigned long *unit_off;
unsigned int cpu;
int *unit_map;
int group, unit, i;
- int map_size;
unsigned long tmp_addr;
size_t alloc_size;
@@ -2615,7 +2615,6 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
- PCPU_SETUP_BUG_ON(!ai->dyn_size);
PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
@@ -2698,7 +2697,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
pcpu_atom_size = ai->atom_size;
- pcpu_chunk_struct_size = struct_size(chunk, populated,
+ pcpu_chunk_struct_size = struct_size((struct pcpu_chunk *)0, populated,
BITS_TO_LONGS(pcpu_unit_pages));
pcpu_stats_save_ai(ai);
@@ -2735,29 +2734,23 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
dyn_size = ai->dyn_size - (static_size - ai->static_size);
/*
- * Initialize first chunk.
- * If the reserved_size is non-zero, this initializes the reserved
- * chunk. If the reserved_size is zero, the reserved chunk is NULL
- * and the dynamic region is initialized here. The first chunk,
- * pcpu_first_chunk, will always point to the chunk that serves
- * the dynamic region.
+ * Initialize first chunk:
+ * This chunk is broken up into 3 parts:
+ * < static | [reserved] | dynamic >
+ * - static - there is no backing chunk because these allocations can
+ * never be freed.
+ * - reserved (pcpu_reserved_chunk) - exists primarily to serve
+ * allocations from module load.
+ * - dynamic (pcpu_first_chunk) - serves the dynamic part of the first
+ * chunk.
*/
tmp_addr = (unsigned long)base_addr + static_size;
- map_size = ai->reserved_size ?: dyn_size;
- chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
-
- /* init dynamic chunk if necessary */
- if (ai->reserved_size) {
- pcpu_reserved_chunk = chunk;
+ if (ai->reserved_size)
+ pcpu_reserved_chunk = pcpu_alloc_first_chunk(tmp_addr,
+ ai->reserved_size);
+ tmp_addr = (unsigned long)base_addr + static_size + ai->reserved_size;
+ pcpu_first_chunk = pcpu_alloc_first_chunk(tmp_addr, dyn_size);
- tmp_addr = (unsigned long)base_addr + static_size +
- ai->reserved_size;
- map_size = dyn_size;
- chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
- }
-
- /* link the first chunk in */
- pcpu_first_chunk = chunk;
pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(pcpu_first_chunk, -1);
@@ -3189,32 +3182,26 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
pmd_t *pmd;
if (pgd_none(*pgd)) {
- p4d_t *new;
-
- new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
- if (!new)
+ p4d = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+ if (!p4d)
goto err_alloc;
- pgd_populate(&init_mm, pgd, new);
+ pgd_populate(&init_mm, pgd, p4d);
}
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- if (!new)
+ pud = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ if (!pud)
goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
+ p4d_populate(&init_mm, p4d, pud);
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
- if (!new)
+ pmd = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ if (!pmd)
goto err_alloc;
- pud_populate(&init_mm, pud, new);
+ pud_populate(&init_mm, pud, pmd);
}
pmd = pmd_offset(pud, addr);
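
Two notes on the percpu changes: the first chunk is now built in straight-line < static | [reserved] | dynamic > order, and struct_size() no longer needs the otherwise-unused local 'chunk' because the type can come from a cast null pointer. The struct_size() idiom in isolation:

	struct example_chunk {
		int nr;
		unsigned long populated[];	/* flexible array member */
	};

	/*
	 * sizeof(struct example_chunk) + n * sizeof(unsigned long),
	 * computed with overflow saturation; no object is dereferenced,
	 * the casted null pointer only supplies the type.
	 */
	size_t sz = struct_size((struct example_chunk *)0, populated, n);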
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0fc69efa4f1f..96d9eae5c7cc 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -86,7 +86,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
if (page_in_cache && !vm_shared)
writable = false;
if (writable)
- _dst_pte = pte_mkwrite(_dst_pte);
+ _dst_pte = pte_mkwrite(_dst_pte, dst_vma);
if (flags & MFILL_ATOMIC_WP)
_dst_pte = pte_mkuffd_wp(_dst_pte);
diff --git a/mm/util.c b/mm/util.c
index 4ed8b9b5273c..8cbbfd3a3d59 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -543,7 +543,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
if (!ret) {
if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate,
+ ret = do_mmap(file, addr, len, prot, flag, 0, pgoff, &populate,
&uf);
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
@@ -1068,7 +1068,9 @@ void mem_dump_obj(void *object)
if (vmalloc_dump_obj(object))
return;
- if (virt_addr_valid(object))
+ if (is_vmalloc_addr(object))
+ type = "vmalloc memory";
+ else if (virt_addr_valid(object))
type = "non-slab/vmalloc memory";
else if (object == NULL)
type = "NULL pointer";
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 228a4a5312f2..ef8599d394fd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4278,14 +4278,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
#ifdef CONFIG_PRINTK
bool vmalloc_dump_obj(void *object)
{
- struct vm_struct *vm;
void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+ const void *caller;
+ struct vm_struct *vm;
+ struct vmap_area *va;
+ unsigned long addr;
+ unsigned int nr_pages;
- vm = find_vm_area(objp);
- if (!vm)
+ if (!spin_trylock(&vmap_area_lock))
+ return false;
+ va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+ if (!va) {
+ spin_unlock(&vmap_area_lock);
return false;
+ }
+
+ vm = va->vm;
+ if (!vm) {
+ spin_unlock(&vmap_area_lock);
+ return false;
+ }
+ addr = (unsigned long)vm->addr;
+ caller = vm->caller;
+ nr_pages = vm->nr_pages;
+ spin_unlock(&vmap_area_lock);
pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
- vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+ nr_pages, addr, caller);
return true;
}
#endif
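
vmalloc_dump_obj() now snapshots vm->addr, vm->caller and vm->nr_pages under vmap_area_lock and prints only after unlocking: the trylock avoids self-deadlock when the dumper runs from a context that may already hold the lock, and printing from the snapshot means nothing is dereferenced after the lock is dropped. The pattern in general form (names hypothetical):

	if (!spin_trylock(&object_lock))
		return false;		/* lock may already be held */
	snap = *object;			/* copy the fields to report */
	spin_unlock(&object_lock);
	pr_info("state=%d\n", snap.state);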