Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c      | 29
-rw-r--r--  mm/hugetlb_vmemmap.c  | 13
-rw-r--r--  mm/list_lru.c         | 28
-rw-r--r--  mm/memcontrol-v1.c    |  7
-rw-r--r--  mm/memcontrol.c       | 22
-rw-r--r--  mm/memory-failure.c   | 20
-rw-r--r--  mm/memory.c           | 33
-rw-r--r--  mm/migrate.c          | 16
-rw-r--r--  mm/mm_init.c          | 15
-rw-r--r--  mm/mseal.c            | 14
-rw-r--r--  mm/page_alloc.c       | 52
-rw-r--r--  mm/page_ext.c         | 18
-rw-r--r--  mm/shmem.c            | 14
-rw-r--r--  mm/slub.c             |  3
-rw-r--r--  mm/sparse-vmemmap.c   | 11
-rw-r--r--  mm/sparse.c           |  5
-rw-r--r--  mm/vmalloc.c          | 11
-rw-r--r--  mm/vmstat.c           | 52
18 files changed, 184 insertions(+), 179 deletions(-)
| diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4be468e06a4..67c86a5d64a6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)  	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);  	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) {  		spin_unlock(vmf->ptl); -		goto out; +		return 0;  	}  	pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)  	if (!migrate_misplaced_folio(folio, vma, target_nid)) {  		flags |= TNF_MIGRATED;  		nid = target_nid; -	} else { -		flags |= TNF_MIGRATE_FAIL; -		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); -		if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { -			spin_unlock(vmf->ptl); -			goto out; -		} -		goto out_map; -	} - -out: -	if (nid != NUMA_NO_NODE)  		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); +		return 0; +	} -	return 0; - +	flags |= TNF_MIGRATE_FAIL; +	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); +	if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { +		spin_unlock(vmf->ptl); +		return 0; +	}  out_map:  	/* Restore the PMD */  	pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1753,7 +1747,10 @@ out_map:  	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);  	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);  	spin_unlock(vmf->ptl); -	goto out; + +	if (nid != NUMA_NO_NODE) +		task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); +	return 0;  }  /* diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914..0c3f56b3578e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,  static inline void free_vmemmap_page(struct page *page)  {  	if (PageReserved(page)) { +		memmap_boot_pages_add(-1);  		free_bootmem_page(page); -		mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1);  	} else { +		memmap_pages_add(-1);  		__free_page(page); -		mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1);  	}  } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,  		copy_page(page_to_virt(walk.reuse_page),  			  (void *)walk.reuse_addr);  		list_add(&walk.reuse_page->lru, vmemmap_pages); -		mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); +		memmap_pages_add(1);  	}  	/* @@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,  	for (i = 0; i < nr_pages; i++) {  		page = alloc_pages_node(nid, gfp_mask, 0); -		if (!page) { -			mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); +		if (!page)  			goto out; -		}  		list_add(&page->lru, list);  	} - -	mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); +	memmap_pages_add(nr_pages);  	return 0;  out: diff --git a/mm/list_lru.c b/mm/list_lru.c index a29d96929d7c..9b7ff06e9d32 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -85,6 +85,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)  }  #endif /* CONFIG_MEMCG */ +/* The caller must ensure the memcg lifetime. */  bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,  		    struct mem_cgroup *memcg)  { @@ -109,14 +110,22 @@ EXPORT_SYMBOL_GPL(list_lru_add);  bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)  { +	bool ret;  	int nid = page_to_nid(virt_to_page(item)); -	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? 
-		mem_cgroup_from_slab_obj(item) : NULL; -	return list_lru_add(lru, item, nid, memcg); +	if (list_lru_memcg_aware(lru)) { +		rcu_read_lock(); +		ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item)); +		rcu_read_unlock(); +	} else { +		ret = list_lru_add(lru, item, nid, NULL); +	} + +	return ret;  }  EXPORT_SYMBOL_GPL(list_lru_add_obj); +/* The caller must ensure the memcg lifetime. */  bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,  		    struct mem_cgroup *memcg)  { @@ -139,11 +148,18 @@ EXPORT_SYMBOL_GPL(list_lru_del);  bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)  { +	bool ret;  	int nid = page_to_nid(virt_to_page(item)); -	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? -		mem_cgroup_from_slab_obj(item) : NULL; -	return list_lru_del(lru, item, nid, memcg); +	if (list_lru_memcg_aware(lru)) { +		rcu_read_lock(); +		ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item)); +		rcu_read_unlock(); +	} else { +		ret = list_lru_del(lru, item, nid, NULL); +	} + +	return ret;  }  EXPORT_SYMBOL_GPL(list_lru_del_obj); diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e..417c96f2da28 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,  	buf = endp + 1;  	cfd = simple_strtoul(buf, &endp, 10); -	if ((*endp != ' ') && (*endp != '\0')) +	if (*endp == '\0') +		buf = endp; +	else if (*endp == ' ') +		buf = endp + 1; +	else  		return -EINVAL; -	buf = endp + 1;  	event = kzalloc(sizeof(*event), GFP_KERNEL);  	if (!event) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 960371788687..f29157288b7d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3386,11 +3386,28 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)  #define MEM_CGROUP_ID_MAX	((1UL << MEM_CGROUP_ID_SHIFT) - 1)  static DEFINE_IDR(mem_cgroup_idr); +static DEFINE_SPINLOCK(memcg_idr_lock); + +static int mem_cgroup_alloc_id(void) +{ +	int ret; + +	idr_preload(GFP_KERNEL); +	spin_lock(&memcg_idr_lock); +	ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1, +			GFP_NOWAIT); +	spin_unlock(&memcg_idr_lock); +	idr_preload_end(); +	return ret; +}  static void mem_cgroup_id_remove(struct mem_cgroup *memcg)  {  	if (memcg->id.id > 0) { +		spin_lock(&memcg_idr_lock);  		idr_remove(&mem_cgroup_idr, memcg->id.id); +		spin_unlock(&memcg_idr_lock); +  		memcg->id.id = 0;  	}  } @@ -3524,8 +3541,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)  	if (!memcg)  		return ERR_PTR(error); -	memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, -				 1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL); +	memcg->id.id = mem_cgroup_alloc_id();  	if (memcg->id.id < 0) {  		error = memcg->id.id;  		goto fail; @@ -3667,7 +3683,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)  	 * publish it here at the end of onlining. This matches the  	 * regular ID destruction during offlining.  	 
*/ +	spin_lock(&memcg_idr_lock);  	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); +	spin_unlock(&memcg_idr_lock);  	return 0;  offline_kmem: diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c9117..7066fc84f351 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry {  struct memory_failure_cpu {  	DECLARE_KFIFO(fifo, struct memory_failure_entry,  		      MEMORY_FAILURE_FIFO_SIZE); -	spinlock_t lock; +	raw_spinlock_t lock;  	struct work_struct work;  }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags)  {  	struct memory_failure_cpu *mf_cpu;  	unsigned long proc_flags; +	bool buffer_overflow;  	struct memory_failure_entry entry = {  		.pfn =		pfn,  		.flags =	flags,  	};  	mf_cpu = &get_cpu_var(memory_failure_cpu); -	spin_lock_irqsave(&mf_cpu->lock, proc_flags); -	if (kfifo_put(&mf_cpu->fifo, entry)) +	raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); +	buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); +	if (!buffer_overflow)  		schedule_work_on(smp_processor_id(), &mf_cpu->work); -	else +	raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); +	put_cpu_var(memory_failure_cpu); +	if (buffer_overflow)  		pr_err("buffer overflow when queuing memory failure at %#lx\n",  		       pfn); -	spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); -	put_cpu_var(memory_failure_cpu);  }  EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work)  	mf_cpu = container_of(work, struct memory_failure_cpu, work);  	for (;;) { -		spin_lock_irqsave(&mf_cpu->lock, proc_flags); +		raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);  		gotten = kfifo_get(&mf_cpu->fifo, &entry); -		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); +		raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);  		if (!gotten)  			break;  		if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void)  	for_each_possible_cpu(cpu) {  		mf_cpu = &per_cpu(memory_failure_cpu, cpu); -		spin_lock_init(&mf_cpu->lock); +		raw_spin_lock_init(&mf_cpu->lock);  		INIT_KFIFO(mf_cpu->fifo);  		INIT_WORK(&mf_cpu->work, memory_failure_work_func);  	} diff --git a/mm/memory.c b/mm/memory.c index 34f8402d2046..3c01d68065be 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)  	if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {  		pte_unmap_unlock(vmf->pte, vmf->ptl); -		goto out; +		return 0;  	}  	pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)  	if (!migrate_misplaced_folio(folio, vma, target_nid)) {  		nid = target_nid;  		flags |= TNF_MIGRATED; -	} else { -		flags |= TNF_MIGRATE_FAIL; -		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, -					       vmf->address, &vmf->ptl); -		if (unlikely(!vmf->pte)) -			goto out; -		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { -			pte_unmap_unlock(vmf->pte, vmf->ptl); -			goto out; -		} -		goto out_map; +		task_numa_fault(last_cpupid, nid, nr_pages, flags); +		return 0;  	} -out: -	if (nid != NUMA_NO_NODE) -		task_numa_fault(last_cpupid, nid, nr_pages, flags); -	return 0; +	flags |= TNF_MIGRATE_FAIL; +	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, +				       vmf->address, &vmf->ptl); +	if (unlikely(!vmf->pte)) +		return 0; +	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { +		pte_unmap_unlock(vmf->pte, vmf->ptl); +		return 0; +	}  
out_map:  	/*  	 * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map:  		numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,  					    writable);  	pte_unmap_unlock(vmf->pte, vmf->ptl); -	goto out; + +	if (nid != NUMA_NO_NODE) +		task_numa_fault(last_cpupid, nid, nr_pages, flags); +	return 0;  }  static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d5..923ea80ba744 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out:  	return rc;  } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, +				  enum migrate_mode mode)  {  	int rc; -	folio_lock(folio); +	if (mode == MIGRATE_ASYNC) { +		if (!folio_trylock(folio)) +			return -EAGAIN; +	} else { +		folio_lock(folio); +	}  	rc = split_folio_to_list(folio, split_folios);  	folio_unlock(folio);  	if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from,  			 */  			if (nr_pages > 2 &&  			   !list_empty(&folio->_deferred_list)) { -				if (try_split_folio(folio, split_folios) == 0) { +				if (!try_split_folio(folio, split_folios, mode)) {  					nr_failed++;  					stats->nr_thp_failed += is_thp;  					stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from,  			if (!thp_migration_supported() && is_thp) {  				nr_failed++;  				stats->nr_thp_failed++; -				if (!try_split_folio(folio, split_folios)) { +				if (!try_split_folio(folio, split_folios, mode)) {  					stats->nr_thp_split++;  					stats->nr_split++;  					continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from,  				stats->nr_thp_failed += is_thp;  				/* Large folio NUMA faulting doesn't split to retry. 
*/  				if (is_large && !nosplit) { -					int ret = try_split_folio(folio, split_folios); +					int ret = try_split_folio(folio, split_folios, mode);  					if (!ret) {  						stats->nr_thp_split += is_thp; diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b..51960079875b 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)  		panic("Failed to allocate %ld bytes for node %d memory map\n",  		      size, pgdat->node_id);  	pgdat->node_mem_map = map + offset; -	mod_node_early_perpage_metadata(pgdat->node_id, -					DIV_ROUND_UP(size, PAGE_SIZE)); +	memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));  	pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",  		 __func__, pgdat->node_id, (unsigned long)pgdat,  		 (unsigned long)pgdat->node_mem_map); @@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page)  	set_pageblock_migratetype(page, MIGRATE_CMA);  	set_page_refcounted(page); +	/* pages were reserved and not allocated */ +	clear_page_tag_ref(page);  	__free_pages(page, pageblock_order);  	adjust_managed_page_count(page, pageblock_nr_pages); @@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,  	}  	/* pages were reserved and not allocated */ -	if (mem_alloc_profiling_enabled()) { -		union codetag_ref *ref = get_page_tag_ref(page); - -		if (ref) { -			set_codetag_empty(ref); -			put_page_tag_ref(ref); -		} -	} - +	clear_page_tag_ref(page);  	__free_pages_core(page, order, MEMINIT_EARLY);  } diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0..15bba28acc00 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma)  static bool is_madv_discard(int behavior)  { -	return	behavior & -		(MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | -		 MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); +	switch (behavior) { +	case MADV_FREE: +	case MADV_DONTNEED: +	case MADV_DONTNEED_LOCKED: +	case MADV_REMOVE: +	case MADV_DONTFORK: +	case MADV_WIPEONFORK: +		return true; +	} + +	return false;  }  static bool is_ro_anon(struct vm_area_struct *vma) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c04..c565de8f48e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);  static bool page_contains_unaccepted(struct page *page, unsigned int order);  static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order);  static inline bool has_unaccepted_memory(void);  static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,  	if (!(alloc_flags & ALLOC_CMA))  		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);  #endif -#ifdef CONFIG_UNACCEPTED_MEMORY -	unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif  	return unusable_free;  } @@ -3368,6 +3365,8 @@ retry:  			}  		} +		cond_accept_memory(zone, order); +  		/*  		 * Detect whether the number of free pages is below high  		 * watermark.  
If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ check_alloc_wmark:  				       gfp_mask)) {  			int ret; -			if (has_unaccepted_memory()) { -				if (try_to_accept_memory(zone, order)) -					goto try_this_zone; -			} +			if (cond_accept_memory(zone, order)) +				goto try_this_zone;  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT  			/* @@ -3450,10 +3447,8 @@ try_this_zone:  			return page;  		} else { -			if (has_unaccepted_memory()) { -				if (try_to_accept_memory(zone, order)) -					goto try_this_zone; -			} +			if (cond_accept_memory(zone, order)) +				goto try_this_zone;  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT  			/* Try again if zone has deferred pages */ @@ -5755,7 +5750,6 @@ void __init setup_per_cpu_pageset(void)  	for_each_online_pgdat(pgdat)  		pgdat->per_cpu_nodestats =  			alloc_percpu(struct per_cpu_nodestat); -	store_early_perpage_metadata();  }  __meminit void zone_pcp_init(struct zone *zone) @@ -5821,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char  void free_reserved_page(struct page *page)  { -	if (mem_alloc_profiling_enabled()) { -		union codetag_ref *ref = get_page_tag_ref(page); - -		if (ref) { -			set_codetag_empty(ref); -			put_page_tag_ref(ref); -		} -	} +	clear_page_tag_ref(page);  	ClearPageReserved(page);  	init_page_count(page);  	__free_page(page); @@ -6951,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone)  	struct page *page;  	bool last; -	if (list_empty(&zone->unaccepted_pages)) -		return false; -  	spin_lock_irqsave(&zone->lock, flags);  	page = list_first_entry_or_null(&zone->unaccepted_pages,  					struct page, lru); @@ -6979,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone)  	return true;  } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order)  {  	long to_accept; -	int ret = false; +	bool ret = false; + +	if (!has_unaccepted_memory()) +		return false; + +	if (list_empty(&zone->unaccepted_pages)) +		return false;  	/* How much to accept to get to high watermark? 
*/  	to_accept = high_wmark_pages(zone) -  		    (zone_page_state(zone, NR_FREE_PAGES) - -		    __zone_watermark_unusable_free(zone, order, 0)); +		    __zone_watermark_unusable_free(zone, order, 0) - +		    zone_page_state(zone, NR_UNACCEPTED)); -	/* Accept at least one page */ -	do { +	while (to_accept > 0) {  		if (!try_to_accept_memory_one(zone))  			break;  		ret = true;  		to_accept -= MAX_ORDER_NR_PAGES; -	} while (to_accept > 0); +	}  	return ret;  } @@ -7038,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order)  {  } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order)  {  	return false;  } diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401..641d93f6af4c 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid)  		return -ENOMEM;  	NODE_DATA(nid)->node_page_ext = base;  	total_usage += table_size; -	mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, -			    DIV_ROUND_UP(table_size, PAGE_SIZE)); +	memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE));  	return 0;  } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid)  	else  		addr = vzalloc_node(size, nid); -	if (addr) { -		mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, -				    DIV_ROUND_UP(size, PAGE_SIZE)); -	} +	if (addr) +		memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));  	return addr;  } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr)  {  	size_t table_size;  	struct page *page; -	struct pglist_data *pgdat;  	table_size = page_ext_size * PAGES_PER_SECTION; +	memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE)));  	if (is_vmalloc_addr(addr)) { -		page = vmalloc_to_page(addr); -		pgdat = page_pgdat(page);  		vfree(addr);  	} else {  		page = virt_to_page(addr); -		pgdat = page_pgdat(page);  		BUG_ON(PageReserved(page));  		kmemleak_free(addr);  		free_pages_exact(addr, table_size);  	} - -	mod_node_page_state(pgdat, NR_MEMMAP, -			    -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); -  }  static void __free_page_ext(unsigned long pfn) diff --git a/mm/shmem.c b/mm/shmem.c index 2faa9daaf54b..5a77acf6ac6a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1629,11 +1629,6 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,  	unsigned long mask = READ_ONCE(huge_shmem_orders_always);  	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);  	unsigned long vm_flags = vma->vm_flags; -	/* -	 * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that -	 * are enabled for this vma. 
-	 */ -	unsigned long orders = BIT(PMD_ORDER + 1) - 1;  	loff_t i_size;  	int order; @@ -1678,7 +1673,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,  	if (global_huge)  		mask |= READ_ONCE(huge_shmem_orders_inherit); -	return orders & mask; +	return THP_ORDERS_ALL_FILE_DEFAULT & mask;  }  static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf, @@ -1686,6 +1681,7 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault  					   unsigned long orders)  {  	struct vm_area_struct *vma = vmf->vma; +	pgoff_t aligned_index;  	unsigned long pages;  	int order; @@ -1697,9 +1693,9 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault  	order = highest_order(orders);  	while (orders) {  		pages = 1UL << order; -		index = round_down(index, pages); -		if (!xa_find(&mapping->i_pages, &index, -			     index + pages - 1, XA_PRESENT)) +		aligned_index = round_down(index, pages); +		if (!xa_find(&mapping->i_pages, &aligned_index, +			     aligned_index + pages - 1, XA_PRESENT))  			break;  		order = next_order(&orders, order);  	} diff --git a/mm/slub.c b/mm/slub.c index 3520acaf9afa..c9d8a2497fd6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4690,6 +4690,9 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)  		if (!df.slab)  			continue; +		if (kfence_free(df.freelist)) +			continue; +  		do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,  			     _RET_IP_);  	} while (likely(size)); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b..edcc7a6b0f6f 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,  	if (r < 0)  		return NULL; -	if (system_state == SYSTEM_BOOTING) { -		mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, -								  PAGE_SIZE)); -	} else { -		mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, -				    DIV_ROUND_UP(end - start, PAGE_SIZE)); -	} +	if (system_state == SYSTEM_BOOTING) +		memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); +	else +		memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));  	return pfn_to_page(pfn);  } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d13..0f018c6f9ec5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)  	sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);  	sparsemap_buf_end = sparsemap_buf + size;  #ifndef CONFIG_SPARSEMEM_VMEMMAP -	mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); +	memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));  #endif  } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,  	unsigned long start = (unsigned long) pfn_to_page(pfn);  	unsigned long end = start + nr_pages * sizeof(struct page); -	mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, -			    -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); +	memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));  	vmemmap_free(start, end, altmap);  }  static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a1..af2de36549d6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,  			page = alloc_pages_noprof(alloc_gfp, order);  		else  			page = alloc_pages_node_noprof(nid, alloc_gfp, order); -		if (unlikely(!page)) { -			
if (!nofail) -				break; - -			/* fall back to the zero order allocations */ -			alloc_gfp |= __GFP_NOFAIL; -			order = 0; -			continue; -		} +		if (unlikely(!page)) +			break;  		/*  		 * Higher order allocations must be able to be treated as diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc636..e875f2a4915f 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat,  }  #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ +	atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ +	atomic_long_add(delta, &nr_memmap_pages); +} +  #ifdef CONFIG_COMPACTION  struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = {  	"pgdemote_kswapd",  	"pgdemote_direct",  	"pgdemote_khugepaged", -	"nr_memmap", -	"nr_memmap_boot", -	/* enum writeback_stat_item counters */ +	/* system-wide enum vm_stat_item counters */  	"nr_dirty_threshold",  	"nr_dirty_background_threshold", +	"nr_memmap_pages", +	"nr_memmap_boot_pages",  #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)  	/* enum vm_event_item counters */ @@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = {  #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \  			 NR_VM_NUMA_EVENT_ITEMS + \  			 NR_VM_NODE_STAT_ITEMS + \ -			 NR_VM_WRITEBACK_STAT_ITEMS + \ +			 NR_VM_STAT_ITEMS + \  			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \  			  NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)  	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,  			    v + NR_DIRTY_THRESHOLD); -	v += NR_VM_WRITEBACK_STAT_ITEMS; +	v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); +	v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); +	v += NR_VM_STAT_ITEMS;  #ifdef CONFIG_VM_EVENT_COUNTERS  	all_vm_events(v); @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void)  module_init(extfrag_debug_init);  #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ -	early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ -	int nid; -	struct pglist_data *pgdat; - -	for_each_online_pgdat(pgdat) { -		nid = pgdat->node_id; -		mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, -				    early_perpage_metadata[nid]); -	} -} |
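
The sketches below illustrate, in plain userspace C, a few of the patterns the patches above apply; none of them are kernel code, and all names and numbers outside the diffs are made up for illustration.

First, the memory_failure_queue() hunk converts the per-CPU lock to a raw spinlock and only records whether the kfifo was full while holding it, moving the error message after the unlock. A minimal sketch of that "note the failure inside the lock, report it outside" shape, with a pthread mutex standing in for the raw spinlock and a trivial array standing in for the kfifo:

/* Record the overflow condition inside the critical section, but do
 * the (potentially slow, potentially sleeping) reporting only after
 * the lock is dropped, mirroring the memory_failure_queue() change. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_SIZE 4

static struct {
	pthread_mutex_t lock;
	unsigned long entries[QUEUE_SIZE];
	int count;
} mf_queue = { .lock = PTHREAD_MUTEX_INITIALIZER };

static void queue_failure(unsigned long pfn)
{
	bool overflow;

	pthread_mutex_lock(&mf_queue.lock);
	overflow = mf_queue.count == QUEUE_SIZE;
	if (!overflow)
		mf_queue.entries[mf_queue.count++] = pfn;
	pthread_mutex_unlock(&mf_queue.lock);

	/* Reporting stays outside the critical section, like pr_err()
	 * in the patch. */
	if (overflow)
		fprintf(stderr, "buffer overflow when queuing memory failure at %#lx\n", pfn);
}

int main(void)
{
	for (unsigned long pfn = 0; pfn < 6; pfn++)
		queue_failure(pfn);
	return 0;
}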
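The mm/migrate.c hunks make try_split_folio() take the folio lock with a trylock when migrating in MIGRATE_ASYNC mode, failing with -EAGAIN instead of sleeping. A small userspace sketch of that mode-dependent locking, with a pthread mutex and a placeholder for the work done under the lock:

/* Asynchronous callers must not block on the lock; synchronous callers
 * may. This mirrors the shape of the try_split_folio() change only. */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t object_lock = PTHREAD_MUTEX_INITIALIZER;

static int do_locked_work(bool async)
{
	if (async) {
		/* Async path: give up immediately if the lock is busy. */
		if (pthread_mutex_trylock(&object_lock) != 0)
			return -EAGAIN;
	} else {
		pthread_mutex_lock(&object_lock);
	}

	/* ... the actual work (split_folio_to_list() in the patch) ... */

	pthread_mutex_unlock(&object_lock);
	return 0;
}

int main(void)
{
	printf("sync:  %d\n", do_locked_work(false));
	printf("async: %d\n", do_locked_work(true));
	return 0;
}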
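The mm/mseal.c hunk replaces a bitwise test with a switch because MADV_* advice values are small integers, not single-bit flags, so masking against an OR of them matches unrelated advice. A hedged userspace demo of the difference; the fallback #defines cover constants that older libc headers may lack, and the exact bit overlap shown depends on the usual Linux UAPI values:

/* Demonstrates why membership in a set of enum-like values needs a
 * switch (or table), not a bitmask test. */
#define _DEFAULT_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <sys/mman.h>

#ifndef MADV_DONTNEED_LOCKED
#define MADV_DONTNEED_LOCKED 24
#endif
#ifndef MADV_WIPEONFORK
#define MADV_WIPEONFORK 18
#endif

static bool is_discard_bitmask(int behavior)	/* old, buggy shape */
{
	return behavior & (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED |
			   MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK);
}

static bool is_discard_switch(int behavior)	/* shape used by the fix */
{
	switch (behavior) {
	case MADV_FREE:
	case MADV_DONTNEED:
	case MADV_DONTNEED_LOCKED:
	case MADV_REMOVE:
	case MADV_DONTFORK:
	case MADV_WIPEONFORK:
		return true;
	}
	return false;
}

int main(void)
{
	/* MADV_HUGEPAGE is not a discard operation, yet its value shares
	 * bits with the mask, so the bitmask version reports true. */
	printf("bitmask: %d  switch: %d\n",
	       is_discard_bitmask(MADV_HUGEPAGE),
	       is_discard_switch(MADV_HUGEPAGE));
	return 0;
}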
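The consolidated cond_accept_memory() in mm/page_alloc.c computes the shortfall against the high watermark once (now also discounting NR_UNACCEPTED) and accepts chunks only while a shortfall remains, instead of always accepting at least one chunk. A toy sketch of that control flow with made-up page counts and a constant standing in for MAX_ORDER_NR_PAGES:

/* Compute the shortfall, then accept fixed-size chunks while any
 * shortfall remains, reporting whether anything was accepted. */
#include <stdbool.h>
#include <stdio.h>

#define CHUNK_PAGES 512			/* stand-in for MAX_ORDER_NR_PAGES */

static long unaccepted_pages = 2048;	/* pretend backlog of unaccepted memory */

static bool accept_one_chunk(void)
{
	if (unaccepted_pages <= 0)
		return false;
	unaccepted_pages -= CHUNK_PAGES;
	return true;
}

static bool cond_accept(long high_wmark, long free, long unusable, long unaccepted)
{
	bool ret = false;
	/* How much must be accepted to reach the high watermark, ignoring
	 * pages that cannot satisfy this allocation anyway. */
	long to_accept = high_wmark - (free - unusable - unaccepted);

	while (to_accept > 0) {
		if (!accept_one_chunk())
			break;
		ret = true;
		to_accept -= CHUNK_PAGES;
	}
	return ret;
}

int main(void)
{
	printf("accepted: %d, still unaccepted: %ld\n",
	       cond_accept(4096, 3072, 256, 2048), unaccepted_pages);
	return 0;
}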
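Finally, the mm/vmstat.c hunks drop the per-node NR_MEMMAP/NR_MEMMAP_BOOT node stats in favour of two global atomic counters updated through memmap_pages_add() and memmap_boot_pages_add(), read only when /proc/vmstat is generated. A minimal C11 sketch of that counter pattern; the names mirror the patch but the code is plain userspace, not the kernel implementation:

/* Two process-wide atomic counters with signed "add" helpers, read
 * only at reporting time. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_long nr_memmap_boot_pages = 0;	/* pages from the boot allocator */
static atomic_long nr_memmap_pages = 0;		/* pages from the buddy allocator */

static void memmap_boot_pages_add(long delta)
{
	atomic_fetch_add_explicit(&nr_memmap_boot_pages, delta, memory_order_relaxed);
}

static void memmap_pages_add(long delta)
{
	atomic_fetch_add_explicit(&nr_memmap_pages, delta, memory_order_relaxed);
}

int main(void)
{
	memmap_boot_pages_add(128);	/* e.g. early memmap allocation */
	memmap_pages_add(64);		/* e.g. a hotplugged section... */
	memmap_pages_add(-64);		/* ...and its teardown */

	printf("nr_memmap_boot_pages %ld\n",
	       atomic_load_explicit(&nr_memmap_boot_pages, memory_order_relaxed));
	printf("nr_memmap_pages %ld\n",
	       atomic_load_explicit(&nr_memmap_pages, memory_order_relaxed));
	return 0;
}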