diff options
Diffstat (limited to 'mm/page_alloc.c')
| -rw-r--r-- | mm/page_alloc.c | 219 | 
1 file changed, 128 insertions, 91 deletions
| diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 150d4f23b010..14d39f34d336 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -32,6 +32,7 @@  #include <linux/sysctl.h>  #include <linux/cpu.h>  #include <linux/cpuset.h> +#include <linux/pagevec.h>  #include <linux/memory_hotplug.h>  #include <linux/nodemask.h>  #include <linux/vmstat.h> @@ -464,19 +465,19 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)  /*   * Temporary debugging check for pages not lying within a given zone.   */ -static int __maybe_unused bad_range(struct zone *zone, struct page *page) +static bool __maybe_unused bad_range(struct zone *zone, struct page *page)  {  	if (page_outside_zone_boundaries(zone, page)) -		return 1; +		return true;  	if (zone != page_zone(page)) -		return 1; +		return true; -	return 0; +	return false;  }  #else -static inline int __maybe_unused bad_range(struct zone *zone, struct page *page) +static inline bool __maybe_unused bad_range(struct zone *zone, struct page *page)  { -	return 0; +	return false;  }  #endif @@ -1061,7 +1062,7 @@ out:   * on-demand allocation and then freed again before the deferred pages   * initialization is done, but this is not likely to happen.   
*/ -static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) +static inline bool should_skip_kasan_poison(struct page *page)  {  	if (IS_ENABLED(CONFIG_KASAN_GENERIC))  		return deferred_pages_enabled(); @@ -1080,11 +1081,11 @@ static void kernel_init_pages(struct page *page, int numpages)  	kasan_enable_current();  } -static __always_inline bool free_pages_prepare(struct page *page, -			unsigned int order, fpi_t fpi_flags) +__always_inline bool free_pages_prepare(struct page *page, +			unsigned int order)  {  	int bad = 0; -	bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); +	bool skip_kasan_poison = should_skip_kasan_poison(page);  	bool init = want_init_on_free();  	bool compound = PageCompound(page); @@ -1266,7 +1267,7 @@ static void __free_pages_ok(struct page *page, unsigned int order,  	unsigned long pfn = page_to_pfn(page);  	struct zone *zone = page_zone(page); -	if (!free_pages_prepare(page, order, fpi_flags)) +	if (!free_pages_prepare(page, order))  		return;  	/* @@ -1422,14 +1423,14 @@ static void check_new_page_bad(struct page *page)  /*   * This page is about to be returned from the page allocator   */ -static int check_new_page(struct page *page) +static bool check_new_page(struct page *page)  {  	if (likely(page_expected_state(page,  				PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON))) -		return 0; +		return false;  	check_new_page_bad(page); -	return 1; +	return true;  }  static inline bool check_new_pages(struct page *page, unsigned int order) @@ -2343,7 +2344,7 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,  {  	int migratetype; -	if (!free_pages_prepare(page, order, FPI_NONE)) +	if (!free_pages_prepare(page, order))  		return false;  	migratetype = get_pfnblock_migratetype(page, pfn); @@ -2515,66 +2516,70 @@ void free_unref_page(struct page *page, unsigned int order)  }  /* - * Free a list of 0-order pages + * Free a batch of folios   */ -void free_unref_page_list(struct list_head 
*list) +void free_unref_folios(struct folio_batch *folios)  {  	unsigned long __maybe_unused UP_flags; -	struct page *page, *next;  	struct per_cpu_pages *pcp = NULL;  	struct zone *locked_zone = NULL; -	int batch_count = 0; -	int migratetype; +	int i, j, migratetype; -	/* Prepare pages for freeing */ -	list_for_each_entry_safe(page, next, list, lru) { -		unsigned long pfn = page_to_pfn(page); -		if (!free_unref_page_prepare(page, pfn, 0)) { -			list_del(&page->lru); +	/* Prepare folios for freeing */ +	for (i = 0, j = 0; i < folios->nr; i++) { +		struct folio *folio = folios->folios[i]; +		unsigned long pfn = folio_pfn(folio); +		unsigned int order = folio_order(folio); + +		if (order > 0 && folio_test_large_rmappable(folio)) +			folio_undo_large_rmappable(folio); +		if (!free_unref_page_prepare(&folio->page, pfn, order))  			continue; -		}  		/* -		 * Free isolated pages directly to the allocator, see -		 * comment in free_unref_page. +		 * Free isolated folios and orders not handled on the PCP +		 * directly to the allocator, see comment in free_unref_page.  		 
*/ -		migratetype = get_pcppage_migratetype(page); -		if (unlikely(is_migrate_isolate(migratetype))) { -			list_del(&page->lru); -			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE); +		migratetype = get_pcppage_migratetype(&folio->page); +		if (!pcp_allowed_order(order) || +		    is_migrate_isolate(migratetype)) { +			free_one_page(folio_zone(folio), &folio->page, pfn, +					order, migratetype, FPI_NONE);  			continue;  		} +		folio->private = (void *)(unsigned long)order; +		if (j != i) +			folios->folios[j] = folio; +		j++;  	} +	folios->nr = j; -	list_for_each_entry_safe(page, next, list, lru) { -		struct zone *zone = page_zone(page); +	for (i = 0; i < folios->nr; i++) { +		struct folio *folio = folios->folios[i]; +		struct zone *zone = folio_zone(folio); +		unsigned int order = (unsigned long)folio->private; -		list_del(&page->lru); -		migratetype = get_pcppage_migratetype(page); +		folio->private = NULL; +		migratetype = get_pcppage_migratetype(&folio->page); -		/* -		 * Either different zone requiring a different pcp lock or -		 * excessive lock hold times when freeing a large list of -		 * pages. -		 */ -		if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) { +		/* Different zone requires a different pcp lock */ +		if (zone != locked_zone) {  			if (pcp) {  				pcp_spin_unlock(pcp);  				pcp_trylock_finish(UP_flags);  			} -			batch_count = 0; -  			/* -			 * trylock is necessary as pages may be getting freed +			 * trylock is necessary as folios may be getting freed  			 * from IRQ or SoftIRQ context after an IO completion.  			 
*/  			pcp_trylock_prepare(UP_flags);  			pcp = pcp_spin_trylock(zone->per_cpu_pageset);  			if (unlikely(!pcp)) {  				pcp_trylock_finish(UP_flags); -				free_one_page(zone, page, page_to_pfn(page), -					      0, migratetype, FPI_NONE); +				free_one_page(zone, &folio->page, +						folio_pfn(folio), order, +						migratetype, FPI_NONE);  				locked_zone = NULL;  				continue;  			} @@ -2588,15 +2593,16 @@ void free_unref_page_list(struct list_head *list)  		if (unlikely(migratetype >= MIGRATE_PCPTYPES))  			migratetype = MIGRATE_MOVABLE; -		trace_mm_page_free_batched(page); -		free_unref_page_commit(zone, pcp, page, migratetype, 0); -		batch_count++; +		trace_mm_page_free_batched(&folio->page); +		free_unref_page_commit(zone, pcp, &folio->page, migratetype, +				order);  	}  	if (pcp) {  		pcp_spin_unlock(pcp);  		pcp_trylock_finish(UP_flags);  	} +	folio_batch_reinit(folios);  }  /* @@ -2616,8 +2622,8 @@ void split_page(struct page *page, unsigned int order)  	for (i = 1; i < (1 << order); i++)  		set_page_refcounted(page + i); -	split_page_owner(page, 1 << order); -	split_page_memcg(page, 1 << order); +	split_page_owner(page, order, 0); +	split_page_memcg(page, order, 0);  }  EXPORT_SYMBOL_GPL(split_page); @@ -4041,6 +4047,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,  						struct alloc_context *ac)  {  	bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; +	bool can_compact = gfp_compaction_allowed(gfp_mask);  	const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;  	struct page *page = NULL;  	unsigned int alloc_flags; @@ -4111,7 +4118,7 @@ restart:  	 * Don't try this for allocations that are allowed to ignore  	 * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.  	 
*/ -	if (can_direct_reclaim && +	if (can_direct_reclaim && can_compact &&  			(costly_order ||  			   (order > 0 && ac->migratetype != MIGRATE_MOVABLE))  			&& !gfp_pfmemalloc_allowed(gfp_mask)) { @@ -4209,9 +4216,10 @@ retry:  	/*  	 * Do not retry costly high order allocations unless they are -	 * __GFP_RETRY_MAYFAIL +	 * __GFP_RETRY_MAYFAIL and we can compact  	 */ -	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) +	if (costly_order && (!can_compact || +			     !(gfp_mask & __GFP_RETRY_MAYFAIL)))  		goto nopage;  	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -4224,7 +4232,7 @@ retry:  	 * implementation of the compaction depends on the sufficient amount  	 * of free memory (see __compaction_suitable)  	 */ -	if (did_some_progress > 0 && +	if (did_some_progress > 0 && can_compact &&  			should_compact_retry(ac, order, alloc_flags,  				compact_result, &compact_priority,  				&compaction_retries)) @@ -4685,8 +4693,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,  	gfp_t gfp = gfp_mask;  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) -	gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | -		    __GFP_NOMEMALLOC; +	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP | +		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;  	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,  				PAGE_FRAG_CACHE_MAX_ORDER);  	nc->size = page ? 
PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; @@ -4699,6 +4707,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,  	return page;  } +void page_frag_cache_drain(struct page_frag_cache *nc) +{ +	if (!nc->va) +		return; + +	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias); +	nc->va = NULL; +} +EXPORT_SYMBOL(page_frag_cache_drain); +  void __page_frag_cache_drain(struct page *page, unsigned int count)  {  	VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); @@ -4708,9 +4726,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)  }  EXPORT_SYMBOL(__page_frag_cache_drain); -void *page_frag_alloc_align(struct page_frag_cache *nc, -		      unsigned int fragsz, gfp_t gfp_mask, -		      unsigned int align_mask) +void *__page_frag_alloc_align(struct page_frag_cache *nc, +			      unsigned int fragsz, gfp_t gfp_mask, +			      unsigned int align_mask)  {  	unsigned int size = PAGE_SIZE;  	struct page *page; @@ -4779,7 +4797,7 @@ refill:  	return nc->va + offset;  } -EXPORT_SYMBOL(page_frag_alloc_align); +EXPORT_SYMBOL(__page_frag_alloc_align);  /*   * Frees a page fragment allocated out of either a compound or order 0 page. 
@@ -4801,8 +4819,8 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,  		struct page *page = virt_to_page((void *)addr);  		struct page *last = page + nr; -		split_page_owner(page, 1 << order); -		split_page_memcg(page, 1 << order); +		split_page_owner(page, order, 0); +		split_page_memcg(page, order, 0);  		while (page < --last)  			set_page_refcounted(last); @@ -5572,37 +5590,34 @@ static void zone_pcp_update(struct zone *zone, int cpu_online)  	mutex_unlock(&pcp_batch_high_lock);  } -static void zone_pcp_update_cacheinfo(struct zone *zone) +static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)  { -	int cpu;  	struct per_cpu_pages *pcp;  	struct cpu_cacheinfo *cci; -	for_each_online_cpu(cpu) { -		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); -		cci = get_cpu_cacheinfo(cpu); -		/* -		 * If data cache slice of CPU is large enough, "pcp->batch" -		 * pages can be preserved in PCP before draining PCP for -		 * consecutive high-order pages freeing without allocation. -		 * This can reduce zone lock contention without hurting -		 * cache-hot pages sharing. -		 */ -		spin_lock(&pcp->lock); -		if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) -			pcp->flags |= PCPF_FREE_HIGH_BATCH; -		else -			pcp->flags &= ~PCPF_FREE_HIGH_BATCH; -		spin_unlock(&pcp->lock); -	} +	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); +	cci = get_cpu_cacheinfo(cpu); +	/* +	 * If data cache slice of CPU is large enough, "pcp->batch" +	 * pages can be preserved in PCP before draining PCP for +	 * consecutive high-order pages freeing without allocation. +	 * This can reduce zone lock contention without hurting +	 * cache-hot pages sharing. 
+	 */ +	spin_lock(&pcp->lock); +	if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) +		pcp->flags |= PCPF_FREE_HIGH_BATCH; +	else +		pcp->flags &= ~PCPF_FREE_HIGH_BATCH; +	spin_unlock(&pcp->lock);  } -void setup_pcp_cacheinfo(void) +void setup_pcp_cacheinfo(unsigned int cpu)  {  	struct zone *zone;  	for_each_populated_zone(zone) -		zone_pcp_update_cacheinfo(zone); +		zone_pcp_update_cacheinfo(zone, cpu);  }  /* @@ -5845,7 +5860,7 @@ static void __setup_per_zone_wmarks(void)  		spin_lock_irqsave(&zone->lock, flags);  		tmp = (u64)pages_min * zone_managed_pages(zone); -		do_div(tmp, lowmem_pages); +		tmp = div64_ul(tmp, lowmem_pages);  		if (is_highmem(zone) || zone_idx(zone) == ZONE_MOVABLE) {  			/*  			 * __GFP_HIGH and PF_MEMALLOC allocations usually don't @@ -6219,9 +6234,14 @@ static void alloc_contig_dump_pages(struct list_head *page_list)  	}  } -/* [start, end) must belong to a single zone. */ +/* + * [start, end) must belong to a single zone. + * @migratetype: using migratetype to filter the type of migration in + *		trace_mm_alloc_contig_migrate_range_info. + */  int __alloc_contig_migrate_range(struct compact_control *cc, -					unsigned long start, unsigned long end) +					unsigned long start, unsigned long end, +					int migratetype)  {  	/* This function is based on compact_zone() from compaction.c. 
*/  	unsigned int nr_reclaimed; @@ -6232,6 +6252,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc,  		.nid = zone_to_nid(cc->zone),  		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,  	}; +	struct page *page; +	unsigned long total_mapped = 0; +	unsigned long total_migrated = 0; +	unsigned long total_reclaimed = 0;  	lru_cache_disable(); @@ -6257,9 +6281,18 @@ int __alloc_contig_migrate_range(struct compact_control *cc,  							&cc->migratepages);  		cc->nr_migratepages -= nr_reclaimed; +		if (trace_mm_alloc_contig_migrate_range_info_enabled()) { +			total_reclaimed += nr_reclaimed; +			list_for_each_entry(page, &cc->migratepages, lru) +				total_mapped += page_mapcount(page); +		} +  		ret = migrate_pages(&cc->migratepages, alloc_migration_target,  			NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL); +		if (trace_mm_alloc_contig_migrate_range_info_enabled() && !ret) +			total_migrated += cc->nr_migratepages; +  		/*  		 * On -ENOMEM, migrate_pages() bails out right away. It is pointless  		 * to retry again over this error, so do the same here. @@ -6273,9 +6306,13 @@ int __alloc_contig_migrate_range(struct compact_control *cc,  		if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY)  			alloc_contig_dump_pages(&cc->migratepages);  		putback_movable_pages(&cc->migratepages); -		return ret;  	} -	return 0; + +	trace_mm_alloc_contig_migrate_range_info(start, end, migratetype, +						 total_migrated, +						 total_reclaimed, +						 total_mapped); +	return (ret < 0) ? ret : 0;  }  /** @@ -6355,7 +6392,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,  	 * allocated.  So, if we fall through be sure to clear ret so that  	 * -EBUSY is not accidentally used or returned to caller.  	 */ -	ret = __alloc_contig_migrate_range(&cc, start, end); +	ret = __alloc_contig_migrate_range(&cc, start, end, migratetype);  	if (ret && ret != -EBUSY)  		goto done;  	ret = 0; |