Diffstat (limited to 'mm/page_alloc.c')
| -rw-r--r-- | mm/page_alloc.c | 246 | 
1 file changed, 161 insertions, 85 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c4eb750a199..114c56c3685d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -74,6 +74,7 @@
 #include <asm/div64.h>
 #include "internal.h"
 #include "shuffle.h"
+#include "page_reporting.h"
 
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
@@ -95,7 +96,6 @@ DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);
  */
 DEFINE_PER_CPU(int, _numa_mem_);		/* Kernel "local memory" node */
 EXPORT_PER_CPU_SYMBOL(_numa_mem_);
-int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 /* work_structs for global per-cpu drains */
@@ -689,6 +689,8 @@ void prep_compound_page(struct page *page, unsigned int order)
 		set_compound_head(p, page);
 	}
 	atomic_set(compound_mapcount_ptr(page), -1);
+	if (hpage_pincount_available(page))
+		atomic_set(compound_pincount_ptr(page), 0);
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -791,32 +793,25 @@ static inline void set_page_order(struct page *page, unsigned int order)
  *
  * For recording page's order, we use page_private(page).
  */
-static inline int page_is_buddy(struct page *page, struct page *buddy,
+static inline bool page_is_buddy(struct page *page, struct page *buddy,
 							unsigned int order)
 {
-	if (page_is_guard(buddy) && page_order(buddy) == order) {
-		if (page_zone_id(page) != page_zone_id(buddy))
-			return 0;
-
-		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+	if (!page_is_guard(buddy) && !PageBuddy(buddy))
+		return false;
 
-		return 1;
-	}
+	if (page_order(buddy) != order)
+		return false;
 
-	if (PageBuddy(buddy) && page_order(buddy) == order) {
-		/*
-		 * zone check is done late to avoid uselessly
-		 * calculating zone/node ids for pages that could
-		 * never merge.
-		 */
-		if (page_zone_id(page) != page_zone_id(buddy))
-			return 0;
+	/*
+	 * zone check is done late to avoid uselessly calculating
+	 * zone/node ids for pages that could never merge.
+	 */
+	if (page_zone_id(page) != page_zone_id(buddy))
+		return false;
 
-		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+	VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
 
-		return 1;
-	}
-	return 0;
+	return true;
 }
 
 #ifdef CONFIG_COMPACTION
@@ -870,6 +865,78 @@ compaction_capture(struct capture_control *capc, struct page *page,
 }
 #endif /* CONFIG_COMPACTION */
 
+/* Used for pages not on another list */
+static inline void add_to_free_list(struct page *page, struct zone *zone,
+				    unsigned int order, int migratetype)
+{
+	struct free_area *area = &zone->free_area[order];
+
+	list_add(&page->lru, &area->free_list[migratetype]);
+	area->nr_free++;
+}
+
+/* Used for pages not on another list */
+static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
+					 unsigned int order, int migratetype)
+{
+	struct free_area *area = &zone->free_area[order];
+
+	list_add_tail(&page->lru, &area->free_list[migratetype]);
+	area->nr_free++;
+}
+
+/* Used for pages which are on another list */
+static inline void move_to_free_list(struct page *page, struct zone *zone,
+				     unsigned int order, int migratetype)
+{
+	struct free_area *area = &zone->free_area[order];
+
+	list_move(&page->lru, &area->free_list[migratetype]);
+}
+
+static inline void del_page_from_free_list(struct page *page, struct zone *zone,
+					   unsigned int order)
+{
+	/* clear reported state and update reported page count */
+	if (page_reported(page))
+		__ClearPageReported(page);
+
+	list_del(&page->lru);
+	__ClearPageBuddy(page);
+	set_page_private(page, 0);
+	zone->free_area[order].nr_free--;
+}
+
+/*
+ * If this is not the largest possible page, check if the buddy
+ * of the next-highest order is free. If it is, it's possible
+ * that pages are being freed that will coalesce soon. In case,
+ * that is happening, add the free page to the tail of the list
+ * so it's less likely to be used soon and more likely to be merged
+ * as a higher order page
+ */
+static inline bool
+buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
+		   struct page *page, unsigned int order)
+{
+	struct page *higher_page, *higher_buddy;
+	unsigned long combined_pfn;
+
+	if (order >= MAX_ORDER - 2)
+		return false;
+
+	if (!pfn_valid_within(buddy_pfn))
+		return false;
+
+	combined_pfn = buddy_pfn & pfn;
+	higher_page = page + (combined_pfn - pfn);
+	buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
+	higher_buddy = higher_page + (buddy_pfn - combined_pfn);
+
+	return pfn_valid_within(buddy_pfn) &&
+	       page_is_buddy(higher_page, higher_buddy, order + 1);
+}
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -897,13 +964,14 @@ compaction_capture(struct capture_control *capc, struct page *page,
 static inline void __free_one_page(struct page *page,
 		unsigned long pfn,
 		struct zone *zone, unsigned int order,
-		int migratetype)
+		int migratetype, bool report)
 {
-	unsigned long combined_pfn;
+	struct capture_control *capc = task_capc(zone);
 	unsigned long uninitialized_var(buddy_pfn);
-	struct page *buddy;
+	unsigned long combined_pfn;
 	unsigned int max_order;
-	struct capture_control *capc = task_capc(zone);
+	struct page *buddy;
+	bool to_tail;
 
 	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -938,7 +1006,7 @@ continue_merging:
 		if (page_is_guard(buddy))
 			clear_page_guard(zone, buddy, order, migratetype);
 		else
-			del_page_from_free_area(buddy, &zone->free_area[order]);
+			del_page_from_free_list(buddy, zone, order);
 		combined_pfn = buddy_pfn & pfn;
 		page = page + (combined_pfn - pfn);
 		pfn = combined_pfn;
@@ -972,35 +1040,19 @@ continue_merging:
 done_merging:
 	set_page_order(page, order);
 
-	/*
-	 * If this is not the largest possible page, check if the buddy
-	 * of the next-highest order is free. If it is, it's possible
-	 * that pages are being freed that will coalesce soon. In case,
-	 * that is happening, add the free page to the tail of the list
-	 * so it's less likely to be used soon and more likely to be merged
-	 * as a higher order page
-	 */
-	if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)
-			&& !is_shuffle_order(order)) {
-		struct page *higher_page, *higher_buddy;
-		combined_pfn = buddy_pfn & pfn;
-		higher_page = page + (combined_pfn - pfn);
-		buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
-		higher_buddy = higher_page + (buddy_pfn - combined_pfn);
-		if (pfn_valid_within(buddy_pfn) &&
-		    page_is_buddy(higher_page, higher_buddy, order + 1)) {
-			add_to_free_area_tail(page, &zone->free_area[order],
-					      migratetype);
-			return;
-		}
-	}
-
 	if (is_shuffle_order(order))
-		add_to_free_area_random(page, &zone->free_area[order],
-				migratetype);
+		to_tail = shuffle_pick_tail();
 	else
-		add_to_free_area(page, &zone->free_area[order], migratetype);
+		to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
+
+	if (to_tail)
+		add_to_free_list_tail(page, zone, order, migratetype);
+	else
+		add_to_free_list(page, zone, order, migratetype);
+
+	/* Notify page reporting subsystem of freed page */
+	if (report)
+		page_reporting_notify_free(order);
 }
 
 /*
@@ -1152,7 +1204,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
 	if (memcg_kmem_enabled() && PageKmemcg(page))
-		__memcg_kmem_uncharge(page, order);
+		__memcg_kmem_uncharge_page(page, order);
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -1317,7 +1369,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		if (unlikely(isolated_pageblocks))
 			mt = get_pageblock_migratetype(page);
 
-		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
+		__free_one_page(page, page_to_pfn(page), zone, 0, mt, true);
 		trace_mm_page_pcpu_drain(page, 0, mt);
 	}
 	spin_unlock(&zone->lock);
@@ -1333,7 +1385,7 @@ static void free_one_page(struct zone *zone,
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
-	__free_one_page(page, pfn, zone, order, migratetype);
+	__free_one_page(page, pfn, zone, order, migratetype, true);
 	spin_unlock(&zone->lock);
 }
 
@@ -2014,13 +2066,11 @@ void __init init_cma_reserved_pageblock(struct page *page)
  * -- nyc
  */
 static inline void expand(struct zone *zone, struct page *page,
-	int low, int high, struct free_area *area,
-	int migratetype)
+	int low, int high, int migratetype)
 {
 	unsigned long size = 1 << high;
 
 	while (high > low) {
-		area--;
 		high--;
 		size >>= 1;
 		VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
@@ -2034,7 +2084,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		if (set_page_guard(zone, &page[size], high, migratetype))
 			continue;
 
-		add_to_free_area(&page[size], area, migratetype);
+		add_to_free_list(&page[size], zone, high, migratetype);
 		set_page_order(&page[size], high);
 	}
 }
@@ -2192,8 +2242,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
 		page = get_page_from_free_area(area, migratetype);
 		if (!page)
 			continue;
-		del_page_from_free_area(page, area);
-		expand(zone, page, order, current_order, area, migratetype);
+		del_page_from_free_list(page, zone, current_order);
+		expand(zone, page, order, current_order, migratetype);
 		set_pcppage_migratetype(page, migratetype);
 		return page;
 	}
@@ -2267,7 +2317,7 @@ static int move_freepages(struct zone *zone,
 		VM_BUG_ON_PAGE(page_zone(page) != zone, page);
 
 		order = page_order(page);
-		move_to_free_area(page, &zone->free_area[order], migratetype);
+		move_to_free_list(page, zone, order, migratetype);
 		page += 1 << order;
 		pages_moved += 1 << order;
 	}
@@ -2383,7 +2433,6 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
 		unsigned int alloc_flags, int start_type, bool whole_block)
 {
 	unsigned int current_order = page_order(page);
-	struct free_area *area;
 	int free_pages, movable_pages, alike_pages;
 	int old_block_type;
 
@@ -2454,8 +2503,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
 	return;
 
 single_page:
-	area = &zone->free_area[current_order];
-	move_to_free_area(page, area, start_type);
+	move_to_free_list(page, zone, current_order, start_type);
 }
 
 /*
@@ -3126,7 +3174,6 @@ EXPORT_SYMBOL_GPL(split_page);
 
 int __isolate_free_page(struct page *page, unsigned int order)
 {
-	struct free_area *area = &page_zone(page)->free_area[order];
 	unsigned long watermark;
 	struct zone *zone;
 	int mt;
@@ -3152,7 +3199,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 
 	/* Remove page from free list */
-	del_page_from_free_area(page, area);
+	del_page_from_free_list(page, zone, order);
 
 	/*
 	 * Set the pageblock if the isolated page is at least half of a
@@ -3173,6 +3220,25 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	return 1UL << order;
 }
 
+/**
+ * __putback_isolated_page - Return a now-isolated page back where we got it
+ * @page: Page that was isolated
+ * @order: Order of the isolated page
+ *
+ * This function is meant to return a page pulled from the free lists via
+ * __isolate_free_page back to the free lists they were pulled from.
+ */
+void __putback_isolated_page(struct page *page, unsigned int order, int mt)
+{
+	struct zone *zone = page_zone(page);
+
+	/* zone lock should be held when this function is called */
+	lockdep_assert_held(&zone->lock);
+
+	/* Return isolated page to tail of freelist. */
+	__free_one_page(page, page_to_pfn(page), zone, order, mt, false);
+}
+
 /*
  * Update NUMA hit/miss statistics
  *
@@ -3459,8 +3525,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 			return true;
 		}
 #endif
-		if (alloc_harder &&
-			!list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+		if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
 			return true;
 	}
 	return false;
@@ -3535,10 +3600,13 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 static inline unsigned int
 alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 {
-	unsigned int alloc_flags = 0;
+	unsigned int alloc_flags;
 
-	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-		alloc_flags |= ALLOC_KSWAPD;
+	/*
+	 * __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
+	 * to save a branch.
+	 */
+	alloc_flags = (__force int) (gfp_mask & __GFP_KSWAPD_RECLAIM);
 
 #ifdef CONFIG_ZONE_DMA32
 	if (!zone)
@@ -4174,8 +4242,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 {
 	unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
 
-	/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
+	/*
+	 * __GFP_HIGH is assumed to be the same as ALLOC_HIGH
+	 * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
+	 * to save two branches.
+	 */
 	BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
+	BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD);
 
 	/*
 	 * The caller may dip into page reserves a bit more if the caller
@@ -4183,7 +4256,8 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	 * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
 	 * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
 	 */
-	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
+	alloc_flags |= (__force int)
+		(gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
 
 	if (gfp_mask & __GFP_ATOMIC) {
 		/*
@@ -4200,9 +4274,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-		alloc_flags |= ALLOC_KSWAPD;
-
 #ifdef CONFIG_CMA
 	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
@@ -4745,14 +4816,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 	 * Restore the original nodemask if it was potentially replaced with
 	 * &cpuset_current_mems_allowed to optimize the fast-path attempt.
 	 */
-	if (unlikely(ac.nodemask != nodemask))
-		ac.nodemask = nodemask;
+	ac.nodemask = nodemask;
 
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
-	    unlikely(__memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+	    unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
 		__free_pages(page, order);
 		page = NULL;
 	}
@@ -7867,8 +7937,8 @@ int __meminit init_per_zone_wmark_min(void)
 		min_free_kbytes = new_min_free_kbytes;
 		if (min_free_kbytes < 128)
 			min_free_kbytes = 128;
-		if (min_free_kbytes > 65536)
-			min_free_kbytes = 65536;
+		if (min_free_kbytes > 262144)
+			min_free_kbytes = 262144;
 	} else {
 		pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
 				new_min_free_kbytes, user_min_free_kbytes);
@@ -8253,15 +8323,20 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
 		/*
 		 * Hugepages are not in LRU lists, but they're movable.
+		 * THPs are on the LRU, but need to be counted as #small pages.
 		 * We need not scan over tail pages because we don't
 		 * handle each tail page individually in migration.
 		 */
-		if (PageHuge(page)) {
+		if (PageHuge(page) || PageTransCompound(page)) {
 			struct page *head = compound_head(page);
 			unsigned int skip_pages;
 
-			if (!hugepage_migration_supported(page_hstate(head)))
+			if (PageHuge(page)) {
+				if (!hugepage_migration_supported(page_hstate(head)))
+					return page;
+			} else if (!PageLRU(head) && !__PageMovable(head)) {
 				return page;
+			}
 
 			skip_pages = compound_nr(head) - (page - head);
 			iter += skip_pages - 1;
@@ -8402,6 +8477,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		.ignore_skip_hint = true,
 		.no_set_skip_hint = true,
 		.gfp_mask = current_gfp_context(gfp_mask),
+		.alloc_contig = true,
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
 
@@ -8709,7 +8785,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		BUG_ON(!PageBuddy(page));
 		order = page_order(page);
 		offlined_pages += 1 << order;
-		del_page_from_free_area(page, &zone->free_area[order]);
+		del_page_from_free_list(page, zone, order);
 		pfn += (1 << order);
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
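For readers tracing the new buddy_merge_likely() helper, the pfn arithmetic it inherits from the old open-coded block in __free_one_page() can be exercised on its own. The following is a minimal userspace sketch, not kernel code: it assumes __find_buddy_pfn(pfn, order) reduces to pfn ^ (1UL << order), and the sample pfn value is made up.

/*
 * Standalone illustration of the buddy/combined pfn arithmetic used by
 * buddy_merge_likely() in the diff above. Not kernel code; find_buddy_pfn()
 * is a stand-in for the kernel's __find_buddy_pfn().
 */
#include <stdio.h>

static unsigned long find_buddy_pfn(unsigned long pfn, unsigned int order)
{
	return pfn ^ (1UL << order);	/* flip the bit for this order */
}

int main(void)
{
	unsigned long pfn = 0x1234c0;	/* an order-4 aligned pfn (example value) */
	unsigned int order = 4;

	unsigned long buddy_pfn = find_buddy_pfn(pfn, order);
	unsigned long combined_pfn = buddy_pfn & pfn;	/* start of the merged block */

	/* the buddy of the merged (order + 1) block, as probed by the heuristic */
	unsigned long higher_buddy_pfn = find_buddy_pfn(combined_pfn, order + 1);

	printf("pfn          %#lx\n", pfn);
	printf("buddy        %#lx\n", buddy_pfn);
	printf("combined     %#lx\n", combined_pfn);
	printf("higher buddy %#lx\n", higher_buddy_pfn);
	return 0;
}

If the page at higher_buddy_pfn is itself a free buddy of order + 1, the heuristic queues the freed page at the tail of the list so an imminent merge is not disturbed.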
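Similarly, the alloc_flags_nofragment() and gfp_to_alloc_flags() hunks rely on __GFP_HIGH/ALLOC_HIGH and __GFP_KSWAPD_RECLAIM/ALLOC_KSWAPD sharing bit values, so a single mask-and-OR replaces two conditional branches. The sketch below is an illustrative userspace model with hypothetical flag values, not the kernel's definitions; the equality check mirrors the patch's BUILD_BUG_ON() guards.

/*
 * Illustration of the "same bit value" trick: when a request flag and an
 * internal allocation flag share a bit, masking replaces branching.
 * Flag values here are hypothetical stand-ins.
 */
#include <assert.h>
#include <stdio.h>

#define GFP_HIGH		0x20u	/* hypothetical request flag */
#define GFP_KSWAPD_RECLAIM	0x400u	/* hypothetical request flag */
#define ALLOC_HIGH		0x20u	/* internal flag, same bit on purpose */
#define ALLOC_KSWAPD		0x400u	/* internal flag, same bit on purpose */

static unsigned int to_alloc_flags(unsigned int gfp_mask)
{
	/* the kernel checks this at compile time; a plain assert here */
	assert(GFP_HIGH == ALLOC_HIGH && GFP_KSWAPD_RECLAIM == ALLOC_KSWAPD);

	/* branchless translation of both flags at once */
	return gfp_mask & (GFP_HIGH | GFP_KSWAPD_RECLAIM);
}

int main(void)
{
	printf("%#x\n", to_alloc_flags(GFP_KSWAPD_RECLAIM));	/* prints 0x400 */
	printf("%#x\n", to_alloc_flags(GFP_HIGH | 0x1u));	/* prints 0x20 */
	return 0;
}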