Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	| 141
1 file changed, 79 insertions(+), 62 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37435c274cf..c5952749ad40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -677,10 +677,8 @@ static inline int pindex_to_order(unsigned int pindex)
 	int order = pindex / MIGRATE_PCPTYPES;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (order > PAGE_ALLOC_COSTLY_ORDER) {
+	if (order > PAGE_ALLOC_COSTLY_ORDER)
 		order = pageblock_order;
-		VM_BUG_ON(order != pageblock_order);
-	}
 #else
 	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
 #endif
@@ -724,7 +722,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
 
 void free_compound_page(struct page *page)
 {
-	mem_cgroup_uncharge(page);
+	mem_cgroup_uncharge(page_folio(page));
 	free_the_page(page, compound_order(page));
 }
 
@@ -1312,8 +1310,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
 		VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
 
-		if (compound)
+		if (compound) {
 			ClearPageDoubleMap(page);
+			ClearPageHasHWPoisoned(page);
+		}
 		for (i = 1; i < (1 << order); i++) {
 			if (compound)
 				bad += free_tail_pages_check(page, page + i);
@@ -1428,14 +1428,8 @@ static inline void prefetch_buddy(struct page *page)
 
 /*
  * Frees a number of pages from the PCP lists
- * Assumes all pages on list are in same zone, and of same order.
+ * Assumes all pages on list are in same zone.
  * count is the number of pages to free.
- *
- * If the zone was previously in an "all pages pinned" state then look to
- * see if this freeing clears that state.
- *
- * And clear the zone's pages_scanned counter, to hold off the "all pages are
- * pinned" detection logic.
  */
 static void free_pcppages_bulk(struct zone *zone, int count,
 					struct per_cpu_pages *pcp)
@@ -1589,7 +1583,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 		struct zone *zone = &pgdat->node_zones[zid];
 
-		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+		if (zone_spans_pfn(zone, pfn))
 			break;
 	}
 	__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
@@ -3147,9 +3141,9 @@ static void drain_local_pages_wq(struct work_struct *work)
 	 * cpu which is alright but we also have to make sure to not move to
 	 * a different one.
 	 */
-	preempt_disable();
+	migrate_disable();
 	drain_local_pages(drain->zone);
-	preempt_enable();
+	migrate_enable();
 }
 
 /*
@@ -3966,6 +3960,8 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
+
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
 	return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
@@ -4795,30 +4791,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 		trace_reclaim_retry_zone(z, order, reclaimable,
 				available, min_wmark, *no_progress_loops, wmark);
 		if (wmark) {
-			/*
-			 * If we didn't make any progress and have a lot of
-			 * dirty + writeback pages then we should wait for
-			 * an IO to complete to slow down the reclaim and
-			 * prevent from pre mature OOM
-			 */
-			if (!did_some_progress) {
-				unsigned long write_pending;
-
-				write_pending = zone_page_state_snapshot(zone,
-							NR_ZONE_WRITE_PENDING);
-
-				if (2 * write_pending > reclaimable) {
-					congestion_wait(BLK_RW_ASYNC, HZ/10);
-					return true;
-				}
-			}
-
 			ret = true;
-			goto out;
+			break;
 		}
 	}
 
-out:
 	/*
 	 * Memory allocation/reclaim might be called from a WQ context and the
 	 * current implementation of the WQ concurrency control doesn't
@@ -4914,6 +4891,19 @@ retry_cpuset:
 	if (!ac->preferred_zoneref->zone)
 		goto nopage;
 
+	/*
+	 * Check for insane configurations where the cpuset doesn't contain
+	 * any suitable zone to satisfy the request - e.g. non-movable
+	 * GFP_HIGHUSER allocations from MOVABLE nodes only.
+	 */
+	if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) {
+		struct zoneref *z = first_zones_zonelist(ac->zonelist,
+					ac->highest_zoneidx,
+					&cpuset_current_mems_allowed);
+		if (!z->zone)
+			goto nopage;
+	}
+
 	if (alloc_flags & ALLOC_KSWAPD)
 		wake_all_kswapds(order, gfp_mask, ac);
 
@@ -5223,6 +5213,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	if (unlikely(page_array && nr_pages - nr_populated == 0))
 		goto out;
 
+	/* Bulk allocator does not support memcg accounting. */
+	if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
+		goto failed;
+
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)
 		goto failed;
@@ -5400,6 +5394,18 @@ out:
 }
 EXPORT_SYMBOL(__alloc_pages);
 
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+		nodemask_t *nodemask)
+{
+	struct page *page = __alloc_pages(gfp | __GFP_COMP, order,
+			preferred_nid, nodemask);
+
+	if (page && order > 1)
+		prep_transhuge_page(page);
+	return (struct folio *)page;
+}
+EXPORT_SYMBOL(__folio_alloc);
+
 /*
  * Common helper functions. Never use with __GFP_HIGHMEM because the returned
  * address cannot represent highmem pages. Use alloc_pages and then kmap if
@@ -5612,8 +5618,8 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
 	unsigned int order = get_order(size);
 	unsigned long addr;
 
-	if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
-		gfp_mask &= ~__GFP_COMP;
+	if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
+		gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
 
 	addr = __get_free_pages(gfp_mask, order);
 	return make_alloc_exact(addr, order, size);
@@ -5637,8 +5643,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 	unsigned int order = get_order(size);
 	struct page *p;
 
-	if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
-		gfp_mask &= ~__GFP_COMP;
+	if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
+		gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
 
 	p = alloc_pages_node(nid, gfp_mask, order);
 	if (!p)
@@ -5980,6 +5986,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		printk(KERN_CONT
 			"%s"
 			" free:%lukB"
+			" boost:%lukB"
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
@@ -6000,6 +6007,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			"\n",
 			zone->name,
 			K(zone_page_state(zone, NR_FREE_PAGES)),
+			K(zone->watermark_boost),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
@@ -6255,7 +6263,7 @@ static void build_zonelists(pg_data_t *pgdat)
 		 */
 		if (node_distance(local_node, node) !=
 		    node_distance(local_node, prev_node))
-			node_load[node] = load;
+			node_load[node] += load;
 
 		node_order[nr_nodes++] = node;
 		prev_node = node;
@@ -6264,6 +6272,10 @@ static void build_zonelists(pg_data_t *pgdat)
 
 	build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
 	build_thisnode_zonelists(pgdat);
+	pr_info("Fallback order for Node %d: ", local_node);
+	for (node = 0; node < nr_nodes; node++)
+		pr_cont("%d ", node_order[node]);
+	pr_cont("\n");
 }
 
 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
@@ -7389,6 +7401,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 
 static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
+	int i;
+
 	pgdat_resize_init(pgdat);
 
 	pgdat_init_split_queue(pgdat);
@@ -7397,6 +7411,9 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 
+	for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
+		init_waitqueue_head(&pgdat->reclaim_wait[i]);
+
 	pgdat_page_ext_init(pgdat);
 	lruvec_init(&pgdat->__lruvec);
 }
@@ -8126,8 +8143,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
 	}
 
 	if (pages && s)
-		pr_info("Freeing %s memory: %ldK\n",
-			s, pages << (PAGE_SHIFT - 10));
+		pr_info("Freeing %s memory: %ldK\n", s, K(pages));
 
 	return pages;
 }
@@ -8172,14 +8188,13 @@ void __init mem_init_print_info(void)
 		", %luK highmem"
 #endif
 		")\n",
-		nr_free_pages() << (PAGE_SHIFT - 10),
-		physpages << (PAGE_SHIFT - 10),
+		K(nr_free_pages()), K(physpages),
 		codesize >> 10, datasize >> 10, rosize >> 10,
 		(init_data_size + init_code_size) >> 10, bss_size >> 10,
-		(physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
-		totalcma_pages << (PAGE_SHIFT - 10)
+		K(physpages - totalram_pages() - totalcma_pages),
+		K(totalcma_pages)
 #ifdef	CONFIG_HIGHMEM
-		, totalhigh_pages() << (PAGE_SHIFT - 10)
+		, K(totalhigh_pages())
 #endif
 		);
 }
@@ -8452,7 +8467,7 @@ void setup_per_zone_wmarks(void)
  * 8192MB:	11584k
  * 16384MB:	16384k
  */
-int __meminit init_per_zone_wmark_min(void)
+void calculate_min_free_kbytes(void)
 {
 	unsigned long lowmem_kbytes;
 	int new_min_free_kbytes;
@@ -8460,16 +8475,17 @@ int __meminit init_per_zone_wmark_min(void)
 	lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
 	new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
 
-	if (new_min_free_kbytes > user_min_free_kbytes) {
-		min_free_kbytes = new_min_free_kbytes;
-		if (min_free_kbytes < 128)
-			min_free_kbytes = 128;
-		if (min_free_kbytes > 262144)
-			min_free_kbytes = 262144;
-	} else {
+	if (new_min_free_kbytes > user_min_free_kbytes)
+		min_free_kbytes = clamp(new_min_free_kbytes, 128, 262144);
+	else
 		pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
 				new_min_free_kbytes, user_min_free_kbytes);
-	}
+
+}
+
+int __meminit init_per_zone_wmark_min(void)
+{
+	calculate_min_free_kbytes();
 	setup_per_zone_wmarks();
 	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
@@ -8756,7 +8772,8 @@ void *__init alloc_large_system_hash(const char *tablename,
 		} else if (get_order(size) >= MAX_ORDER || hashdist) {
 			table = __vmalloc(size, gfp_flags);
 			virt = true;
-			huge = is_vm_area_hugepages(table);
+			if (table)
+				huge = is_vm_area_hugepages(table);
 		} else {
 			/*
 			 * If bucketsize is not a power-of-two, we may free
@@ -9353,21 +9370,21 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 }
 #endif
 
+/*
+ * This function returns a stable result only if called under zone lock.
+ */
 bool is_free_buddy_page(struct page *page)
 {
-	struct zone *zone = page_zone(page);
 	unsigned long pfn = page_to_pfn(page);
-	unsigned long flags;
 	unsigned int order;
 
-	spin_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < MAX_ORDER; order++) {
 		struct page *page_head = page - (pfn & ((1 << order) - 1));
 
-		if (PageBuddy(page_head) && buddy_order(page_head) >= order)
+		if (PageBuddy(page_head) &&
+		    buddy_order_unsafe(page_head) >= order)
 			break;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
 
 	return order < MAX_ORDER;
 }
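
As a point of reference for the new __folio_alloc() entry point added above, the sketch below shows how a caller might exercise it. It is not part of the diff: the helper name demo_folio_roundtrip() is made up, and the snippet assumes a v5.16-era tree where struct folio, folio_put() and the __folio_alloc() declaration in <linux/gfp.h> are already present.

/* Illustrative sketch only, not part of the patch above. */
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/numa.h>

static int demo_folio_roundtrip(void)
{
	struct folio *folio;

	/*
	 * Order-2 allocation, any node, no nodemask restriction.
	 * __folio_alloc() ORs in __GFP_COMP itself, so the result is
	 * always a compound page wrapped as a folio.
	 */
	folio = __folio_alloc(GFP_KERNEL, 2, NUMA_NO_NODE, NULL);
	if (!folio)
		return -ENOMEM;

	/* ... access the memory via folio_address(folio) ... */

	/* Drop the last reference; the compound page is freed. */
	folio_put(folio);
	return 0;
}

The order > 1 condition before prep_transhuge_page() reflects that the deferred-split state it initialises lives in the second tail page, which only exists for order-2 and larger compound pages.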