Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 192
1 file changed, 132 insertions, 60 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d295c9bc01a8..3eb01dedfb50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -289,8 +289,8 @@ EXPORT_SYMBOL(movable_zone);
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if MAX_NUMNODES > 1
-int nr_node_ids __read_mostly = MAX_NUMNODES;
-int nr_online_nodes __read_mostly = 1;
+unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
+unsigned int nr_online_nodes __read_mostly = 1;
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
@@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	return 0;
 }
 
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	struct capture_control *capc = current->capture_control;
+
+	return capc &&
+		!(current->flags & PF_KTHREAD) &&
+		!capc->page &&
+		capc->cc->zone == zone &&
+		capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	if (!capc || order != capc->cc->order)
+		return false;
+
+	/* Do not accidentally pollute CMA or isolated regions*/
+	if (is_migrate_cma(migratetype) ||
+	    is_migrate_isolate(migratetype))
+		return false;
+
+	/*
+	 * Do not let lower order allocations polluate a movable pageblock.
+	 * This might let an unmovable request use a reclaimable pageblock
+	 * and vice-versa but no more than normal fallback logic which can
+	 * have trouble finding a high-order free page.
+	 */
+	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+		return false;
+
+	capc->page = page;
+	return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	return false;
+}
+#endif /* CONFIG_COMPACTION */
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_pfn);
 	struct page *buddy;
 	unsigned int max_order;
+	struct capture_control *capc = task_capc(zone);
 
 	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -837,6 +889,11 @@
 
 continue_merging:
 	while (order < max_order - 1) {
+		if (compaction_capture(capc, page, order, migratetype)) {
+			__mod_zone_freepage_state(zone, -(1 << order),
+								migratetype);
+			return;
+		}
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
 
@@ -1056,7 +1113,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
 	if (memcg_kmem_enabled() && PageKmemcg(page))
-		memcg_kmem_uncharge(page, order);
+		__memcg_kmem_uncharge(page, order);
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -1303,7 +1360,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-static void __init __free_pages_boot_core(struct page *page, unsigned int order)
+void __free_pages_core(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -1382,7 +1439,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
 {
 	if (early_page_uninitialised(pfn))
 		return;
-	return __free_pages_boot_core(page, order);
+	__free_pages_core(page, order);
 }
 
 /*
@@ -1472,14 +1529,14 @@ static void __init deferred_free_range(unsigned long pfn,
 	if (nr_pages == pageblock_nr_pages &&
 	    (pfn & (pageblock_nr_pages - 1)) == 0) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, pageblock_order);
+		__free_pages_core(page, pageblock_order);
 		return;
 	}
 
 	for (i = 0; i < nr_pages; i++, page++, pfn++) {
 		if ((pfn & (pageblock_nr_pages - 1)) == 0)
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, 0);
+		__free_pages_core(page, 0);
 	}
 }
 
@@ -1945,8 +2002,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
 	arch_alloc_page(page, order);
 	kernel_map_pages(page, 1 << order, 1);
-	kernel_poison_pages(page, 1 << order, 1);
 	kasan_alloc_pages(page, order);
+	kernel_poison_pages(page, 1 << order, 1);
 	set_page_owner(page, order, gfp_flags);
 }
 
@@ -2170,6 +2227,18 @@ static inline void boost_watermark(struct zone *zone)
 
 	max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
 			watermark_boost_factor, 10000);
+
+	/*
+	 * high watermark may be uninitialised if fragmentation occurs
+	 * very early in boot so do not boost. We do not fall
+	 * through and boost by pageblock_nr_pages as failing
+	 * allocations that early means that reclaim is not going
+	 * to help and it may even be impossible to reclaim the
+	 * boosted watermark resulting in a hang.
+	 */
+	if (!max_boost)
+		return;
+
 	max_boost = max(pageblock_nr_pages, max_boost);
 
 	zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
@@ -2950,7 +3019,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 		 * watermark, because we already know our high-order page
 		 * exists.
 		 */
-		watermark = min_wmark_pages(zone) + (1UL << order);
+		watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
 		if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
 			return 0;
 
@@ -3161,24 +3230,14 @@ static int __init fail_page_alloc_debugfs(void)
 
 	dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
 					&fail_page_alloc.attr);
-	if (IS_ERR(dir))
-		return PTR_ERR(dir);
-
-	if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
-				&fail_page_alloc.ignore_gfp_reclaim))
-		goto fail;
-	if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
-				&fail_page_alloc.ignore_gfp_highmem))
-		goto fail;
-	if (!debugfs_create_u32("min-order", mode, dir,
-				&fail_page_alloc.min_order))
-		goto fail;
-
-	return 0;
-fail:
-	debugfs_remove_recursive(dir);
+
+	debugfs_create_bool("ignore-gfp-wait", mode, dir,
+			    &fail_page_alloc.ignore_gfp_reclaim);
+	debugfs_create_bool("ignore-gfp-highmem", mode, dir,
+			    &fail_page_alloc.ignore_gfp_highmem);
+	debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order);
 
-	return -ENOMEM;
+	return 0;
 }
 
 late_initcall(fail_page_alloc_debugfs);
@@ -3698,7 +3757,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long pflags;
 	unsigned int noreclaim_flag;
 
@@ -3709,13 +3768,15 @@
 	noreclaim_flag = memalloc_noreclaim_save();
 
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-									prio);
+								prio, &page);
 
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
 
-	if (*compact_result <= COMPACT_INACTIVE)
+	if (*compact_result <= COMPACT_INACTIVE) {
+		WARN_ON_ONCE(page);
 		return NULL;
+	}
 
 	/*
 	 * At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3723,7 +3784,13 @@
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+	/* Prep a captured page if available */
+	if (page)
+		prep_new_page(page, order, gfp_mask, alloc_flags);
+
+	/* Try get a page from the freelist if available */
+	if (!page)
+		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
 
 	if (page) {
 		struct zone *zone = page_zone(page);
@@ -4556,7 +4623,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
-	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+	    unlikely(__memcg_kmem_charge(page, gfp_mask, order) != 0)) {
 		__free_pages(page, order);
 		page = NULL;
 	}
@@ -4675,11 +4742,11 @@ refill:
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
-		page_ref_add(page, size - 1);
+		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
 
 		/* reset page count bias and offset to start of new frag */
 		nc->pfmemalloc = page_is_pfmemalloc(page);
-		nc->pagecnt_bias = size;
+		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		nc->offset = size;
 	}
 
@@ -4695,10 +4762,10 @@ refill:
 		size = nc->size;
 #endif
 		/* OK, page count is 0, we can safely set it */
-		set_page_count(page, size);
+		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
 
 		/* reset page count bias and offset to start of new frag */
-		nc->pagecnt_bias = size;
+		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		offset = size - fragsz;
 	}
 
@@ -4749,6 +4816,8 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
  * This function is also limited by MAX_ORDER.
  *
  * Memory allocated by this function must be released by free_pages_exact().
+ *
+ * Return: pointer to the allocated area or %NULL in case of error.
  */
 void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
 {
@@ -4769,6 +4838,8 @@ EXPORT_SYMBOL(alloc_pages_exact);
  *
  * Like alloc_pages_exact(), but try to allocate on node nid first before falling
  * back.
+ *
+ * Return: pointer to the allocated area or %NULL in case of error.
  */
 void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 {
@@ -4802,11 +4873,13 @@ EXPORT_SYMBOL(free_pages_exact);
 * nr_free_zone_pages - count number of pages beyond high watermark
 * @offset: The zone index of the highest zone
 *
- * nr_free_zone_pages() counts the number of counts pages which are beyond the
+ * nr_free_zone_pages() counts the number of pages which are beyond the
 * high watermark within all zones at or below a given zone index.  For each
 * zone, the number of pages is calculated as:
 *
 *     nr_free_zone_pages = managed_pages - high_pages
+ *
+ * Return: number of pages beyond high watermark.
 */
 static unsigned long nr_free_zone_pages(int offset)
 {
@@ -4833,6 +4906,9 @@ static unsigned long nr_free_zone_pages(int offset)
 *
 * nr_free_buffer_pages() counts the number of pages which are beyond the high
 * watermark within ZONE_DMA and ZONE_NORMAL.
+ *
+ * Return: number of pages beyond high watermark within ZONE_DMA and
+ * ZONE_NORMAL.
 */
 unsigned long nr_free_buffer_pages(void)
 {
@@ -4845,6 +4921,8 @@ EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 *
 * nr_free_pagecache_pages() counts the number of pages which are beyond the
 * high watermark within all zones.
+ *
+ * Return: number of pages beyond high watermark within all zones.
 */
 unsigned long nr_free_pagecache_pages(void)
 {
@@ -5291,7 +5369,8 @@ static int node_load[MAX_NUMNODES];
 * from each node to each node in the system), and should also prefer nodes
 * with no CPUs, since presumably they'll have very little allocation pressure
 * on them otherwise.
- * It returns -1 if no node is found.
+ *
+ * Return: node id of the found node or %NUMA_NO_NODE if no node is found.
 */
 static int find_next_best_node(int node, nodemask_t *used_node_mask)
 {
@@ -5597,7 +5676,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
 	else
 		page_group_by_mobility_disabled = 0;
 
-	pr_info("Built %i zonelists, mobility grouping %s.  Total pages: %ld\n",
+	pr_info("Built %u zonelists, mobility grouping %s.  Total pages: %ld\n",
 		nr_online_nodes,
 		page_group_by_mobility_disabled ? "off" : "on",
 		vm_total_pages);
@@ -5701,18 +5780,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 			cond_resched();
 		}
 	}
-#ifdef CONFIG_SPARSEMEM
-	/*
-	 * If the zone does not span the rest of the section then
-	 * we should at least initialize those pages. Otherwise we
-	 * could blow up on a poisoned page in some paths which depend
-	 * on full sections being initialized (e.g. memory hotplug).
-	 */
-	while (end_pfn % PAGES_PER_SECTION) {
-		__init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid);
-		end_pfn++;
-	}
-#endif
 }
 
 #ifdef CONFIG_ZONE_DEVICE
@@ -6016,7 +6083,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn,
 		return state->last_nid;
 
 	nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
-	if (nid != -1) {
+	if (nid != NUMA_NO_NODE) {
 		state->last_start = start_pfn;
 		state->last_end = end_pfn;
 		state->last_nid = nid;
@@ -6214,7 +6281,7 @@ unsigned long __init __absent_pages_in_range(int nid,
 * @start_pfn: The start PFN to start searching for holes
 * @end_pfn: The end PFN to stop searching for holes
 *
- * It returns the number of pages frames in memory holes within a range.
+ * Return: the number of pages frames in memory holes within a range.
 */
 unsigned long __init absent_pages_in_range(unsigned long start_pfn,
 							unsigned long end_pfn)
@@ -6376,10 +6443,14 @@ static void __ref setup_usemap(struct pglist_data *pgdat,
 {
 	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
 	zone->pageblock_flags = NULL;
-	if (usemapsize)
+	if (usemapsize) {
 		zone->pageblock_flags =
 			memblock_alloc_node_nopanic(usemapsize,
 							 pgdat->node_id);
+		if (!zone->pageblock_flags)
+			panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n",
+			      usemapsize, zone->name, pgdat->node_id);
+	}
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
@@ -6609,6 +6680,9 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 		end = ALIGN(end, MAX_ORDER_NR_PAGES);
 		size =  (end - start) * sizeof(struct page);
 		map = memblock_alloc_node_nopanic(size, pgdat->node_id);
+		if (!map)
+			panic("Failed to allocate %ld bytes for node %d memory map\n",
+			      size, pgdat->node_id);
 		pgdat->node_mem_map = map + offset;
 	}
 	pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
@@ -6764,14 +6838,14 @@ void __init setup_nr_node_ids(void)
 * model has fine enough granularity to avoid incorrect mapping for the
 * populated node map.
 *
- * Returns the determined alignment in pfn's.  0 if there is no alignment
+ * Return: the determined alignment in pfn's.  0 if there is no alignment
 * requirement (single node).
 */
 unsigned long __init node_map_pfn_alignment(void)
 {
 	unsigned long accl_mask = 0, last_end = 0;
 	unsigned long start, end, mask;
-	int last_nid = -1;
+	int last_nid = NUMA_NO_NODE;
 	int i, nid;
 
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
@@ -6819,7 +6893,7 @@ static unsigned long __init find_min_pfn_for_node(int nid)
 /**
 * find_min_pfn_with_active_regions - Find the minimum PFN registered
 *
- * It returns the minimum PFN based on information provided via
+ * Return: the minimum PFN based on information provided via
 * memblock_set_node().
 */
 unsigned long __init find_min_pfn_with_active_regions(void)
@@ -7267,7 +7341,6 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
 
 	return pages;
 }
-EXPORT_SYMBOL(free_reserved_area);
 
 #ifdef	CONFIG_HIGHMEM
 void free_highmem_page(struct page *page)
@@ -7496,7 +7569,7 @@ static void __setup_per_zone_wmarks(void)
 			 * value here.
 			 *
 			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
-			 * deltas control asynch page reclaim, and so should
+			 * deltas control async page reclaim, and so should
 			 * not be capped for highmem.
 			 */
 			unsigned long min_pages;
@@ -7973,7 +8046,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 
 		/*
 		 * Hugepages are not in LRU lists, but they're movable.
-		 * We need not scan over tail pages bacause we don't
+		 * We need not scan over tail pages because we don't
 		 * handle each tail page individually in migration.
 		 */
 		if (PageHuge(page)) {
@@ -8112,7 +8185,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 * pageblocks in the range.  Once isolated, the pageblocks should not
 * be modified by others.
 *
- * Returns zero on success or negative error code.  On success all
+ * Return: zero on success or negative error code.  On success all
 * pages which PFN is in [start, end) are allocated for the caller and
 * need to be freed with free_contig_range().
 */
@@ -8196,7 +8269,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 */
 	lru_add_drain_all();
-	drain_all_pages(cc.zone);
 
 	order = 0;
 	outer_start = start;