Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 227
 1 file changed, 154 insertions(+), 73 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3222193c46c6..e2ef1c17942f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,7 +32,6 @@
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/oom.h>
-#include <linux/notifier.h>
 #include <linux/topology.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
@@ -155,16 +154,17 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
  * The following functions are used by the suspend/hibernate code to temporarily
  * change gfp_allowed_mask in order to avoid using I/O during memory allocations
  * while devices are suspended.  To avoid races with the suspend/hibernate code,
- * they should always be called with pm_mutex held (gfp_allowed_mask also should
- * only be modified with pm_mutex held, unless the suspend/hibernate code is
- * guaranteed not to run in parallel with that modification).
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
  */
 
 static gfp_t saved_gfp_mask;
 
 void pm_restore_gfp_mask(void)
 {
-	WARN_ON(!mutex_is_locked(&pm_mutex));
+	WARN_ON(!mutex_is_locked(&system_transition_mutex));
 	if (saved_gfp_mask) {
 		gfp_allowed_mask = saved_gfp_mask;
 		saved_gfp_mask = 0;
@@ -173,7 +173,7 @@ void pm_restore_gfp_mask(void)
 
 void pm_restrict_gfp_mask(void)
 {
-	WARN_ON(!mutex_is_locked(&pm_mutex));
+	WARN_ON(!mutex_is_locked(&system_transition_mutex));
 	WARN_ON(saved_gfp_mask);
 	saved_gfp_mask = gfp_allowed_mask;
 	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
@@ -2908,10 +2908,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 	if (!static_branch_likely(&vm_numa_stat_key))
 		return;
 
-	if (z->node != numa_node_id())
+	if (zone_to_nid(z) != numa_node_id())
 		local_stat = NUMA_OTHER;
 
-	if (z->node == preferred_zone->node)
+	if (zone_to_nid(z) == zone_to_nid(preferred_zone))
 		__inc_numa_state(z, NUMA_HIT);
 	else {
 		__inc_numa_state(z, NUMA_MISS);
@@ -4164,11 +4164,12 @@ retry:
 		alloc_flags = reserve_flags;
 
 	/*
-	 * Reset the zonelist iterators if memory policies can be ignored.
-	 * These allocations are high priority and system rather than user
-	 * orientated.
+	 * Reset the nodemask and zonelist iterators if memory policies can be
+	 * ignored. These allocations are high priority and system rather than
+	 * user oriented.
 	 */
 	if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
+		ac->nodemask = NULL;
 		ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
 					ac->high_zoneidx, ac->nodemask);
 	}
@@ -4402,19 +4403,15 @@ out:
 EXPORT_SYMBOL(__alloc_pages_nodemask);
 
 /*
- * Common helper functions.
+ * Common helper functions. Never use with __GFP_HIGHMEM because the returned
+ * address cannot represent highmem pages. Use alloc_pages and then kmap if
+ * you need to access high mem.
  */
 unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *page;
 
-	/*
-	 * __get_free_pages() returns a virtual address, which cannot represent
-	 * a highmem page
-	 */
-	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
-
-	page = alloc_pages(gfp_mask, order);
+	page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
 	if (!page)
 		return 0;
 	return (unsigned long) page_address(page);
@@ -5280,7 +5277,7 @@ int local_memory_node(int node)
 	z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
 				   gfp_zone(GFP_KERNEL),
 				   NULL);
-	return z->zone->node;
+	return zone_to_nid(z->zone);
 }
 #endif
 
@@ -5566,13 +5563,12 @@ static int zone_batchsize(struct zone *zone)
 
 	/*
 	 * The per-cpu-pages pools are set to around 1000th of the
-	 * size of the zone.  But no more than 1/2 of a meg.
-	 *
-	 * OK, so we don't know how big the cache is.  So guess.
+	 * size of the zone.
 	 */
 	batch = zone->managed_pages / 1024;
-	if (batch * PAGE_SIZE > 512 * 1024)
-		batch = (512 * 1024) / PAGE_SIZE;
+	/* But no more than a meg. */
+	if (batch * PAGE_SIZE > 1024 * 1024)
+		batch = (1024 * 1024) / PAGE_SIZE;
 	batch /= 4;		/* We effectively *= 4 below */
 	if (batch < 1)
 		batch = 1;
@@ -6123,7 +6119,7 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l
 	return usemapsize / 8;
 }
 
-static void __init setup_usemap(struct pglist_data *pgdat,
+static void __ref setup_usemap(struct pglist_data *pgdat,
 				struct zone *zone,
 				unsigned long zone_start_pfn,
 				unsigned long zonesize)
@@ -6143,7 +6139,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 	unsigned int order;
 
@@ -6171,14 +6167,14 @@ void __paginginit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
-						   unsigned long present_pages)
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
+						unsigned long present_pages)
 {
 	unsigned long pages = spanned_pages;
 
@@ -6197,39 +6193,87 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
 	return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
 }
 
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __paginginit free_area_init_core(struct pglist_data *pgdat)
-{
-	enum zone_type j;
-	int nid = pgdat->node_id;
-
-	pgdat_resize_init(pgdat);
-#ifdef CONFIG_NUMA_BALANCING
-	spin_lock_init(&pgdat->numabalancing_migrate_lock);
-	pgdat->numabalancing_migrate_nr_pages = 0;
-	pgdat->numabalancing_migrate_next_window = jiffies;
-#endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pgdat_init_split_queue(struct pglist_data *pgdat)
+{
 	spin_lock_init(&pgdat->split_queue_lock);
 	INIT_LIST_HEAD(&pgdat->split_queue);
 	pgdat->split_queue_len = 0;
+}
+#else
+static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
 #endif
-	init_waitqueue_head(&pgdat->kswapd_wait);
-	init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
 #ifdef CONFIG_COMPACTION
+static void pgdat_init_kcompactd(struct pglist_data *pgdat)
+{
 	init_waitqueue_head(&pgdat->kcompactd_wait);
+}
+#else
+static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
+
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
+{
+	pgdat_resize_init(pgdat);
+
+	pgdat_init_split_queue(pgdat);
+	pgdat_init_kcompactd(pgdat);
+
+	init_waitqueue_head(&pgdat->kswapd_wait);
+	init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
+}
 
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+							unsigned long remaining_pages)
+{
+	zone->managed_pages = remaining_pages;
+	zone_set_nid(zone, nid);
+	zone->name = zone_names[idx];
+	zone->zone_pgdat = NODE_DATA(nid);
+	spin_lock_init(&zone->lock);
+	zone_seqlock_init(zone);
+	zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+	enum zone_type z;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	pgdat_init_internals(pgdat);
+	for (z = 0; z < MAX_NR_ZONES; z++)
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+	enum zone_type j;
+	int nid = pgdat->node_id;
+
+	pgdat_init_internals(pgdat);
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6277,15 +6321,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		 * when the bootmem allocator frees pages into the buddy system.
 		 * And all highmem pages will be managed by the buddy system.
 		 */
-		zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
-		zone->node = nid;
-#endif
-		zone->name = zone_names[j];
-		zone->zone_pgdat = pgdat;
-		spin_lock_init(&zone->lock);
-		zone_seqlock_init(zone);
-		zone_pcp_init(zone);
+		zone_init_internals(zone, j, nid, freesize);
 
 		if (!size)
 			continue;
@@ -6345,8 +6381,24 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 
-void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
-		unsigned long node_start_pfn, unsigned long *zholes_size)
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
+{
+	/*
+	 * We start only with one section of pages, more pages are added as
+	 * needed until the rest of deferred pages are initialized.
+	 */
+	pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+						pgdat->node_spanned_pages);
+	pgdat->first_deferred_pfn = ULONG_MAX;
+}
+#else
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
+#endif
+
+void __init free_area_init_node(int nid, unsigned long *zones_size,
+				   unsigned long node_start_pfn,
+				   unsigned long *zholes_size)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long start_pfn = 0;
@@ -6370,16 +6422,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 				  zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
+	pgdat_set_deferred_range(pgdat);
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-	/*
-	 * We start only with one section of pages, more pages are added as
-	 * needed until the rest of deferred pages are initialized.
-	 */
-	pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
-					 pgdat->node_spanned_pages);
-	pgdat->first_deferred_pfn = ULONG_MAX;
-#endif
 
 	free_area_init_core(pgdat);
 }
 
@@ -6391,7 +6435,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __paginginit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
 	phys_addr_t start, end;
 	unsigned long pfn;
@@ -6404,8 +6448,11 @@ void __paginginit zero_resv_unavail(void)
 	pgcnt = 0;
 	for_each_resv_unavail_range(i, &start, &end) {
 		for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
-			if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+			if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+				pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+					+ pageblock_nr_pages - 1;
 				continue;
+			}
 			mm_zero_struct_page(pfn_to_page(pfn));
 			pgcnt++;
 		}
@@ -7649,6 +7696,10 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 		 * handle each tail page individually in migration.
 		 */
 		if (PageHuge(page)) {
+
+			if (!hugepage_migration_supported(page_hstate(page)))
+				goto unmovable;
+
 			iter = round_up(iter + 1, 1<<compound_order(page)) - 1;
 			continue;
 		}
@@ -8036,3 +8087,33 @@ bool is_free_buddy_page(struct page *page)
 
 	return order < MAX_ORDER;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Set PG_hwpoison flag if a given page is confirmed to be a free page.  This
+ * test is performed under the zone lock to prevent a race against page
+ * allocation.
+ */
+bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	unsigned int order;
+	bool hwpoisoned = false;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order) {
+			if (!TestSetPageHWPoison(page))
+				hwpoisoned = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return hwpoisoned;
+}
+#endif
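
Note on the __get_free_pages() hunk above: instead of tripping VM_BUG_ON() when __GFP_HIGHMEM sneaks into the mask, the flag is now silently cleared, since the returned linear address can never refer to a highmem page. The updated comment points callers that really want highmem at alloc_pages() plus kmap(); a minimal sketch of that pattern follows (the helper name is hypothetical caller code, not something this diff adds):

#include <linux/gfp.h>
#include <linux/highmem.h>

/* Hypothetical caller-side helper, for illustration only. */
static void *example_map_one_page(void)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);

	if (!page)
		return NULL;

	/* kmap() provides a usable mapping even for a highmem page. */
	return kmap(page);	/* pair with kunmap(page) and __free_page(page) */
}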
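
Note on the zone_batchsize() hunk above: the per-cpu-pages cap doubles from 512 KiB to 1 MiB. With 4 KiB pages, the cap before the batch /= 4 scaling rises from 512 KiB / 4 KiB = 128 pages to 1 MiB / 4 KiB = 256 pages, so a sufficiently large zone now refills its per-cpu lists in batches of up to 64 pages instead of 32 (before the further rounding applied later in the same function).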
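
Note on the new set_hwpoison_free_buddy_page() helper: it returns true only if the page is found free in the buddy allocator and PG_hwpoison was not already set, and the whole test runs under zone->lock so the page cannot be handed out concurrently. A hypothetical caller sketch, not the actual mm/memory-failure.c user:

static int example_poison_if_free(unsigned long pfn)
{
	struct page *page = pfn_to_page(pfn);

	/*
	 * Poisoning succeeds only while the page is still free; otherwise the
	 * caller must isolate or migrate the page before poisoning it.
	 */
	if (!set_hwpoison_free_buddy_page(page))
		return -EBUSY;

	return 0;
}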