Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	92
1 file changed, 55 insertions, 37 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ecf99190ea2..8337926b89d4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -498,7 +498,8 @@ static void bad_page(struct page *page, const char *reason)
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
-	page_mapcount_reset(page); /* remove PageBuddy */
+	if (PageBuddy(page))
+		__ClearPageBuddy(page);
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
@@ -711,12 +712,12 @@ static inline struct page *get_page_from_free_area(struct free_area *area,
 }
 
 /*
- * If this is not the largest possible page, check if the buddy
- * of the next-highest order is free. If it is, it's possible
+ * If this is less than the 2nd largest possible page, check if the buddy
+ * of the next-higher order is free. If it is, it's possible
  * that pages are being freed that will coalesce soon. In case,
  * that is happening, add the free page to the tail of the list
  * so it's less likely to be used soon and more likely to be merged
- * as a higher order page
+ * as a 2-level higher order page
  */
 static inline bool
 buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
@@ -1218,7 +1219,8 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 	__count_vm_events(PGFREE, 1 << order);
 }
 
-void __free_pages_core(struct page *page, unsigned int order)
+void __meminit __free_pages_core(struct page *page, unsigned int order,
+		enum meminit_context context)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -1228,17 +1230,34 @@ void __free_pages_core(struct page *page, unsigned int order)
 	 * When initializing the memmap, __init_single_page() sets the refcount
 	 * of all pages to 1 ("allocated"/"not free"). We have to set the
 	 * refcount of all involved pages to 0.
+	 *
+	 * Note that hotplugged memory pages are initialized to PageOffline().
+	 * Pages freed from memblock might be marked as reserved.
 	 */
-	prefetchw(p);
-	for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
-		prefetchw(p + 1);
-		__ClearPageReserved(p);
-		set_page_count(p, 0);
-	}
-	__ClearPageReserved(p);
-	set_page_count(p, 0);
+	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) &&
+	    unlikely(context == MEMINIT_HOTPLUG)) {
+		for (loop = 0; loop < nr_pages; loop++, p++) {
+			VM_WARN_ON_ONCE(PageReserved(p));
+			__ClearPageOffline(p);
+			set_page_count(p, 0);
+		}
 
-	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
+		/*
+		 * Freeing the page with debug_pagealloc enabled will try to
+		 * unmap it; some archs don't like double-unmappings, so
+		 * map it first.
+		 */
+		debug_pagealloc_map_pages(page, nr_pages);
+		adjust_managed_page_count(page, nr_pages);
+	} else {
+		for (loop = 0; loop < nr_pages; loop++, p++) {
+			__ClearPageReserved(p);
+			set_page_count(p, 0);
+		}
+
+		/* memblock adjusts totalram_pages() manually. */
+		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
+	}
 
 	if (page_contains_unaccepted(page, order)) {
 		if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
@@ -1351,7 +1370,8 @@ static void check_new_page_bad(struct page *page)
 {
 	if (unlikely(page->flags & __PG_HWPOISON)) {
 		/* Don't complain about hwpoisoned pages */
-		page_mapcount_reset(page); /* remove PageBuddy */
+		if (PageBuddy(page))
+			__ClearPageBuddy(page);
 		return;
 	}
 
@@ -2632,8 +2652,7 @@ void free_unref_folios(struct folio_batch *folios)
 		unsigned long pfn = folio_pfn(folio);
 		unsigned int order = folio_order(folio);
 
-		if (order > 0 && folio_test_large_rmappable(folio))
-			folio_undo_large_rmappable(folio);
+		folio_undo_large_rmappable(folio);
 		if (!free_pages_prepare(&folio->page, order))
 			continue;
 		/*
@@ -3031,12 +3050,6 @@ out:
 	return page;
 }
 
-noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
-{
-	return __should_fail_alloc_page(gfp_mask, order);
-}
-ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
-
 static inline long __zone_watermark_unusable_free(struct zone *z,
 				unsigned int order, unsigned int alloc_flags)
 {
@@ -5114,7 +5127,7 @@ static char numa_zonelist_order[] = "Node";
 /*
  * sysctl handler for numa_zonelist_order
  */
-static int numa_zonelist_order_handler(struct ctl_table *table, int write,
+static int numa_zonelist_order_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos)
 {
 	if (write)
@@ -5213,7 +5226,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order,
 }
 
 /*
- * Build gfp_thisnode zonelists
+ * Build __GFP_THISNODE zonelists
 */
 static void build_thisnode_zonelists(pg_data_t *pgdat)
 {
@@ -5738,6 +5751,7 @@ void __init setup_per_cpu_pageset(void)
 	for_each_online_pgdat(pgdat)
 		pgdat->per_cpu_nodestats =
 			alloc_percpu(struct per_cpu_nodestat);
+	store_early_perpage_metadata();
 }
 
 __meminit void zone_pcp_init(struct zone *zone)
@@ -5762,10 +5776,6 @@ void adjust_managed_page_count(struct page *page, long count)
 {
 	atomic_long_add(count, &page_zone(page)->managed_pages);
 	totalram_pages_add(count);
-#ifdef CONFIG_HIGHMEM
-	if (PageHighMem(page))
-		totalhigh_pages_add(count);
-#endif
 }
 EXPORT_SYMBOL(adjust_managed_page_count);
 
@@ -6081,7 +6091,7 @@ postcore_initcall(init_per_zone_wmark_min)
  *	that we can call two helper functions whenever min_free_kbytes
  *	changes.
  */
-static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
+static int min_free_kbytes_sysctl_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
@@ -6097,7 +6107,7 @@ static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
-static int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
+static int watermark_scale_factor_sysctl_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
@@ -6127,7 +6137,7 @@ static void setup_min_unmapped_ratio(void)
 }
 
 
-static int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_unmapped_ratio_sysctl_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
@@ -6154,7 +6164,7 @@ static void setup_min_slab_ratio(void)
 						     sysctl_min_slab_ratio) / 100;
 }
 
-static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_slab_ratio_sysctl_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
@@ -6178,7 +6188,7 @@ static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int wri
  * minimum watermarks. The lowmem reserve ratio can only make sense
  * if in function of the boot time zone sizes.
  */
-static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table,
+static int lowmem_reserve_ratio_sysctl_handler(const struct ctl_table *table,
 		int write, void *buffer, size_t *length, loff_t *ppos)
 {
 	int i;
@@ -6199,7 +6209,7 @@ static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table,
 * cpu. It is the fraction of total pages in each zone that a hot per cpu
 * pagelist can have before it gets flushed back to buddy allocator.
 */
-static int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
+static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *table,
 		int write, void *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
@@ -6690,14 +6700,19 @@ void zone_pcp_reset(struct zone *zone)
 /*
  * All pages in the range must be in a single zone, must not contain holes,
  * must span full sections, and must be isolated before calling this function.
+ *
+ * Returns the number of managed (non-PageOffline()) pages in the range: the
+ * number of pages for which memory offlining code must adjust managed page
+ * counters using adjust_managed_page_count().
  */
-void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+unsigned long __offline_isolated_pages(unsigned long start_pfn,
+		unsigned long end_pfn)
 {
+	unsigned long already_offline = 0, flags;
 	unsigned long pfn = start_pfn;
 	struct page *page;
 	struct zone *zone;
 	unsigned int order;
-	unsigned long flags;
 
 	offline_mem_sections(pfn, end_pfn);
 	zone = page_zone(pfn_to_page(pfn));
@@ -6719,6 +6734,7 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		if (PageOffline(page)) {
 			BUG_ON(page_count(page));
 			BUG_ON(PageBuddy(page));
+			already_offline++;
 			pfn++;
 			continue;
 		}
@@ -6731,6 +6747,8 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 		pfn += (1 << order);
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return end_pfn - start_pfn - already_offline;
 }
 #endif
 
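
Note: two of the hunks above change cross-subsystem interfaces rather than internals only. __free_pages_core() now takes an enum meminit_context so it can tell hotplugged memory (PageOffline) apart from early memblock memory (PageReserved), and __offline_isolated_pages() now returns how many managed pages it actually pulled off the free lists. The fragment below is NOT part of this diff; it is a minimal, hypothetical caller sketch (example_online_free() and example_offline_range() are invented names) written only to illustrate the new calling conventions, assuming the usual declarations from mm/internal.h and the MEMINIT_EARLY/MEMINIT_HOTPLUG values from include/linux/mmzone.h.

/* Hypothetical caller sketch -- not taken from this tree. */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include "internal.h"	/* __free_pages_core(), __offline_isolated_pages() */

/*
 * Onlining side: hotplugged pages start out PageOffline(), so the caller
 * passes MEMINIT_HOTPLUG; per the hunk above, __free_pages_core() then
 * clears PageOffline() and adjusts managed_pages itself.
 */
static void example_online_free(struct page *page, unsigned int order)
{
	__free_pages_core(page, order, MEMINIT_HOTPLUG);
}

/*
 * Offlining side: the new return value counts only pages that were still
 * managed (not already PageOffline()), so the managed-page counter is
 * decremented by exactly that amount rather than by the whole PFN range.
 */
static void example_offline_range(unsigned long start_pfn,
		unsigned long end_pfn)
{
	unsigned long managed = __offline_isolated_pages(start_pfn, end_pfn);

	adjust_managed_page_count(pfn_to_page(start_pfn), -(long)managed);
}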