diff options
Diffstat (limited to 'mm/page_alloc.c')
| -rw-r--r-- | mm/page_alloc.c | 161 |
1 files changed, 95 insertions, 66 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9ecf99190ea2..c565de8f48e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -498,7 +498,8 @@ static void bad_page(struct page *page, const char *reason) dump_stack(); out: /* Leave bad fields for debug, except PageBuddy could make trouble */ - page_mapcount_reset(page); /* remove PageBuddy */ + if (PageBuddy(page)) + __ClearPageBuddy(page); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } @@ -711,12 +712,12 @@ static inline struct page *get_page_from_free_area(struct free_area *area, } /* - * If this is not the largest possible page, check if the buddy - * of the next-highest order is free. If it is, it's possible + * If this is less than the 2nd largest possible page, check if the buddy + * of the next-higher order is free. If it is, it's possible * that pages are being freed that will coalesce soon. In case, * that is happening, add the free page to the tail of the list * so it's less likely to be used soon and more likely to be merged - * as a higher order page + * as a 2-level higher order page */ static inline bool buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn, @@ -1218,7 +1219,8 @@ static void __free_pages_ok(struct page *page, unsigned int order, __count_vm_events(PGFREE, 1 << order); } -void __free_pages_core(struct page *page, unsigned int order) +void __meminit __free_pages_core(struct page *page, unsigned int order, + enum meminit_context context) { unsigned int nr_pages = 1 << order; struct page *p = page; @@ -1228,17 +1230,34 @@ void __free_pages_core(struct page *page, unsigned int order) * When initializing the memmap, __init_single_page() sets the refcount * of all pages to 1 ("allocated"/"not free"). We have to set the * refcount of all involved pages to 0. + * + * Note that hotplugged memory pages are initialized to PageOffline(). + * Pages freed from memblock might be marked as reserved. */ - prefetchw(p); - for (loop = 0; loop < (nr_pages - 1); loop++, p++) { - prefetchw(p + 1); - __ClearPageReserved(p); - set_page_count(p, 0); - } - __ClearPageReserved(p); - set_page_count(p, 0); + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && + unlikely(context == MEMINIT_HOTPLUG)) { + for (loop = 0; loop < nr_pages; loop++, p++) { + VM_WARN_ON_ONCE(PageReserved(p)); + __ClearPageOffline(p); + set_page_count(p, 0); + } + + /* + * Freeing the page with debug_pagealloc enabled will try to + * unmap it; some archs don't like double-unmappings, so + * map it first. + */ + debug_pagealloc_map_pages(page, nr_pages); + adjust_managed_page_count(page, nr_pages); + } else { + for (loop = 0; loop < nr_pages; loop++, p++) { + __ClearPageReserved(p); + set_page_count(p, 0); + } - atomic_long_add(nr_pages, &page_zone(page)->managed_pages); + /* memblock adjusts totalram_pages() manually. */ + atomic_long_add(nr_pages, &page_zone(page)->managed_pages); + } if (page_contains_unaccepted(page, order)) { if (order == MAX_PAGE_ORDER && __free_unaccepted(page)) @@ -1351,7 +1370,8 @@ static void check_new_page_bad(struct page *page) { if (unlikely(page->flags & __PG_HWPOISON)) { /* Don't complain about hwpoisoned pages */ - page_mapcount_reset(page); /* remove PageBuddy */ + if (PageBuddy(page)) + __ClearPageBuddy(page); return; } @@ -2323,16 +2343,20 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) static void drain_pages_zone(unsigned int cpu, struct zone *zone) { struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - int count = READ_ONCE(pcp->count); - - while (count) { - int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); - count -= to_drain; + int count; + do { spin_lock(&pcp->lock); - free_pcppages_bulk(zone, to_drain, pcp, 0); + count = pcp->count; + if (count) { + int to_drain = min(count, + pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + + free_pcppages_bulk(zone, to_drain, pcp, 0); + count -= to_drain; + } spin_unlock(&pcp->lock); - } + } while (count); } /* @@ -2632,8 +2656,7 @@ void free_unref_folios(struct folio_batch *folios) unsigned long pfn = folio_pfn(folio); unsigned int order = folio_order(folio); - if (order > 0 && folio_test_large_rmappable(folio)) - folio_undo_large_rmappable(folio); + folio_undo_large_rmappable(folio); if (!free_pages_prepare(&folio->page, order)) continue; /* @@ -3031,12 +3054,6 @@ out: return page; } -noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) -{ - return __should_fail_alloc_page(gfp_mask, order); -} -ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); - static inline long __zone_watermark_unusable_free(struct zone *z, unsigned int order, unsigned int alloc_flags) { @@ -3055,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3351,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3376,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3433,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -5114,7 +5126,7 @@ static char numa_zonelist_order[] = "Node"; /* * sysctl handler for numa_zonelist_order */ -static int numa_zonelist_order_handler(struct ctl_table *table, int write, +static int numa_zonelist_order_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { if (write) @@ -5213,7 +5225,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order, } /* - * Build gfp_thisnode zonelists + * Build __GFP_THISNODE zonelists */ static void build_thisnode_zonelists(pg_data_t *pgdat) { @@ -5762,10 +5774,6 @@ void adjust_managed_page_count(struct page *page, long count) { atomic_long_add(count, &page_zone(page)->managed_pages); totalram_pages_add(count); -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page)) - totalhigh_pages_add(count); -#endif } EXPORT_SYMBOL(adjust_managed_page_count); @@ -5805,6 +5813,16 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char return pages; } +void free_reserved_page(struct page *page) +{ + clear_page_tag_ref(page); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + adjust_managed_page_count(page, 1); +} +EXPORT_SYMBOL(free_reserved_page); + static int page_alloc_cpu_dead(unsigned int cpu) { struct zone *zone; @@ -6081,7 +6099,7 @@ postcore_initcall(init_per_zone_wmark_min) * that we can call two helper functions whenever min_free_kbytes * changes. */ -static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, +static int min_free_kbytes_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -6097,7 +6115,7 @@ static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, return 0; } -static int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, +static int watermark_scale_factor_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -6127,7 +6145,7 @@ static void setup_min_unmapped_ratio(void) } -static int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write, +static int sysctl_min_unmapped_ratio_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -6154,7 +6172,7 @@ static void setup_min_slab_ratio(void) sysctl_min_slab_ratio) / 100; } -static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write, +static int sysctl_min_slab_ratio_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -6178,7 +6196,7 @@ static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int wri * minimum watermarks. The lowmem reserve ratio can only make sense * if in function of the boot time zone sizes. */ -static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, +static int lowmem_reserve_ratio_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int i; @@ -6199,7 +6217,7 @@ static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, * cpu. It is the fraction of total pages in each zone that a hot per cpu * pagelist can have before it gets flushed back to buddy allocator. */ -static int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table, +static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { struct zone *zone; @@ -6690,14 +6708,19 @@ void zone_pcp_reset(struct zone *zone) /* * All pages in the range must be in a single zone, must not contain holes, * must span full sections, and must be isolated before calling this function. + * + * Returns the number of managed (non-PageOffline()) pages in the range: the + * number of pages for which memory offlining code must adjust managed page + * counters using adjust_managed_page_count(). */ -void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) +unsigned long __offline_isolated_pages(unsigned long start_pfn, + unsigned long end_pfn) { + unsigned long already_offline = 0, flags; unsigned long pfn = start_pfn; struct page *page; struct zone *zone; unsigned int order; - unsigned long flags; offline_mem_sections(pfn, end_pfn); zone = page_zone(pfn_to_page(pfn)); @@ -6719,6 +6742,7 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) if (PageOffline(page)) { BUG_ON(page_count(page)); BUG_ON(PageBuddy(page)); + already_offline++; pfn++; continue; } @@ -6731,6 +6755,8 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) pfn += (1 << order); } spin_unlock_irqrestore(&zone->lock, flags); + + return end_pfn - start_pfn - already_offline; } #endif @@ -6912,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6940,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -6999,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } |