Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  161
1 file changed, 95 insertions(+), 66 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ecf99190ea2..c565de8f48e9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes);
static bool page_contains_unaccepted(struct page *page, unsigned int order);
static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
+static bool cond_accept_memory(struct zone *zone, unsigned int order);
static inline bool has_unaccepted_memory(void);
static bool __free_unaccepted(struct page *page);
@@ -498,7 +498,8 @@ static void bad_page(struct page *page, const char *reason)
dump_stack();
out:
/* Leave bad fields for debug, except PageBuddy could make trouble */
- page_mapcount_reset(page); /* remove PageBuddy */
+ if (PageBuddy(page))
+ __ClearPageBuddy(page);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
@@ -711,12 +712,12 @@ static inline struct page *get_page_from_free_area(struct free_area *area,
}
/*
- * If this is not the largest possible page, check if the buddy
- * of the next-highest order is free. If it is, it's possible
+ * If this is less than the 2nd largest possible page, check if the buddy
+ * of the next-higher order is free. If it is, it's possible
* that pages are being freed that will coalesce soon. In case
* that is happening, add the free page to the tail of the list
* so it's less likely to be used soon and more likely to be merged
- * as a higher order page
+ * as a 2-level higher order page
*/
static inline bool
buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
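For reference, the "buddy of the next-higher order" test above reduces to pfn arithmetic. A minimal sketch mirroring the kernel's __find_buddy_pfn(), with debug checks omitted:

static inline unsigned long find_buddy_pfn(unsigned long pfn, unsigned int order)
{
	/* Buddies of a 2^order block differ only in bit 'order' of the pfn. */
	return pfn ^ (1UL << order);
}

/*
 * Example: pfn 0x1000 at order 3 has its buddy at 0x1008; if both are
 * free, they merge into a single order-4 block starting at pfn 0x1000.
 */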
@@ -1218,7 +1219,8 @@ static void __free_pages_ok(struct page *page, unsigned int order,
__count_vm_events(PGFREE, 1 << order);
}
-void __free_pages_core(struct page *page, unsigned int order)
+void __meminit __free_pages_core(struct page *page, unsigned int order,
+ enum meminit_context context)
{
unsigned int nr_pages = 1 << order;
struct page *p = page;
@@ -1228,17 +1230,34 @@ void __free_pages_core(struct page *page, unsigned int order)
* When initializing the memmap, __init_single_page() sets the refcount
* of all pages to 1 ("allocated"/"not free"). We have to set the
* refcount of all involved pages to 0.
+ *
+ * Note that hotplugged memory pages are initialized to PageOffline().
+ * Pages freed from memblock might be marked as reserved.
*/
- prefetchw(p);
- for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
- prefetchw(p + 1);
- __ClearPageReserved(p);
- set_page_count(p, 0);
- }
- __ClearPageReserved(p);
- set_page_count(p, 0);
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) &&
+ unlikely(context == MEMINIT_HOTPLUG)) {
+ for (loop = 0; loop < nr_pages; loop++, p++) {
+ VM_WARN_ON_ONCE(PageReserved(p));
+ __ClearPageOffline(p);
+ set_page_count(p, 0);
+ }
+
+ /*
+ * Freeing the page with debug_pagealloc enabled will try to
+ * unmap it; some archs don't like double-unmappings, so
+ * map it first.
+ */
+ debug_pagealloc_map_pages(page, nr_pages);
+ adjust_managed_page_count(page, nr_pages);
+ } else {
+ for (loop = 0; loop < nr_pages; loop++, p++) {
+ __ClearPageReserved(p);
+ set_page_count(p, 0);
+ }
- atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
+ /* memblock adjusts totalram_pages() manually. */
+ atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
+ }
if (page_contains_unaccepted(page, order)) {
if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
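For illustration, how the two caller paths look after this change: a minimal sketch using the kernel's own entry points, with deferred-init and accept-memory details omitted.

void generic_online_page(struct page *page, unsigned int order)
{
	/* Hotplugged memory: the memmap was initialized to PageOffline(). */
	__free_pages_core(page, order, MEMINIT_HOTPLUG);
}

void __init memblock_free_pages(struct page *page, unsigned long pfn,
				unsigned int order)
{
	/* Early boot: pages freed from memblock may still be PageReserved(). */
	__free_pages_core(page, order, MEMINIT_EARLY);
}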
@@ -1351,7 +1370,8 @@ static void check_new_page_bad(struct page *page)
{
if (unlikely(page->flags & __PG_HWPOISON)) {
/* Don't complain about hwpoisoned pages */
- page_mapcount_reset(page); /* remove PageBuddy */
+ if (PageBuddy(page))
+ __ClearPageBuddy(page);
return;
}
@@ -2323,16 +2343,20 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
- int count = READ_ONCE(pcp->count);
-
- while (count) {
- int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
- count -= to_drain;
+ int count;
+ do {
spin_lock(&pcp->lock);
- free_pcppages_bulk(zone, to_drain, pcp, 0);
+ count = pcp->count;
+ if (count) {
+ int to_drain = min(count,
+ pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
+
+ free_pcppages_bulk(zone, to_drain, pcp, 0);
+ count -= to_drain;
+ }
spin_unlock(&pcp->lock);
- }
+ } while (count);
}
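The rework closes a window in which a count sampled once, before taking the lock, could go stale against concurrent frees. A self-contained sketch of the resulting re-check-under-lock pattern, with hypothetical queue and helper names:

#include <pthread.h>

#define BATCH_MAX 64			/* hypothetical batch limit */

struct batch_queue {			/* hypothetical stand-in for pcp */
	pthread_mutex_t lock;
	int count;
};

static void remove_batch(struct batch_queue *q, int n)
{
	q->count -= n;			/* stand-in for free_pcppages_bulk() */
}

static void drain_all(struct batch_queue *q)
{
	int count;

	do {
		pthread_mutex_lock(&q->lock);
		count = q->count;	/* re-read under the lock: never stale */
		if (count) {
			int to_drain = count < BATCH_MAX ? count : BATCH_MAX;

			remove_batch(q, to_drain);
			count -= to_drain;
		}
		pthread_mutex_unlock(&q->lock);
	} while (count);		/* loop until the queue is seen empty */
}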
/*
@@ -2632,8 +2656,7 @@ void free_unref_folios(struct folio_batch *folios)
unsigned long pfn = folio_pfn(folio);
unsigned int order = folio_order(folio);
- if (order > 0 && folio_test_large_rmappable(folio))
- folio_undo_large_rmappable(folio);
+ folio_undo_large_rmappable(folio);
if (!free_pages_prepare(&folio->page, order))
continue;
/*
@@ -3031,12 +3054,6 @@ out:
return page;
}
-noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
-{
- return __should_fail_alloc_page(gfp_mask, order);
-}
-ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
-
static inline long __zone_watermark_unusable_free(struct zone *z,
unsigned int order, unsigned int alloc_flags)
{
@@ -3055,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
if (!(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
-#ifdef CONFIG_UNACCEPTED_MEMORY
- unusable_free += zone_page_state(z, NR_UNACCEPTED);
-#endif
return unusable_free;
}
@@ -3351,6 +3365,8 @@ retry:
}
}
+ cond_accept_memory(zone, order);
+
/*
* Detect whether the number of free pages is below high
* watermark. If so, we will decrease pcp->high and free
@@ -3376,10 +3392,8 @@ check_alloc_wmark:
gfp_mask)) {
int ret;
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
@@ -3433,10 +3447,8 @@ try_this_zone:
return page;
} else {
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */
@@ -5114,7 +5126,7 @@ static char numa_zonelist_order[] = "Node";
/*
* sysctl handler for numa_zonelist_order
*/
-static int numa_zonelist_order_handler(struct ctl_table *table, int write,
+static int numa_zonelist_order_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
if (write)
@@ -5213,7 +5225,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order,
}
/*
- * Build gfp_thisnode zonelists
+ * Build __GFP_THISNODE zonelists
*/
static void build_thisnode_zonelists(pg_data_t *pgdat)
{
@@ -5762,10 +5774,6 @@ void adjust_managed_page_count(struct page *page, long count)
{
atomic_long_add(count, &page_zone(page)->managed_pages);
totalram_pages_add(count);
-#ifdef CONFIG_HIGHMEM
- if (PageHighMem(page))
- totalhigh_pages_add(count);
-#endif
}
EXPORT_SYMBOL(adjust_managed_page_count);
@@ -5805,6 +5813,16 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
return pages;
}
+void free_reserved_page(struct page *page)
+{
+ clear_page_tag_ref(page);
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
+ adjust_managed_page_count(page, 1);
+}
+EXPORT_SYMBOL(free_reserved_page);
+
static int page_alloc_cpu_dead(unsigned int cpu)
{
struct zone *zone;
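free_reserved_page() is now out of line and exported. A hedged usage sketch for code returning a boot-time reserved page to the buddy allocator (the pfn is hypothetical):

struct page *page = pfn_to_page(reserved_pfn);	/* hypothetical pfn */

/*
 * Clears PageReserved, resets the refcount, frees the page, and credits
 * it to the managed/totalram counters via adjust_managed_page_count().
 */
free_reserved_page(page);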
@@ -6081,7 +6099,7 @@ postcore_initcall(init_per_zone_wmark_min)
* that we can call two helper functions whenever min_free_kbytes
* changes.
*/
-static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
+static int min_free_kbytes_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -6097,7 +6115,7 @@ static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
return 0;
}
-static int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
+static int watermark_scale_factor_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -6127,7 +6145,7 @@ static void setup_min_unmapped_ratio(void)
}
-static int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_unmapped_ratio_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -6154,7 +6172,7 @@ static void setup_min_slab_ratio(void)
sysctl_min_slab_ratio) / 100;
}
-static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_slab_ratio_sysctl_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -6178,7 +6196,7 @@ static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int wri
* minimum watermarks. The lowmem reserve ratio only makes sense
* as a function of the boot-time zone sizes.
*/
-static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table,
+static int lowmem_reserve_ratio_sysctl_handler(const struct ctl_table *table,
int write, void *buffer, size_t *length, loff_t *ppos)
{
int i;
@@ -6199,7 +6217,7 @@ static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table,
* cpu. It is the fraction of total pages in each zone that a hot per cpu
* pagelist can have before it gets flushed back to buddy allocator.
*/
-static int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
+static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *table,
int write, void *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
@@ -6690,14 +6708,19 @@ void zone_pcp_reset(struct zone *zone)
/*
* All pages in the range must be in a single zone, must not contain holes,
* must span full sections, and must be isolated before calling this function.
+ *
+ * Returns the number of managed (non-PageOffline()) pages in the range: the
+ * number of pages for which memory offlining code must adjust managed page
+ * counters using adjust_managed_page_count().
*/
-void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+unsigned long __offline_isolated_pages(unsigned long start_pfn,
+ unsigned long end_pfn)
{
+ unsigned long already_offline = 0, flags;
unsigned long pfn = start_pfn;
struct page *page;
struct zone *zone;
unsigned int order;
- unsigned long flags;
offline_mem_sections(pfn, end_pfn);
zone = page_zone(pfn_to_page(pfn));
@@ -6719,6 +6742,7 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
if (PageOffline(page)) {
BUG_ON(page_count(page));
BUG_ON(PageBuddy(page));
+ already_offline++;
pfn++;
continue;
}
@@ -6731,6 +6755,8 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
pfn += (1 << order);
}
spin_unlock_irqrestore(&zone->lock, flags);
+
+ return end_pfn - start_pfn - already_offline;
}
#endif
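The new return value lets the offlining path fix up managed-page accounting only for pages that were not already PageOffline(). Roughly, on the caller side (a sketch, not the exact offline_pages() body):

unsigned long managed;

managed = __offline_isolated_pages(start_pfn, end_pfn);
adjust_managed_page_count(pfn_to_page(start_pfn), -managed);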
@@ -6912,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone)
struct page *page;
bool last;
- if (list_empty(&zone->unaccepted_pages))
- return false;
-
spin_lock_irqsave(&zone->lock, flags);
page = list_first_entry_or_null(&zone->unaccepted_pages,
struct page, lru);
@@ -6940,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone)
return true;
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
long to_accept;
- int ret = false;
+ bool ret = false;
+
+ if (!has_unaccepted_memory())
+ return false;
+
+ if (list_empty(&zone->unaccepted_pages))
+ return false;
/* How much to accept to get to high watermark? */
to_accept = high_wmark_pages(zone) -
(zone_page_state(zone, NR_FREE_PAGES) -
- __zone_watermark_unusable_free(zone, order, 0));
+ __zone_watermark_unusable_free(zone, order, 0) -
+ zone_page_state(zone, NR_UNACCEPTED));
- /* Accept at least one page */
- do {
+ while (to_accept > 0) {
if (!try_to_accept_memory_one(zone))
break;
ret = true;
to_accept -= MAX_ORDER_NR_PAGES;
- } while (to_accept > 0);
+ }
return ret;
}
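With NR_UNACCEPTED dropped from __zone_watermark_unusable_free() and subtracted here instead, to_accept is exactly the shortfall of usable free pages below the high watermark. A worked example with made-up numbers:

/*
 * Assume 4 KiB pages and MAX_ORDER_NR_PAGES = 1024 (order-10 chunks):
 *
 *   high_wmark_pages(zone)               = 1024
 *   zone_page_state(zone, NR_FREE_PAGES) = 2048   (includes unaccepted pages)
 *   __zone_watermark_unusable_free(...)  =  256
 *   zone_page_state(zone, NR_UNACCEPTED) = 1536
 *
 *   usable free = 2048 - 256 - 1536 = 256
 *   to_accept   = 1024 - 256        = 768
 *
 * One try_to_accept_memory_one() call accepts a MAX_ORDER chunk of 1024
 * pages, driving to_accept to -256, so the loop runs exactly once.
 */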
@@ -6999,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order)
{
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
return false;
}