path: root/mm/vmscan.c
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  304
1 file changed, 200 insertions, 104 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c5ef7240cbcb..a5ad0b35ab8e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,9 +46,11 @@
#include <linux/delayacct.h>
#include <linux/sysctl.h>
#include <linux/oom.h>
+#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
#include <linux/dax.h>
+#include <linux/psi.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -86,6 +88,9 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;
+ /* e.g. boosted watermark reclaim leaves slabs alone */
+ unsigned int may_shrinkslab:1;
+
/*
* Cgroups are not reclaimed below their configured memory.low,
* unless we threaten to OOM. If any cgroups are skipped due to
@@ -369,7 +374,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
*/
int prealloc_shrinker(struct shrinker *shrinker)
{
- size_t size = sizeof(*shrinker->nr_deferred);
+ unsigned int size = sizeof(*shrinker->nr_deferred);
if (shrinker->flags & SHRINKER_NUMA_AWARE)
size *= nr_node_ids;
@@ -473,19 +478,18 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
total_scan = nr;
- delta = freeable >> priority;
- delta *= 4;
- do_div(delta, shrinker->seeks);
-
- /*
- * Make sure we apply some minimal pressure on default priority
- * even on small cgroups. Stale objects are not only consuming memory
- * by themselves, but can also hold a reference to a dying cgroup,
- * preventing it from being reclaimed. A dying cgroup with all
- * corresponding structures like per-cpu stats and kmem caches
- * can be really big, so it may lead to a significant waste of memory.
- */
- delta = max_t(unsigned long long, delta, min(freeable, batch_size));
+ if (shrinker->seeks) {
+ delta = freeable >> priority;
+ delta *= 4;
+ do_div(delta, shrinker->seeks);
+ } else {
+ /*
+ * These objects don't require any IO to create. Trim
+ * them aggressively under memory pressure to keep
+ * them from causing refetches in the IO caches.
+ */
+ delta = freeable / 2;
+ }
total_scan += delta;
if (total_scan < 0) {
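The two branches above reduce to a simple formula: a seek-bound cache contributes a priority-scaled slice weighted by shrinker->seeks, while a zero-seek cache gives up half of its freeable objects every pass. A minimal userspace sketch of that calculation, assuming illustrative values for freeable, priority and seeks (plain 64-bit division stands in for do_div()):

#include <stdint.h>
#include <stdio.h>

/* mirrors the delta computation in do_shrink_slab() above */
static uint64_t shrink_delta(uint64_t freeable, int priority, unsigned int seeks)
{
        if (seeks)
                return (freeable >> priority) * 4 / seeks;  /* seek-bound cache */
        return freeable / 2;                                /* zero-seek cache */
}

int main(void)
{
        /* 1M freeable objects at DEF_PRIORITY (12), DEFAULT_SEEKS (2) vs. seeks == 0 */
        printf("seek-bound: %llu\n", (unsigned long long)shrink_delta(1 << 20, 12, 2));
        printf("zero-seek:  %llu\n", (unsigned long long)shrink_delta(1 << 20, 12, 0));
        return 0;
}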
@@ -741,12 +745,12 @@ static inline int is_page_cache_freeable(struct page *page)
{
/*
* A freeable page cache page is referenced only by the caller
- * that isolated the page, the page cache radix tree and
- * optional buffer heads at page->private.
+ * that isolated the page, the page cache and optional buffer
+ * heads at page->private.
*/
- int radix_pins = PageTransHuge(page) && PageSwapCache(page) ?
+ int page_cache_pins = PageTransHuge(page) && PageSwapCache(page) ?
HPAGE_PMD_NR : 1;
- return page_count(page) - page_has_private(page) == 1 + radix_pins;
+ return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
}
static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
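As a worked example of the check above: a file page with buffer heads that has just been isolated is pinned by the isolating caller, by the page cache and by its buffers, so page_count() is 3, page_has_private() contributes 1, and 3 - 1 == 1 + 1 makes the page freeable. A THP in the swap cache instead carries HPAGE_PMD_NR page cache pins, one per subpage slot, which is what the renamed page_cache_pins accounts for.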
@@ -922,7 +926,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
mem_cgroup_swapout(page, swap);
- __delete_from_swap_cache(page);
+ __delete_from_swap_cache(page, swap);
xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap);
} else {
@@ -948,7 +952,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
*/
if (reclaimed && page_is_file_cache(page) &&
!mapping_exiting(mapping) && !dax_mapping(mapping))
- shadow = workingset_eviction(mapping, page);
+ shadow = workingset_eviction(page);
__delete_from_page_cache(page, shadow);
xa_unlock_irqrestore(&mapping->i_pages, flags);
@@ -1102,16 +1106,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
{
LIST_HEAD(ret_pages);
LIST_HEAD(free_pages);
- int pgactivate = 0;
- unsigned nr_unqueued_dirty = 0;
- unsigned nr_dirty = 0;
- unsigned nr_congested = 0;
unsigned nr_reclaimed = 0;
- unsigned nr_writeback = 0;
- unsigned nr_immediate = 0;
- unsigned nr_ref_keep = 0;
- unsigned nr_unmap_fail = 0;
+ memset(stat, 0, sizeof(*stat));
cond_resched();
while (!list_empty(page_list)) {
@@ -1155,10 +1152,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
page_check_dirty_writeback(page, &dirty, &writeback);
if (dirty || writeback)
- nr_dirty++;
+ stat->nr_dirty++;
if (dirty && !writeback)
- nr_unqueued_dirty++;
+ stat->nr_unqueued_dirty++;
/*
* Treat this page as congested if the underlying BDI is or if
@@ -1170,7 +1167,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (((dirty || writeback) && mapping &&
inode_write_congested(mapping->host)) ||
(writeback && PageReclaim(page)))
- nr_congested++;
+ stat->nr_congested++;
/*
* If a page at the tail of the LRU is under writeback, there
@@ -1219,7 +1216,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (current_is_kswapd() &&
PageReclaim(page) &&
test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
- nr_immediate++;
+ stat->nr_immediate++;
goto activate_locked;
/* Case 2 above */
@@ -1237,7 +1234,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* and it's also appropriate in global reclaim.
*/
SetPageReclaim(page);
- nr_writeback++;
+ stat->nr_writeback++;
goto activate_locked;
/* Case 3 above */
@@ -1257,7 +1254,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
case PAGEREF_ACTIVATE:
goto activate_locked;
case PAGEREF_KEEP:
- nr_ref_keep++;
+ stat->nr_ref_keep++;
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
@@ -1322,7 +1319,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (unlikely(PageTransHuge(page)))
flags |= TTU_SPLIT_HUGE_PMD;
if (!try_to_unmap(page, flags)) {
- nr_unmap_fail++;
+ stat->nr_unmap_fail++;
goto activate_locked;
}
}
@@ -1446,14 +1443,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
count_memcg_page_event(page, PGLAZYFREED);
} else if (!mapping || !__remove_mapping(mapping, page, true))
goto keep_locked;
- /*
- * At this point, we have no other references and there is
- * no way to pick any more up (removed from LRU, removed
- * from pagecache). Can use non-atomic bitops now (and
- * we obviously don't have to worry about waking up a process
- * waiting on the page lock, because there are no references.
- */
- __ClearPageLocked(page);
+
+ unlock_page(page);
free_it:
nr_reclaimed++;
@@ -1476,7 +1467,7 @@ activate_locked:
VM_BUG_ON_PAGE(PageActive(page), page);
if (!PageMlocked(page)) {
SetPageActive(page);
- pgactivate++;
+ stat->nr_activate++;
count_memcg_page_event(page, PGACTIVATE);
}
keep_locked:
@@ -1491,18 +1482,8 @@ keep:
free_unref_page_list(&free_pages);
list_splice(&ret_pages, page_list);
- count_vm_events(PGACTIVATE, pgactivate);
-
- if (stat) {
- stat->nr_dirty = nr_dirty;
- stat->nr_congested = nr_congested;
- stat->nr_unqueued_dirty = nr_unqueued_dirty;
- stat->nr_writeback = nr_writeback;
- stat->nr_immediate = nr_immediate;
- stat->nr_activate = pgactivate;
- stat->nr_ref_keep = nr_ref_keep;
- stat->nr_unmap_fail = nr_unmap_fail;
- }
+ count_vm_events(PGACTIVATE, stat->nr_activate);
+
return nr_reclaimed;
}
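The per-call counters removed above now live directly in the caller-supplied struct reclaim_stat, which is why shrink_page_list() can memset() it once and increment the fields in place. A sketch of the structure as implied by the fields touched in this hunk (the real definition lives in mm/internal.h; ordering and comments here are illustrative):

struct reclaim_stat {
        unsigned nr_dirty;          /* dirty or under writeback */
        unsigned nr_unqueued_dirty; /* dirty but not queued for IO */
        unsigned nr_congested;      /* backed by a congested BDI */
        unsigned nr_writeback;      /* under writeback at the LRU tail */
        unsigned nr_immediate;      /* writeback completing against the tail */
        unsigned nr_activate;       /* moved back to the active list */
        unsigned nr_ref_keep;       /* kept due to recent references */
        unsigned nr_unmap_fail;     /* try_to_unmap() failed */
};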
@@ -1514,6 +1495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
.priority = DEF_PRIORITY,
.may_unmap = 1,
};
+ struct reclaim_stat dummy_stat;
unsigned long ret;
struct page *page, *next;
LIST_HEAD(clean_pages);
@@ -1527,7 +1509,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
}
ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
- TTU_IGNORE_ACCESS, NULL, true);
+ TTU_IGNORE_ACCESS, &dummy_stat, true);
list_splice(&clean_pages, page_list);
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
return ret;
@@ -1632,8 +1614,8 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
}
-/*
- * zone_lru_lock is heavily contended. Some of the functions that
+/**
+ * pgdat->lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
* and working on them outside the LRU lock.
*
@@ -1655,7 +1637,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
struct lruvec *lruvec, struct list_head *dst,
unsigned long *nr_scanned, struct scan_control *sc,
- isolate_mode_t mode, enum lru_list lru)
+ enum lru_list lru)
{
struct list_head *src = &lruvec->lists[lru];
unsigned long nr_taken = 0;
@@ -1664,6 +1646,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long skipped = 0;
unsigned long scan, total_scan, nr_pages;
LIST_HEAD(pages_skipped);
+ isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
scan = 0;
for (total_scan = 0;
@@ -1767,11 +1750,11 @@ int isolate_lru_page(struct page *page)
WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
if (PageLRU(page)) {
- struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
struct lruvec *lruvec;
- spin_lock_irq(zone_lru_lock(zone));
- lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+ spin_lock_irq(&pgdat->lru_lock);
+ lruvec = mem_cgroup_page_lruvec(page, pgdat);
if (PageLRU(page)) {
int lru = page_lru(page);
get_page(page);
@@ -1779,7 +1762,7 @@ int isolate_lru_page(struct page *page)
del_page_from_lru_list(page, lruvec, lru);
ret = 0;
}
- spin_unlock_irq(zone_lru_lock(zone));
+ spin_unlock_irq(&pgdat->lru_lock);
}
return ret;
}
@@ -1901,8 +1884,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
unsigned long nr_scanned;
unsigned long nr_reclaimed = 0;
unsigned long nr_taken;
- struct reclaim_stat stat = {};
- isolate_mode_t isolate_mode = 0;
+ struct reclaim_stat stat;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
@@ -1923,13 +1905,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
lru_add_drain();
- if (!sc->may_unmap)
- isolate_mode |= ISOLATE_UNMAPPED;
-
spin_lock_irq(&pgdat->lru_lock);
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
- &nr_scanned, sc, isolate_mode, lru);
+ &nr_scanned, sc, lru);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -2011,9 +1990,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* processes, from rmap.
*
* If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone_lru_lock across the whole operation. But if
+ * appropriate to hold pgdat->lru_lock across the whole operation. But if
* the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone_lru_lock around each page. It's impossible to balance
+ * should drop pgdat->lru_lock around each page. It's impossible to balance
* this, so instead we remove the pages from the LRU while processing them.
* It is safe to rely on PG_active against the non-LRU pages in here because
* nobody will play with that bit on a non-LRU page.
@@ -2086,19 +2065,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned nr_deactivate, nr_activate;
unsigned nr_rotated = 0;
- isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
lru_add_drain();
- if (!sc->may_unmap)
- isolate_mode |= ISOLATE_UNMAPPED;
-
spin_lock_irq(&pgdat->lru_lock);
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
- &nr_scanned, sc, isolate_mode, lru);
+ &nr_scanned, sc, lru);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -2145,6 +2120,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
}
ClearPageActive(page); /* we are de-activating */
+ SetPageWorkingset(page);
list_add(&page->lru, &l_inactive);
}
@@ -2456,9 +2432,11 @@ out:
/*
* Scan types proportional to swappiness and
* their relative recent reclaim efficiency.
+ * Make sure we don't miss the last page
+ * because of a round-off error.
*/
- scan = div64_u64(scan * fraction[file],
- denominator);
+ scan = DIV64_U64_ROUND_UP(scan * fraction[file],
+ denominator);
break;
case SCAN_FILE:
case SCAN_ANON:
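DIV64_U64_ROUND_UP() rounds the quotient up rather than down, i.e. it behaves like div64_u64(x + d - 1, d), so a non-zero remainder still yields at least one page to scan. A tiny userspace illustration of the difference, assuming made-up values where only one page remains:

#include <stdint.h>
#include <stdio.h>

/* same idea as the kernel's DIV64_U64_ROUND_UP() helper */
static uint64_t div_round_up(uint64_t x, uint64_t d)
{
        return (x + d - 1) / d;
}

int main(void)
{
        uint64_t scan = 1, fraction = 1, denominator = 3;

        /* round-down yields 0, so the final page would never be scanned */
        printf("round-down: %llu\n", (unsigned long long)(scan * fraction / denominator));
        printf("round-up:   %llu\n", (unsigned long long)div_round_up(scan * fraction, denominator));
        return 0;
}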
@@ -2742,8 +2720,10 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
node_lru_pages += lru_pages;
- shrink_slab(sc->gfp_mask, pgdat->node_id,
+ if (sc->may_shrinkslab) {
+ shrink_slab(sc->gfp_mask, pgdat->node_id,
memcg, sc->priority);
+ }
/* Record the group's reclaim efficiency */
vmpressure(sc->gfp_mask, memcg, false,
@@ -2751,16 +2731,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
sc->nr_reclaimed - reclaimed);
/*
- * Direct reclaim and kswapd have to scan all memory
- * cgroups to fulfill the overall scan target for the
- * node.
+ * Kswapd has to scan all memory cgroups to fulfill
+ * the overall scan target for the node.
*
* Limit reclaim, on the other hand, only cares about
* nr_to_reclaim pages to be reclaimed and it will
* retry with decreasing priority if one round over the
* whole hierarchy is not sufficient.
*/
- if (!global_reclaim(sc) &&
+ if (!current_is_kswapd() &&
sc->nr_reclaimed >= sc->nr_to_reclaim) {
mem_cgroup_iter_break(root, memcg);
break;
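For context, the check above sits inside the usual mem_cgroup_iter() walk over the cgroup hierarchy, and the new current_is_kswapd() test is what lets direct reclaim, not just limit reclaim, stop the walk once its target is met. A condensed sketch of the loop shape, with everything except the iterator calls and the changed check elided:

        memcg = mem_cgroup_iter(root, NULL, &reclaim);
        do {
                /* ... shrink_node_memcg(), shrink_slab(), vmpressure() ... */

                if (!current_is_kswapd() &&
                    sc->nr_reclaimed >= sc->nr_to_reclaim) {
                        mem_cgroup_iter_break(root, memcg);
                        break;
                }
        } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));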
@@ -3225,6 +3204,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
+ .may_shrinkslab = 1,
};
/*
@@ -3269,6 +3249,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
.may_unmap = 1,
.reclaim_idx = MAX_NR_ZONES - 1,
.may_swap = !noswap,
+ .may_shrinkslab = 1,
};
unsigned long lru_pages;
@@ -3302,6 +3283,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
{
struct zonelist *zonelist;
unsigned long nr_reclaimed;
+ unsigned long pflags;
int nid;
unsigned int noreclaim_flag;
struct scan_control sc = {
@@ -3314,6 +3296,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = may_swap,
+ .may_shrinkslab = 1,
};
/*
@@ -3330,9 +3313,13 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.gfp_mask,
sc.reclaim_idx);
+ psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();
+
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
memalloc_noreclaim_restore(noreclaim_flag);
+ psi_memstall_leave(&pflags);
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
@@ -3360,6 +3347,30 @@ static void age_active_anon(struct pglist_data *pgdat,
} while (memcg);
}
+static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx)
+{
+ int i;
+ struct zone *zone;
+
+ /*
+ * Check for watermark boosts top-down as the higher zones
+ * are more likely to be boosted. Both watermarks and boosts
+ * should not be checked at the same time as reclaim would
+ * start prematurely when there is no boosting and a lower
+ * zone is balanced.
+ */
+ for (i = classzone_idx; i >= 0; i--) {
+ zone = pgdat->node_zones + i;
+ if (!managed_zone(zone))
+ continue;
+
+ if (zone->watermark_boost)
+ return true;
+ }
+
+ return false;
+}
+
/*
* Returns true if there is an eligible zone balanced for the request order
* and classzone_idx
@@ -3370,6 +3381,10 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
unsigned long mark = -1;
struct zone *zone;
+ /*
+ * Check watermarks bottom-up as lower zones are more likely to
+ * meet watermarks.
+ */
for (i = 0; i <= classzone_idx; i++) {
zone = pgdat->node_zones + i;
@@ -3488,7 +3503,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
*
* kswapd scans the zones in the highmem->normal->dma direction. It skips
* zones which have free_pages > high_wmark_pages(zone), but once a zone is
- * found to have free_pages <= high_wmark_pages(zone), any page is that zone
+ * found to have free_pages <= high_wmark_pages(zone), any page in that zone
* or lower is eligible for reclaim until at least one usable zone is
* balanced.
*/
@@ -3497,23 +3512,44 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
int i;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
+ unsigned long pflags;
+ unsigned long nr_boost_reclaim;
+ unsigned long zone_boosts[MAX_NR_ZONES] = { 0, };
+ bool boosted;
struct zone *zone;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.order = order,
- .priority = DEF_PRIORITY,
- .may_writepage = !laptop_mode,
.may_unmap = 1,
- .may_swap = 1,
};
+ psi_memstall_enter(&pflags);
__fs_reclaim_acquire();
count_vm_event(PAGEOUTRUN);
+ /*
+ * Account for the reclaim boost. Note that the zone boost is left in
+ * place so that parallel allocations that are near the watermark will
+ * stall or enter direct reclaim until kswapd is finished.
+ */
+ nr_boost_reclaim = 0;
+ for (i = 0; i <= classzone_idx; i++) {
+ zone = pgdat->node_zones + i;
+ if (!managed_zone(zone))
+ continue;
+
+ nr_boost_reclaim += zone->watermark_boost;
+ zone_boosts[i] = zone->watermark_boost;
+ }
+ boosted = nr_boost_reclaim;
+
+restart:
+ sc.priority = DEF_PRIORITY;
do {
unsigned long nr_reclaimed = sc.nr_reclaimed;
bool raise_priority = true;
+ bool balanced;
bool ret;
sc.reclaim_idx = classzone_idx;
@@ -3540,13 +3576,40 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
}
/*
- * Only reclaim if there are no eligible zones. Note that
- * sc.reclaim_idx is not used as buffer_heads_over_limit may
- * have adjusted it.
+ * If the pgdat is imbalanced then ignore boosting and preserve
+ * the watermarks for a later time and restart. Note that the
+ * zone watermarks will still be reset at the end of balancing
+ * on the grounds that the normal reclaim should be enough to
+ * re-evaluate if boosting is required when kswapd next wakes.
*/
- if (pgdat_balanced(pgdat, sc.order, classzone_idx))
+ balanced = pgdat_balanced(pgdat, sc.order, classzone_idx);
+ if (!balanced && nr_boost_reclaim) {
+ nr_boost_reclaim = 0;
+ goto restart;
+ }
+
+ /*
+ * If boosting is not active then only reclaim if there are no
+ * eligible zones. Note that sc.reclaim_idx is not used as
+ * buffer_heads_over_limit may have adjusted it.
+ */
+ if (!nr_boost_reclaim && balanced)
goto out;
+ /* Limit the priority of boosting to avoid reclaim writeback */
+ if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2)
+ raise_priority = false;
+
+ /*
+ * Do not write back or swap pages for boosted reclaim. The
+ * intent is to relieve pressure, not to issue sub-optimal IO
+ * from reclaim context. If no pages are reclaimed, the
+ * reclaim will be aborted.
+ */
+ sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
+ sc.may_swap = !nr_boost_reclaim;
+ sc.may_shrinkslab = !nr_boost_reclaim;
+
/*
* Do some background aging of the anon list, to give
* pages a chance to be referenced before reclaiming. All
@@ -3598,6 +3661,16 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
* progress in reclaiming pages
*/
nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
+ nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed);
+
+ /*
+ * If reclaim made no progress for a boost, stop reclaim as
+ * IO cannot be queued and it could be an infinite loop in
+ * extreme circumstances.
+ */
+ if (nr_boost_reclaim && !nr_reclaimed)
+ break;
+
if (raise_priority || !nr_reclaimed)
sc.priority--;
} while (sc.priority >= 1);
@@ -3606,8 +3679,31 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
pgdat->kswapd_failures++;
out:
+ /* If reclaim was boosted, account for the reclaim done in this pass */
+ if (boosted) {
+ unsigned long flags;
+
+ for (i = 0; i <= classzone_idx; i++) {
+ if (!zone_boosts[i])
+ continue;
+
+ /* Increments are under the zone lock */
+ zone = pgdat->node_zones + i;
+ spin_lock_irqsave(&zone->lock, flags);
+ zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+
+ /*
+ * As there is now likely space, wake kcompactd to defragment
+ * pageblocks.
+ */
+ wakeup_kcompactd(pgdat, pageblock_order, classzone_idx);
+ }
+
snapshot_refaults(NULL, pgdat);
__fs_reclaim_release();
+ psi_memstall_leave(&pflags);
/*
* Return the order kswapd stopped reclaiming at as
* prepare_kswapd_sleep() takes it into account. If another caller
@@ -3833,7 +3929,8 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
/* Hopeless node, leave it to direct reclaim if possible */
if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
- pgdat_balanced(pgdat, order, classzone_idx)) {
+ (pgdat_balanced(pgdat, order, classzone_idx) &&
+ !pgdat_watermark_boosted(pgdat, classzone_idx))) {
/*
* There may be plenty of free memory available, but it's too
* fragmented for high-order allocations. Wake up kcompactd
@@ -4161,17 +4258,16 @@ int page_evictable(struct page *page)
return ret;
}
-#ifdef CONFIG_SHMEM
/**
- * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
- * @pages: array of pages to check
- * @nr_pages: number of pages to check
- *
- * Checks pages for evictability and moves them to the appropriate lru list.
+ * check_move_unevictable_pages - check pages for evictability and move to
+ * appropriate zone lru list
+ * @pvec: pagevec with lru pages to check
*
- * This function is only used for SysV IPC SHM_UNLOCK.
+ * Checks pages for evictability; if an evictable page is on the unevictable
+ * lru list, it is moved to the appropriate evictable lru list. This function
+ * should only be used for lru pages.
*/
-void check_move_unevictable_pages(struct page **pages, int nr_pages)
+void check_move_unevictable_pages(struct pagevec *pvec)
{
struct lruvec *lruvec;
struct pglist_data *pgdat = NULL;
@@ -4179,8 +4275,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
int pgrescued = 0;
int i;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pages[i];
+ for (i = 0; i < pvec->nr; i++) {
+ struct page *page = pvec->pages[i];
struct pglist_data *pagepgdat = page_pgdat(page);
pgscanned++;
@@ -4212,4 +4308,4 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
spin_unlock_irq(&pgdat->lru_lock);
}
}
-#endif /* CONFIG_SHMEM */
+EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
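With the symbol exported and the interface taking a pagevec, a module can batch lru pages and hand them over directly (the SysV SHM_UNLOCK path remains the historical in-tree user). A hypothetical caller sketch; apart from the pagevec helpers and check_move_unevictable_pages() itself, the names below are placeholders:

#include <linux/pagevec.h>
#include <linux/swap.h>

static void mark_mapping_evictable(struct address_space *mapping)
{
        struct pagevec pvec;
        pgoff_t index = 0;

        pagevec_init(&pvec);
        while (pagevec_lookup(&pvec, mapping, &index)) {
                /* re-check the batch and move pages off the unevictable LRU */
                check_move_unevictable_pages(&pvec);
                pagevec_release(&pvec);
                cond_resched();
        }
}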