Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  67
1 file changed, 46 insertions(+), 21 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8d61a8b6894..749cdc110c74 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -628,7 +628,7 @@ typedef enum {
* Calls ->writepage().
*/
static pageout_t pageout(struct folio *folio, struct address_space *mapping,
- struct swap_iocb **plug)
+ struct swap_iocb **plug, struct list_head *folio_list)
{
/*
* If the folio is dirty, only perform writeback if that write
@@ -676,6 +676,14 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping,
.swap_plug = plug,
};
+ /*
+ * The large shmem folio can be split if CONFIG_THP_SWAP is
+ * not enabled or contiguous swap entries cannot be
+ * allocated.
+ */
+ if (shmem_mapping(mapping) && folio_test_large(folio))
+ wbc.list = folio_list;
+
folio_set_reclaim(folio);
res = mapping->a_ops->writepage(&folio->page, &wbc);
if (res < 0)
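
The new folio_list argument exists so that, when a large shmem folio has to be split inside the writeback path, the tail folios end up back on the caller's reclaim list rather than being lost. A minimal userspace sketch of that hand-off pattern follows; fake_folio, fake_wbc and fake_writepage are invented stand-ins for the kernel structures, not the real API.

    /*
     * Simplified model of the wbc.list idea (not kernel code): the caller
     * lends its reclaim list to the writeback path so any tail folios
     * created by a split are queued back for the caller to process.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct fake_folio {
    	int nr_pages;
    	struct fake_folio *next;	/* models list_head linkage */
    };

    struct fake_wbc {
    	struct fake_folio **list;	/* caller's reclaim list, may be NULL */
    };

    /* Pretend contiguous swap slots were unavailable, so the folio splits. */
    static int fake_writepage(struct fake_folio *folio, struct fake_wbc *wbc)
    {
    	while (folio->nr_pages > 1 && wbc->list) {
    		struct fake_folio *tail = malloc(sizeof(*tail));

    		if (!tail)
    			return -1;
    		tail->nr_pages = 1;
    		tail->next = *wbc->list;	/* hand the tail back to the caller */
    		*wbc->list = tail;
    		folio->nr_pages--;
    	}
    	return 0;	/* head page "written" */
    }

    int main(void)
    {
    	struct fake_folio head = { .nr_pages = 4, .next = NULL };
    	struct fake_folio *reclaim_list = NULL;
    	struct fake_wbc wbc = { .list = &reclaim_list };
    	int tails = 0;

    	fake_writepage(&head, &wbc);
    	for (struct fake_folio *f = reclaim_list; f; f = f->next)
    		tails++;
    	printf("tail folios returned to reclaim list: %d\n", tails);
    	while (reclaim_list) {
    		struct fake_folio *f = reclaim_list;

    		reclaim_list = f->next;
    		free(f);
    	}
    	return 0;
    }
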
@@ -863,7 +871,12 @@ static enum folio_references folio_check_references(struct folio *folio,
if (vm_flags & VM_LOCKED)
return FOLIOREF_ACTIVATE;
- /* rmap lock contention: rotate */
+ /*
+ * There are two cases to consider.
+ * 1) Rmap lock contention: rotate.
+ * 2) Skip the non-shared swapbacked folio mapped solely by
+ * the exiting or OOM-reaped process.
+ */
if (referenced_ptes == -1)
return FOLIOREF_KEEP;
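
Both of the cases described in the new comment are reported to this function as referenced_ptes == -1, and both are resolved the same way: keep the folio on the inactive list and look at it again later. The toy check_references() below is a heavily simplified model of that dispatch (the real function weighs several more conditions); all names in it are illustrative.

    /* Minimal model of how the -1 sentinel from the rmap walk is treated. */
    #include <stdio.h>

    enum folioref { FOLIOREF_RECLAIM, FOLIOREF_KEEP, FOLIOREF_ACTIVATE };

    static enum folioref check_references(int referenced_ptes, int vm_locked)
    {
    	if (vm_locked)
    		return FOLIOREF_ACTIVATE;
    	if (referenced_ptes == -1)	/* contention or exiting sole owner */
    		return FOLIOREF_KEEP;
    	if (referenced_ptes > 0)
    		return FOLIOREF_ACTIVATE;
    	return FOLIOREF_RECLAIM;
    }

    int main(void)
    {
    	printf("%d\n", check_references(-1, 0));	/* 1 == FOLIOREF_KEEP */
    	return 0;
    }
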
@@ -1003,9 +1016,6 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
(unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
&nr_succeeded);
- mod_node_page_state(pgdat, PGDEMOTE_KSWAPD + reclaimer_offset(),
- nr_succeeded);
-
return nr_succeeded;
}
@@ -1222,13 +1232,14 @@ retry:
goto keep_locked;
if (folio_test_large(folio)) {
/* cannot split folio, skip it */
- if (!can_split_folio(folio, NULL))
+ if (!can_split_folio(folio, 1, NULL))
goto activate_locked;
/*
* Split partially mapped folios right away.
* We can free the unmapped pages without IO.
*/
- if (data_race(!list_empty(&folio->_deferred_list)) &&
+ if (data_race(!list_empty(&folio->_deferred_list) &&
+ folio_test_partially_mapped(folio)) &&
split_folio_to_list(folio, folio_list))
goto activate_locked;
}
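
The eager split is now gated on the folio being both on the deferred split queue and flagged as partially mapped, since only then can the unmapped tail pages be freed without any I/O. A small standalone sketch of that gating, with an invented toy_folio type standing in for struct folio:

    /* Illustrative sketch of the new gating condition (not kernel code). */
    #include <stdbool.h>
    #include <stdio.h>

    struct toy_folio {
    	bool large;
    	bool on_deferred_list;
    	bool partially_mapped;
    };

    /* Split eagerly only when unmapped tail pages can be freed without I/O. */
    static bool should_split_eagerly(const struct toy_folio *f)
    {
    	return f->large && f->on_deferred_list && f->partially_mapped;
    }

    int main(void)
    {
    	struct toy_folio fully_mapped = { true, true, false };
    	struct toy_folio partial = { true, true, true };

    	printf("fully mapped: split=%d\n", should_split_eagerly(&fully_mapped));
    	printf("partially mapped: split=%d\n", should_split_eagerly(&partial));
    	return 0;
    }
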
@@ -1252,11 +1263,6 @@ retry:
goto activate_locked_split;
}
}
- } else if (folio_test_swapbacked(folio) &&
- folio_test_large(folio)) {
- /* Split shmem folio */
- if (split_folio_to_list(folio, folio_list))
- goto keep_locked;
}
/*
@@ -1357,12 +1363,25 @@ retry:
* starts and then write it out here.
*/
try_to_unmap_flush_dirty();
- switch (pageout(folio, mapping, &plug)) {
+ switch (pageout(folio, mapping, &plug, folio_list)) {
case PAGE_KEEP:
goto keep_locked;
case PAGE_ACTIVATE:
+ /*
+ * If the shmem folio was split during writeback to
+ * swap, the tail pages will make their own pass
+ * through this function and be accounted for then.
+ */
+ if (nr_pages > 1 && !folio_test_large(folio)) {
+ sc->nr_scanned -= (nr_pages - 1);
+ nr_pages = 1;
+ }
goto activate_locked;
case PAGE_SUCCESS:
+ if (nr_pages > 1 && !folio_test_large(folio)) {
+ sc->nr_scanned -= (nr_pages - 1);
+ nr_pages = 1;
+ }
stat->nr_pageout += nr_pages;
if (folio_test_writeback(folio))
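
The accounting fix-up rests on a simple observation: after a split, the folio still being handled here is a single page, so the extra (nr_pages - 1) pages must be taken back out of sc->nr_scanned and counted only when the tail folios are reclaimed on their own. A toy fixup_scanned() helper, not kernel code, shows the arithmetic:

    /* Toy model of the scan-count fix-up after a mid-pageout split. */
    #include <stdio.h>

    static unsigned long fixup_scanned(unsigned long nr_scanned,
    				   unsigned int *nr_pages, int still_large)
    {
    	if (*nr_pages > 1 && !still_large) {
    		nr_scanned -= *nr_pages - 1;	/* tails counted on their own pass */
    		*nr_pages = 1;
    	}
    	return nr_scanned;
    }

    int main(void)
    {
    	unsigned int nr_pages = 16;	/* folio was 16 pages before the split */
    	unsigned long scanned = 64;

    	scanned = fixup_scanned(scanned, &nr_pages, /* still_large = */ 0);
    	printf("scanned=%lu nr_pages=%u\n", scanned, nr_pages);	/* 49, 1 */
    	return 0;
    }
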
@@ -1495,7 +1514,8 @@ keep:
/* 'folio_list' is always empty here */
/* Migrate folios selected for demotion */
- nr_reclaimed += demote_folio_list(&demote_folios, pgdat);
+ stat->nr_demoted = demote_folio_list(&demote_folios, pgdat);
+ nr_reclaimed += stat->nr_demoted;
/* Folios that could not be demoted are still in @demote_folios */
if (!list_empty(&demote_folios)) {
/* Folios which weren't demoted go back on @folio_list */
@@ -1941,6 +1961,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
spin_lock_irq(&lruvec->lru_lock);
move_folios_to_lru(lruvec, &folio_list);
+ __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
+ stat.nr_demoted);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
item = PGSTEAL_KSWAPD + reclaimer_offset();
if (!cgroup_reclaim(sc))
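
Demotion accounting moves from demote_folio_list() into this caller, which records the count in stat.nr_demoted and charges PGDEMOTE_* as a per-lruvec (and therefore per-memcg) stat instead of a node-wide one. The sketch below, using invented *_model types, only illustrates the "worker returns a count, caller accounts it at the right scope" shape of the change:

    /* Sketch of returning a count and letting the caller account it. */
    #include <stdio.h>

    struct reclaim_stat_model { unsigned long nr_demoted; };
    struct lruvec_model { unsigned long pgdemote; };

    static unsigned long demote_list_model(unsigned int nr_candidates)
    {
    	/* pretend half of the candidates were migrated successfully */
    	return nr_candidates / 2;
    }

    int main(void)
    {
    	struct reclaim_stat_model stat = { 0 };
    	struct lruvec_model lruvec = { 0 };
    	unsigned long reclaimed = 0;

    	stat.nr_demoted = demote_list_model(10);
    	reclaimed += stat.nr_demoted;
    	lruvec.pgdemote += stat.nr_demoted;	/* accounted by the caller */
    	printf("reclaimed=%lu pgdemote=%lu\n", reclaimed, lruvec.pgdemote);
    	return 0;
    }
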
@@ -2239,10 +2261,11 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc)
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
/*
- * Flush the memory cgroup stats, so that we read accurate per-memcg
- * lruvec stats for heuristics.
+ * Flush the memory cgroup stats in a rate-limited way, as we don't
+ * need the most accurate stats here. We may switch to regular stats
+ * flushing in the future once it is cheap enough.
*/
- mem_cgroup_flush_stats(sc->target_mem_cgroup);
+ mem_cgroup_flush_stats_ratelimited(sc->target_mem_cgroup);
/*
* Determine the scan balance between anon and file LRUs.
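
mem_cgroup_flush_stats_ratelimited() trades a little accuracy for much less work on the reclaim path. The standalone program below sketches the general rate-limited-flush pattern under an assumed 2ms interval; it is not the kernel's implementation, and flush_stats()/flush_stats_ratelimited() are illustrative names.

    /* Generic rate-limited-flush pattern: skip the expensive work unless
     * enough time has passed, accepting slightly stale stats in between. */
    #include <stdio.h>
    #include <time.h>

    #define FLUSH_INTERVAL_NS (2 * 1000 * 1000)	/* 2ms, illustrative */

    static long long last_flush_ns;

    static long long now_ns(void)
    {
    	struct timespec ts;

    	clock_gettime(CLOCK_MONOTONIC, &ts);
    	return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    static void flush_stats(void)
    {
    	/* stand-in for the expensive aggregation work */
    	printf("flushed at %lld ns\n", now_ns());
    }

    static void flush_stats_ratelimited(void)
    {
    	long long now = now_ns();

    	if (now - last_flush_ns < FLUSH_INTERVAL_NS)
    		return;			/* stale-but-cheap path */
    	last_flush_ns = now;
    	flush_stats();
    }

    int main(void)
    {
    	for (int i = 0; i < 5; i++)
    		flush_stats_ratelimited();	/* only the first call flushes */
    	return 0;
    }
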
@@ -3456,7 +3479,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (should_clear_pmd_young())
+ if (!walk->force_scan && should_clear_pmd_young())
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
}
@@ -3543,7 +3566,7 @@ restart:
walk->mm_stats[MM_NONLEAF_TOTAL]++;
- if (should_clear_pmd_young()) {
+ if (!walk->force_scan && should_clear_pmd_young()) {
if (!pmd_young(val))
continue;
@@ -6644,7 +6667,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
continue;
if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
- mark = wmark_pages(zone, WMARK_PROMO);
+ mark = promo_wmark_pages(zone);
else
mark = high_wmark_pages(zone);
if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
@@ -7519,7 +7542,9 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
ret = __node_reclaim(pgdat, gfp_mask, order);
clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);
- if (!ret)
+ if (ret)
+ count_vm_event(PGSCAN_ZONE_RECLAIM_SUCCESS);
+ else
count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
return ret;