Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 67
1 file changed, 46 insertions, 21 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8d61a8b6894..749cdc110c74 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -628,7 +628,7 @@ typedef enum {
  * Calls ->writepage().
  */
 static pageout_t pageout(struct folio *folio, struct address_space *mapping,
-			 struct swap_iocb **plug)
+			 struct swap_iocb **plug, struct list_head *folio_list)
 {
 	/*
 	 * If the folio is dirty, only perform writeback if that write
@@ -676,6 +676,14 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping,
 			.swap_plug = plug,
 		};
 
+		/*
+		 * The large shmem folio can be split if CONFIG_THP_SWAP is
+		 * not enabled or contiguous swap entries are failed to
+		 * allocate.
+		 */
+		if (shmem_mapping(mapping) && folio_test_large(folio))
+			wbc.list = folio_list;
+
 		folio_set_reclaim(folio);
 		res = mapping->a_ops->writepage(&folio->page, &wbc);
 		if (res < 0)
@@ -863,7 +871,12 @@ static enum folio_references folio_check_references(struct folio *folio,
 	if (vm_flags & VM_LOCKED)
 		return FOLIOREF_ACTIVATE;
 
-	/* rmap lock contention: rotate */
+	/*
+	 * There are two cases to consider.
+	 * 1) Rmap lock contention: rotate.
+	 * 2) Skip the non-shared swapbacked folio mapped solely by
+	 *    the exiting or OOM-reaped process.
+	 */
 	if (referenced_ptes == -1)
 		return FOLIOREF_KEEP;
 
@@ -1003,9 +1016,6 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
 		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
 		      &nr_succeeded);
 
-	mod_node_page_state(pgdat, PGDEMOTE_KSWAPD + reclaimer_offset(),
-			    nr_succeeded);
-
 	return nr_succeeded;
 }
 
@@ -1222,13 +1232,14 @@ retry:
 				goto keep_locked;
 			if (folio_test_large(folio)) {
 				/* cannot split folio, skip it */
-				if (!can_split_folio(folio, NULL))
+				if (!can_split_folio(folio, 1, NULL))
 					goto activate_locked;
 				/*
 				 * Split partially mapped folios right away.
 				 * We can free the unmapped pages without IO.
 				 */
-				if (data_race(!list_empty(&folio->_deferred_list)) &&
+				if (data_race(!list_empty(&folio->_deferred_list) &&
+				    folio_test_partially_mapped(folio)) &&
 				    split_folio_to_list(folio, folio_list))
 					goto activate_locked;
 			}
@@ -1252,11 +1263,6 @@ retry:
 					goto activate_locked_split;
 				}
 			}
-		} else if (folio_test_swapbacked(folio) &&
-			   folio_test_large(folio)) {
-			/* Split shmem folio */
-			if (split_folio_to_list(folio, folio_list))
-				goto keep_locked;
 		}
 
 		/*
@@ -1357,12 +1363,25 @@ retry:
 			 * starts and then write it out here.
 			 */
 			try_to_unmap_flush_dirty();
-			switch (pageout(folio, mapping, &plug)) {
+			switch (pageout(folio, mapping, &plug, folio_list)) {
 			case PAGE_KEEP:
 				goto keep_locked;
 			case PAGE_ACTIVATE:
+				/*
+				 * If shmem folio is split when writeback to swap,
+				 * the tail pages will make their own pass through
+				 * this function and be accounted then.
+				 */
+				if (nr_pages > 1 && !folio_test_large(folio)) {
+					sc->nr_scanned -= (nr_pages - 1);
+					nr_pages = 1;
+				}
 				goto activate_locked;
 			case PAGE_SUCCESS:
+				if (nr_pages > 1 && !folio_test_large(folio)) {
+					sc->nr_scanned -= (nr_pages - 1);
+					nr_pages = 1;
+				}
 				stat->nr_pageout += nr_pages;
 
 				if (folio_test_writeback(folio))
@@ -1495,7 +1514,8 @@ keep:
 	/* 'folio_list' is always empty here */
 
 	/* Migrate folios selected for demotion */
-	nr_reclaimed += demote_folio_list(&demote_folios, pgdat);
+	stat->nr_demoted = demote_folio_list(&demote_folios, pgdat);
+	nr_reclaimed += stat->nr_demoted;
 	/* Folios that could not be demoted are still in @demote_folios */
 	if (!list_empty(&demote_folios)) {
 		/* Folios which weren't demoted go back on @folio_list */
@@ -1941,6 +1961,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	spin_lock_irq(&lruvec->lru_lock);
 	move_folios_to_lru(lruvec, &folio_list);
 
+	__mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
+			   stat.nr_demoted);
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 	item = PGSTEAL_KSWAPD + reclaimer_offset();
 	if (!cgroup_reclaim(sc))
@@ -2239,10 +2261,11 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc)
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
 	/*
-	 * Flush the memory cgroup stats, so that we read accurate per-memcg
-	 * lruvec stats for heuristics.
+	 * Flush the memory cgroup stats in rate-limited way as we don't need
+	 * most accurate stats here. We may switch to regular stats flushing
+	 * in the future once it is cheap enough.
 	 */
-	mem_cgroup_flush_stats(sc->target_mem_cgroup);
+	mem_cgroup_flush_stats_ratelimited(sc->target_mem_cgroup);
 
 	/*
 	 * Determine the scan balance between anon and file LRUs.
@@ -3456,7 +3479,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
 			goto next;
 
 		if (!pmd_trans_huge(pmd[i])) {
-			if (should_clear_pmd_young())
+			if (!walk->force_scan && should_clear_pmd_young())
 				pmdp_test_and_clear_young(vma, addr, pmd + i);
 			goto next;
 		}
@@ -3543,7 +3566,7 @@ restart:
 
 		walk->mm_stats[MM_NONLEAF_TOTAL]++;
 
-		if (should_clear_pmd_young()) {
+		if (!walk->force_scan && should_clear_pmd_young()) {
 			if (!pmd_young(val))
 				continue;
 
@@ -6644,7 +6667,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
 			continue;
 
 		if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
-			mark = wmark_pages(zone, WMARK_PROMO);
+			mark = promo_wmark_pages(zone);
 		else
 			mark = high_wmark_pages(zone);
 		if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
@@ -7519,7 +7542,9 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
 	ret = __node_reclaim(pgdat, gfp_mask, order);
 	clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags);
 
-	if (!ret)
+	if (ret)
+		count_vm_event(PGSCAN_ZONE_RECLAIM_SUCCESS);
+	else
 		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
 
 	return ret;
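
A minimal standalone sketch (not part of the patch) of the re-accounting idea used in the PAGE_ACTIVATE and PAGE_SUCCESS cases above: if pageout() implicitly split a large shmem folio while writing it to swap, the head folio is now a single page and the tail pages will be rescanned on their own pass, so the extra pages are subtracted from the scan count to avoid double counting. The names scan_stats and account_after_pageout are hypothetical and only illustrate the arithmetic, not kernel API.

#include <stdbool.h>

/* Hypothetical stand-in for the nr_scanned field of struct scan_control. */
struct scan_stats {
	unsigned long nr_scanned;
};

/*
 * The folio entered the shrink pass as nr_pages, but pageout() split it,
 * so only one page should remain accounted here; the tail pages are
 * accounted when they make their own pass through the shrink loop.
 */
unsigned int account_after_pageout(struct scan_stats *stats,
				   unsigned int nr_pages,
				   bool folio_still_large)
{
	if (nr_pages > 1 && !folio_still_large) {
		stats->nr_scanned -= nr_pages - 1;
		nr_pages = 1;
	}
	return nr_pages;
}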