aboutsummaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2024-11-23 09:58:07 -0800
committerLinus Torvalds <[email protected]>2024-11-23 09:58:07 -0800
commit5c00ff742bf5caf85f60e1c73999f99376fb865d (patch)
treefa484e83c27af79f1c0511e7e0673507461c9379 /mm/vmscan.c
parent228a1157fb9fec47eb135b51c0202b574e079ebf (diff)
parent2532e6c74a67e65b95f310946e0c0e0a41b3a34b (diff)
Merge tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - The series "zram: optimal post-processing target selection" from Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings. - Wei Yang has gone to town on the mapletree code, contributing several series which clean up the implementation: - "refine mas_mab_cp()" - "Reduce the space to be cleared for maple_big_node" - "maple_tree: simplify mas_push_node()" - "Following cleanup after introduce mas_wr_store_type()" - "refine storing null" - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390. - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationaizations and cleanups in the page mapping code. - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries. - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag. - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code. - The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. More consistent cleaner and potentiall saves a large number of pagefaults. - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built in percpu test code. - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding doing things which we didn't need to do. - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed. - The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" fro Zheng Yejian fixes DAMON splitting. - The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcgv2 charge moving feature. - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleanup up some of the mmap() error handling and addresses some potential performance issues. - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text. - The series "page allocation tag compression" from Suren Baghdasaryan is followon maintenance work for the new page allocation profiling feature. - The series "page->index removals in mm" from Matthew Wilcox remove most references to page->index in mm/. A slow march towards shrinking struct page. - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self testing code. - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation. - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework. - The series "implement lightweight guard pages" from Lorenzo Stoakes permits userapace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected. - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity. - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance. - The series "mm: add more kernel parameters to control mTHP" from MaĆ­ra Canal enhances our ability to control/configuremultisize THP from the kernel boot command line. - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests. - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled. * tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (215 commits) cma: enforce non-zero pageblock_order during cma_init_reserved_mem() mm/kfence: add a new kunit test test_use_after_free_read_nofault() zram: fix NULL pointer in comp_algorithm_show() memcg/hugetlb: add hugeTLB counters to memcg vmstat: call fold_vm_zone_numa_events() before show per zone NUMA event mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount zram: ZRAM_DEF_COMP should depend on ZRAM MAINTAINERS/MEMORY MANAGEMENT: add document files for mm Docs/mm/damon: recommend academic papers to read and/or cite mm: define general function pXd_init() kmemleak: iommu/iova: fix transient kmemleak false positive mm/list_lru: simplify the list_lru walk callback function mm/list_lru: split the lock to per-cgroup scope mm/list_lru: simplify reparenting and initial allocation mm/list_lru: code clean up for reparenting mm/list_lru: don't export list_lru_add mm/list_lru: don't pass unnecessary key parameters kasan: add kunit tests for kmalloc_track_caller, kmalloc_node_track_caller kasan: change kasan_atomics kunit test as KUNIT_CASE_SLOW kasan: use EXPORT_SYMBOL_IF_KUNIT to export symbols ...
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c68
1 files changed, 41 insertions, 27 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 28ba2b06fc7d..76378bc257e3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1258,8 +1258,8 @@ retry:
THP_SWPOUT_FALLBACK, 1);
count_vm_event(THP_SWPOUT_FALLBACK);
}
- count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK);
#endif
+ count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK);
if (!add_to_swap(folio))
goto activate_locked_split;
}
@@ -2129,7 +2129,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
static unsigned int reclaim_folio_list(struct list_head *folio_list,
struct pglist_data *pgdat)
{
- struct reclaim_stat dummy_stat;
+ struct reclaim_stat stat;
unsigned int nr_reclaimed;
struct folio *folio;
struct scan_control sc = {
@@ -2140,12 +2140,13 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
.no_demotion = 1,
};
- nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, true);
+ nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &stat, true);
while (!list_empty(folio_list)) {
folio = lru_to_folio(folio_list);
list_del(&folio->lru);
folio_putback_lru(folio);
}
+ trace_mm_vmscan_reclaim_pages(pgdat->node_id, sc.nr_scanned, nr_reclaimed, &stat);
return nr_reclaimed;
}
@@ -2602,8 +2603,6 @@ static bool should_clear_pmd_young(void)
* shorthand helpers
******************************************************************************/
-#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset))
-
#define DEFINE_MAX_SEQ(lruvec) \
unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq)
@@ -3138,7 +3137,6 @@ static int folio_update_gen(struct folio *folio, int gen)
unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
- VM_WARN_ON_ONCE(!rcu_read_lock_held());
do {
/* lru_gen_del_folio() has isolated this page? */
@@ -3354,7 +3352,7 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
if (folio_nid(folio) != pgdat->node_id)
return NULL;
- if (folio_memcg_rcu(folio) != memcg)
+ if (folio_memcg(folio) != memcg)
return NULL;
/* file VMAs can contain anon pages from COW */
@@ -3386,8 +3384,10 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
DEFINE_MAX_SEQ(walk->lruvec);
int old_gen, new_gen = lru_gen_from_seq(max_seq);
+ pmd_t pmdval;
- pte = pte_offset_map_nolock(args->mm, pmd, start & PMD_MASK, &ptl);
+ pte = pte_offset_map_rw_nolock(args->mm, pmd, start & PMD_MASK, &pmdval,
+ &ptl);
if (!pte)
return false;
if (!spin_trylock(ptl)) {
@@ -3395,6 +3395,11 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
return false;
}
+ if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
+ pte_unmap_unlock(pte, ptl);
+ return false;
+ }
+
arch_enter_lazy_mmu_mode();
restart:
for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) {
@@ -3643,10 +3648,8 @@ static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
.p4d_entry = walk_pud_range,
.walk_lock = PGWALK_RDLOCK,
};
-
int err;
struct lruvec *lruvec = walk->lruvec;
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
walk->next_addr = FIRST_USER_ADDRESS;
@@ -3659,10 +3662,6 @@ static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
if (walk->seq != max_seq)
break;
- /* folio_update_gen() requires stable folio_memcg() */
- if (!mem_cgroup_trylock_pages(memcg))
- break;
-
/* the caller might be holding the lock for write */
if (mmap_read_trylock(mm)) {
err = walk_page_range(mm, walk->next_addr, ULONG_MAX, &mm_walk_ops, walk);
@@ -3670,8 +3669,6 @@ static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
mmap_read_unlock(mm);
}
- mem_cgroup_unlock_pages();
-
if (walk->batched) {
spin_lock_irq(&lruvec->lru_lock);
reset_batch_size(walk);
@@ -4093,10 +4090,6 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
}
}
- /* folio_update_gen() requires stable folio_memcg() */
- if (!mem_cgroup_trylock_pages(memcg))
- return true;
-
arch_enter_lazy_mmu_mode();
pte -= (addr - start) / PAGE_SIZE;
@@ -4134,12 +4127,13 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
old_gen = folio_lru_gen(folio);
if (old_gen < 0)
folio_set_referenced(folio);
- else if (old_gen != new_gen)
+ else if (old_gen != new_gen) {
+ folio_clear_lru_refs(folio);
folio_activate(folio);
+ }
}
arch_leave_lazy_mmu_mode();
- mem_cgroup_unlock_pages();
/* feedback from rmap walkers to page table walkers */
if (mm_state && suitable_to_scan(i, young))
@@ -4290,6 +4284,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
int tier_idx)
{
bool success;
+ bool dirty, writeback;
int gen = folio_lru_gen(folio);
int type = folio_is_file_lru(folio);
int zone = folio_zonenum(folio);
@@ -4335,9 +4330,17 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
return true;
}
+ dirty = folio_test_dirty(folio);
+ writeback = folio_test_writeback(folio);
+ if (type == LRU_GEN_FILE && dirty) {
+ sc->nr.file_taken += delta;
+ if (!writeback)
+ sc->nr.unqueued_dirty += delta;
+ }
+
/* waiting for writeback */
- if (folio_test_locked(folio) || folio_test_writeback(folio) ||
- (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
+ if (folio_test_locked(folio) || writeback ||
+ (type == LRU_GEN_FILE && dirty)) {
gen = folio_inc_gen(lruvec, folio, true);
list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
@@ -4368,7 +4371,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
/* see the comment on MAX_NR_TIERS */
if (!folio_test_referenced(folio))
- set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0);
+ folio_clear_lru_refs(folio);
/* for shrink_folio_list() */
folio_clear_reclaim(folio);
@@ -4453,7 +4456,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
scanned, skipped, isolated,
type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
-
+ if (type == LRU_GEN_FILE)
+ sc->nr.file_taken += isolated;
/*
* There might not be eligible folios due to reclaim_idx. Check the
* remaining to prevent livelock if it's not making progress.
@@ -4587,6 +4591,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
return scanned;
retry:
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
+ sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
sc->nr_reclaimed += reclaimed;
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
scanned, reclaimed, &stat, sc->priority,
@@ -4795,6 +4800,13 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
cond_resched();
}
+ /*
+ * If too many file cache in the coldest generation can't be evicted
+ * due to being dirty, wake up the flusher.
+ */
+ if (sc->nr.unqueued_dirty && sc->nr.unqueued_dirty == sc->nr.file_taken)
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
+
/* whether this lruvec should be rotated */
return nr_to_scan < 0;
}
@@ -5940,6 +5952,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
bool reclaimable = false;
if (lru_gen_enabled() && root_reclaim(sc)) {
+ memset(&sc->nr, 0, sizeof(sc->nr));
lru_gen_shrink_node(pgdat, sc);
return;
}
@@ -5990,7 +6003,8 @@ again:
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/* Allow kswapd to start writing pages during reclaim.*/
- if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+ if (sc->nr.unqueued_dirty &&
+ sc->nr.unqueued_dirty == sc->nr.file_taken)
set_bit(PGDAT_DIRTY, &pgdat->flags);
/*