| | | |
|---|---|---|
| author | Alex Shi <[email protected]> | 2020-12-15 12:34:29 -0800 |
| committer | Linus Torvalds <[email protected]> | 2020-12-15 14:48:04 -0800 |
| commit | 6168d0da2b479ce25a4647de194045de1bdd1f1d (patch) | |
| tree | d3a8a1d484d5ca32097347ca9b314483b6186fbd /include/linux | |
| parent | fc574c23558c63799dd99a9bb1d62e33708abaf5 (diff) | |
mm/lru: replace pgdat lru_lock with lruvec lock
This patch moves the per-node lru_lock into the lruvec, giving each
memcg its own lru_lock on each node. On a large machine, memcgs then no
longer contend on the per-node pgdat->lru_lock; each can proceed under
its own lru_lock.
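
As a rough sketch of what this changes for callers (the lock/unlock
helpers are the ones added in the diff below; the surrounding code is
illustrative, not a hunk from this patch):

```c
/*
 * Before: all LRU work on a node serialized on the node-wide lock:
 *
 *      spin_lock_irqsave(&pgdat->lru_lock, flags);
 *      ... manipulate page on the node's LRU lists ...
 *      spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 *
 * After: the lock lives in the lruvec, so each memcg gets its own
 * lock on each node and unrelated memcgs no longer contend:
 */
struct lruvec *lruvec;
unsigned long flags;

lruvec = lock_page_lruvec_irqsave(page, &flags);
/* ... manipulate page on lruvec->lists ... */
unlock_page_lruvec_irqrestore(lruvec, flags);
```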
Since memcg charging was moved to before LRU insertion, page isolation
can serialize a page's memcg: once a page has been isolated, its memcg
cannot change, so the per-memcg lruvec lock is stable and can replace
the per-node lru_lock.
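
The rule this relies on can be sketched as follows (a sketch only:
TestClearPageLRU() and lock_page_lruvec_irq() exist in this series, but
the wrapper function here is hypothetical):

```c
/*
 * Pages are charged to a memcg before they are put on an LRU, so a
 * page that is on an LRU already has a settled memcg.  Isolating the
 * page (clearing PG_lru) pins that association, so the lruvec found
 * and locked by lock_page_lruvec_irq() cannot change under us.
 */
static struct lruvec *isolate_page_and_lock_lruvec(struct page *page)
{
        if (!TestClearPageLRU(page))
                return NULL;    /* lost the race; already isolated */

        /* page's memcg is now stable: lock its per-memcg lruvec */
        return lock_page_lruvec_irq(page);
}
```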
In isolate_migratepages_block(), compact_unlock_should_abort() and
lock_page_lruvec_irqsave() are open-coded to work with compact_control.
A debug function is also added to the locking path; it may give some
clues if something gets out of hand.
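
Only the declaration of that debug function appears in the header hunk
below; its body lives in mm/memcontrol.c. A plausible shape for it,
consistent with the description above (check that the page really
belongs to the lruvec it was locked under), is:

```c
#ifdef CONFIG_DEBUG_VM
void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
{
        if (mem_cgroup_disabled())
                return;

        /* An uncharged page may only sit on the root memcg's lruvec. */
        if (!page_memcg(page))
                VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page);
        else
                VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != page_memcg(page), page);
}
#endif
```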
Daniel Jordan's testing shows a 62% improvement in a modified readtwice
case on his 2-socket * 10-core * 2-HT Broadwell box:
https://lore.kernel.org/lkml/[email protected]/
Hugh Dickins helped polish the patch, thanks!
[[email protected]: fix comment typo]
Link: https://lkml.kernel.org/r/[email protected]
[[email protected]: use page_memcg()]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Alex Shi <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Rong Chen <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Yang Shi <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Cc: Daniel Jordan <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: "Huang, Ying" <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mika Penttilä <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Wei Yang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Diffstat (limited to 'include/linux')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | include/linux/memcontrol.h | 58 |
| -rw-r--r-- | include/linux/mmzone.h | 3 |

2 files changed, 60 insertions(+), 1 deletion(-)
```diff
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f530d634f055..aa5d559853c2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -491,6 +491,19 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
 
+struct lruvec *lock_page_lruvec(struct page *page);
+struct lruvec *lock_page_lruvec_irq(struct page *page);
+struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+                                        unsigned long *flags);
+
+#ifdef CONFIG_DEBUG_VM
+void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+#else
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
+#endif
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
         return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -996,6 +1009,31 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
 
+static inline struct lruvec *lock_page_lruvec(struct page *page)
+{
+        struct pglist_data *pgdat = page_pgdat(page);
+
+        spin_lock(&pgdat->__lruvec.lru_lock);
+        return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+{
+        struct pglist_data *pgdat = page_pgdat(page);
+
+        spin_lock_irq(&pgdat->__lruvec.lru_lock);
+        return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+                unsigned long *flagsp)
+{
+        struct pglist_data *pgdat = page_pgdat(page);
+
+        spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
+        return &pgdat->__lruvec;
+}
+
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
                 struct mem_cgroup *prev,
@@ -1215,6 +1253,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
                              enum vm_event_item idx)
 {
 }
+
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
 #endif /* CONFIG_MEMCG */
 
 /* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -1296,6 +1338,22 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
         return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
 }
 
+static inline void unlock_page_lruvec(struct lruvec *lruvec)
+{
+        spin_unlock(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irq(struct lruvec *lruvec)
+{
+        spin_unlock_irq(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
+                unsigned long flags)
+{
+        spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+}
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 
 struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 98a80c01d150..9da23c019dc5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -276,6 +276,8 @@ enum lruvec_flags {
 
 struct lruvec {
         struct list_head                lists[NR_LRU_LISTS];
+        /* per lruvec lru_lock for memcg */
+        spinlock_t                      lru_lock;
         /*
          * These track the cost of reclaiming one LRU - file or anon -
          * over the other. As the observed cost of reclaiming one LRU
@@ -782,7 +784,6 @@ typedef struct pglist_data {
 
         /* Write-intensive fields used by page reclaim */
         ZONE_PADDING(_pad1_)
-        spinlock_t              lru_lock;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
         /*
```