author      Alex Shi <[email protected]>          2020-12-15 12:34:29 -0800
committer   Linus Torvalds <[email protected]>    2020-12-15 14:48:04 -0800
commit      6168d0da2b479ce25a4647de194045de1bdd1f1d (patch)
tree        d3a8a1d484d5ca32097347ca9b314483b6186fbd /include/linux
parent      fc574c23558c63799dd99a9bb1d62e33708abaf5 (diff)
mm/lru: replace pgdat lru_lock with lruvec lock
This patch moves the per-node lru_lock into the lruvec, giving each memcg its own lru_lock on each node. On a large machine the memcgs no longer have to suffer contention on the per-node pgdat->lru_lock; each can go fast with its own lru_lock.

After moving the memcg charge before LRU insertion, page isolation can serialize the page's memcg, so the per-memcg lruvec lock is stable and can replace the per-node lru lock.

In isolate_migratepages_block(), compact_unlock_should_abort() and lock_page_lruvec_irqsave() are open coded to work with compact_control. A debug function is also added to the locking code which may give some clues if something gets out of hand.

Daniel Jordan's testing shows a 62% improvement on a modified readtwice case on his 2P * 10 core * 2 HT Broadwell box.
https://lore.kernel.org/lkml/[email protected]/

Hugh Dickins helped polish the patch, thanks!

[[email protected]: fix comment typo]
  Link: https://lkml.kernel.org/r/[email protected]
[[email protected]: use page_memcg()]
  Link: https://lkml.kernel.org/r/[email protected]

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Alex Shi <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Rong Chen <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Yang Shi <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Cc: Daniel Jordan <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: "Huang, Ying" <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mika Penttilä <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Wei Yang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
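For context, a minimal usage sketch of the new locking API declared in the memcontrol.h hunks below. The caller, the page pointer and the LRU-list manipulation are illustrative placeholders, not code from this patch: the point is that a caller which previously took pgdat->lru_lock now derives and locks the page's lruvec instead.

	struct lruvec *lruvec;
	unsigned long flags;

	/* Lock the lruvec that holds this page's (memcg, node) LRU. */
	lruvec = lock_page_lruvec_irqsave(page, &flags);

	/*
	 * With the memcg charge done before LRU insertion, the page's
	 * memcg - and therefore its lruvec - is stable here.
	 */
	if (PageLRU(page))
		del_page_from_lru_list(page, lruvec, page_lru(page));

	unlock_page_lruvec_irqrestore(lruvec, flags);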
Diffstat (limited to 'include/linux')
-rw-r--r--   include/linux/memcontrol.h   58
-rw-r--r--   include/linux/mmzone.h        3
2 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f530d634f055..aa5d559853c2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -491,6 +491,19 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
+struct lruvec *lock_page_lruvec(struct page *page);
+struct lruvec *lock_page_lruvec_irq(struct page *page);
+struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+ unsigned long *flags);
+
+#ifdef CONFIG_DEBUG_VM
+void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+#else
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
+#endif
+
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -996,6 +1009,31 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
+static inline struct lruvec *lock_page_lruvec(struct page *page)
+{
+ struct pglist_data *pgdat = page_pgdat(page);
+
+ spin_lock(&pgdat->__lruvec.lru_lock);
+ return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+{
+ struct pglist_data *pgdat = page_pgdat(page);
+
+ spin_lock_irq(&pgdat->__lruvec.lru_lock);
+ return &pgdat->__lruvec;
+}
+
+static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+ unsigned long *flagsp)
+{
+ struct pglist_data *pgdat = page_pgdat(page);
+
+ spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
+ return &pgdat->__lruvec;
+}
+
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -1215,6 +1253,10 @@ static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
+
+static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+{
+}
#endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -1296,6 +1338,22 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
}
+static inline void unlock_page_lruvec(struct lruvec *lruvec)
+{
+ spin_unlock(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irq(struct lruvec *lruvec)
+{
+ spin_unlock_irq(&lruvec->lru_lock);
+}
+
+static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
+ unsigned long flags)
+{
+ spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+}
+
#ifdef CONFIG_CGROUP_WRITEBACK
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
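The lock_page_lruvec*() declarations above get their CONFIG_MEMCG implementations in mm/memcontrol.c, which falls outside this include/linux diffstat. A rough sketch of the shape they take there, reconstructed for context rather than quoted from the patch: the page's memcg is resolved under RCU, the resulting lruvec's lru_lock is taken, and lruvec_memcg_debug() can then (under CONFIG_DEBUG_VM) complain if the page's memcg no longer matches the lruvec.

	struct lruvec *lock_page_lruvec(struct page *page)
	{
		struct lruvec *lruvec;
		struct pglist_data *pgdat = page_pgdat(page);

		/* RCU keeps the memcg lookup valid until the lock is held. */
		rcu_read_lock();
		lruvec = mem_cgroup_page_lruvec(page, pgdat);
		spin_lock(&lruvec->lru_lock);
		rcu_read_unlock();

		/* Debug check: does the page's memcg match this lruvec? */
		lruvec_memcg_debug(lruvec, page);

		return lruvec;
	}

The _irq and _irqsave variants follow the same pattern with the matching spin_lock flavour.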
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 98a80c01d150..9da23c019dc5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -276,6 +276,8 @@ enum lruvec_flags {
struct lruvec {
struct list_head lists[NR_LRU_LISTS];
+ /* per lruvec lru_lock for memcg */
+ spinlock_t lru_lock;
/*
* These track the cost of reclaiming one LRU - file or anon -
* over the other. As the observed cost of reclaiming one LRU
@@ -782,7 +784,6 @@ typedef struct pglist_data {
/* Write-intensive fields used by page reclaim */
ZONE_PADDING(_pad1_)
- spinlock_t lru_lock;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*