diff options
author | Roman Gushchin <[email protected]> | 2020-08-11 18:30:25 -0700 |
---|---|---|
committer | Linus Torvalds <[email protected]> | 2020-08-12 10:57:55 -0700 |
commit | 3e38e0aaca9eafb12b1c4b731d1c10975cbe7974 (patch) | |
tree | 9df2be98d1908c91e92c1a968931d70ea8db0896 | |
parent | 772616b031f06e05846488b01dab46a7c832da13 (diff) |
mm: memcg: charge memcg percpu memory to the parent cgroup
Memory cgroups are using large chunks of percpu memory to store vmstat
data. Yet this memory is not accounted at all, so in the case when there
are many (dying) cgroups, it's not exactly clear where all the memory is.
Because the size of memory cgroup internal structures can dramatically
exceed the size of object or page which is pinning it in the memory, it's
not a good idea to simply ignore it. It actually breaks the isolation
between cgroups.
Let's account the consumed percpu memory to the parent cgroup.
[[email protected]: add WARN_ON_ONCE()s, per Johannes]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Roman Gushchin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Reviewed-by: Shakeel Butt <[email protected]>
Acked-by: Dennis Zhou <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Pekka Enberg <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Tobin C. Harding <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Bixuan Cui <[email protected]>
Cc: Michal Koutný <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
-rw-r--r-- | mm/memcontrol.c | 20 |
1 files changed, 16 insertions, 4 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 36d5300f9b69..f1fd265b9f9e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5131,13 +5131,18 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat); + /* We charge the parent cgroup, never the current task */ + WARN_ON_ONCE(!current->active_memcg); + + pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_local) { kfree(pn); return 1; } - pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); + pn->lruvec_stat_cpu = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_cpu) { free_percpu(pn->lruvec_stat_local); kfree(pn); @@ -5211,11 +5216,16 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; } - memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); + /* We charge the parent cgroup, never the current task */ + WARN_ON_ONCE(!current->active_memcg); + + memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_local) goto fail; - memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu); + memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_percpu) goto fail; @@ -5264,7 +5274,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) struct mem_cgroup *memcg; long error = -ENOMEM; + memalloc_use_memcg(parent); memcg = mem_cgroup_alloc(); + memalloc_unuse_memcg(); if (IS_ERR(memcg)) return ERR_CAST(memcg); |