Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	183
1 file changed, 99 insertions(+), 84 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ab457f0394ab..5abffe6f8389 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,7 +63,6 @@
 #include <linux/resume_user_mode.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
-#include <linux/parser.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -89,6 +88,9 @@ static bool cgroup_memory_nosocket __ro_after_init;
 /* Kernel memory accounting disabled? */
 static bool cgroup_memory_nokmem __ro_after_init;
 
+/* BPF memory accounting disabled? */
+static bool cgroup_memory_nobpf __ro_after_init;
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
 #endif
@@ -346,26 +348,27 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
  * conditional to this static branch, we'll have to allow modules that does
  * kmem_cache_alloc and the such to see this symbol as well
  */
-DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
-EXPORT_SYMBOL(memcg_kmem_enabled_key);
+DEFINE_STATIC_KEY_FALSE(memcg_kmem_online_key);
+EXPORT_SYMBOL(memcg_kmem_online_key);
+
+DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
+EXPORT_SYMBOL(memcg_bpf_enabled_key);
 #endif
 
 /**
- * mem_cgroup_css_from_page - css of the memcg associated with a page
- * @page: page of interest
+ * mem_cgroup_css_from_folio - css of the memcg associated with a folio
+ * @folio: folio of interest
  *
  * If memcg is bound to the default hierarchy, css of the memcg associated
- * with @page is returned.  The returned css remains associated with @page
+ * with @folio is returned.  The returned css remains associated with @folio
  * until it is released.
  *
  * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
 * is returned.
  */
-struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
+struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
 {
-	struct mem_cgroup *memcg;
-
-	memcg = page_memcg(page);
+	struct mem_cgroup *memcg = folio_memcg(folio);
 
 	if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		memcg = root_mem_cgroup;
@@ -478,6 +481,12 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
 	struct mem_cgroup_per_node *mz;
 	struct mem_cgroup_tree_per_node *mctz;
 
+	if (lru_gen_enabled()) {
+		if (soft_limit_excess(memcg))
+			lru_gen_soft_reclaim(&memcg->nodeinfo[nid]->lruvec);
+		return;
+	}
+
 	mctz = soft_limit_tree.rb_tree_per_node[nid];
 	if (!mctz)
 		return;
@@ -2393,8 +2402,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 		psi_memstall_enter(&pflags);
 		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
 							gfp_mask,
-							MEMCG_RECLAIM_MAY_SWAP,
-							NULL);
+							MEMCG_RECLAIM_MAY_SWAP);
 		psi_memstall_leave(&pflags);
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
@@ -2685,8 +2693,7 @@ retry:
 
 	psi_memstall_enter(&pflags);
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
-						    gfp_mask, reclaim_options,
-						    NULL);
+						    gfp_mask, reclaim_options);
 	psi_memstall_leave(&pflags);
 
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
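
A note on the key pair introduced above: this diff only defines and exports memcg_bpf_enabled_key; the read side would live in include/linux/memcontrol.h as a static-branch helper, presumably named memcg_bpf_enabled() to mirror memcg_kmem_online(). A minimal sketch of that pattern follows (the helper name and its location are assumptions, not part of this diff):

/* Sketch of the read side of memcg_bpf_enabled_key. Only the key itself
 * is defined in this diff; the helper below is an assumed counterpart.
 */
#include <linux/jump_label.h>

DECLARE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);

static inline bool memcg_bpf_enabled(void)
{
	/* Patched jump: effectively free when BPF accounting is off. */
	return static_branch_likely(&memcg_bpf_enabled_key);
}
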
@@ -2942,13 +2949,13 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
 	}
 
 	/*
-	 * page_memcg_check() is used here, because in theory we can encounter
+	 * folio_memcg_check() is used here, because in theory we can encounter
 	 * a folio where the slab flag has been cleared already, but
 	 * slab->memcg_data has not been freed yet
-	 * page_memcg_check(page) will guarantee that a proper memory
+	 * folio_memcg_check() will guarantee that a proper memory
 	 * cgroup pointer or NULL will be returned.
 	 */
-	return page_memcg_check(folio_page(folio, 0));
+	return folio_memcg_check(folio);
 }
 
 /*
@@ -3033,7 +3040,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
 {
 	struct obj_cgroup *objcg;
 
-	if (!memcg_kmem_enabled())
+	if (!memcg_kmem_online())
 		return NULL;
 
 	if (PageMemcgKmem(page)) {
@@ -3506,8 +3513,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
 		}
 
 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-					memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
-					NULL)) {
+					memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
 			ret = -EBUSY;
 			break;
 		}
@@ -3530,6 +3536,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	struct mem_cgroup_tree_per_node *mctz;
 	unsigned long excess;
 
+	if (lru_gen_enabled())
+		return 0;
+
 	if (order > 0)
 		return 0;
@@ -3618,8 +3627,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 			return -EINTR;
 
 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-						  MEMCG_RECLAIM_MAY_SWAP,
-						  NULL))
+						  MEMCG_RECLAIM_MAY_SWAP))
 			nr_retries--;
 	}
 
@@ -3744,7 +3752,7 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
 	objcg->memcg = memcg;
 	rcu_assign_pointer(memcg->objcg, objcg);
 
-	static_branch_enable(&memcg_kmem_enabled_key);
+	static_branch_enable(&memcg_kmem_online_key);
 
 	memcg->kmemcg_id = memcg->id.id;
@@ -3919,6 +3927,10 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
+	pr_warn_once("Cgroup memory moving (move_charge_at_immigrate) is deprecated. "
+		     "Please report your usecase to [email protected] if you "
+		     "depend on this functionality.\n");
+
 	if (val & ~MOVE_MASK)
 		return -EINVAL;
@@ -5362,6 +5374,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 
+#if defined(CONFIG_MEMCG_KMEM)
+	if (!cgroup_memory_nobpf)
+		static_branch_inc(&memcg_bpf_enabled_key);
+#endif
+
 	return &memcg->css;
 }
@@ -5387,6 +5404,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (unlikely(mem_cgroup_is_root(memcg)))
 		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
 				   2UL*HZ);
+	lru_gen_online_memcg(memcg);
 	return 0;
 offline_kmem:
 	memcg_offline_kmem(memcg);
@@ -5418,6 +5436,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	memcg_offline_kmem(memcg);
 	reparent_shrinker_deferred(memcg);
 	wb_memcg_offline(memcg);
+	lru_gen_offline_memcg(memcg);
 
 	drain_all_stock(memcg);
@@ -5429,6 +5448,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
 	invalidate_reclaim_iterators(memcg);
+	lru_gen_release_memcg(memcg);
 }
 
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
@@ -5446,6 +5466,11 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_active)
 		static_branch_dec(&memcg_sockets_enabled_key);
 
+#if defined(CONFIG_MEMCG_KMEM)
+	if (!cgroup_memory_nobpf)
+		static_branch_dec(&memcg_bpf_enabled_key);
+#endif
+
 	vmpressure_cleanup(&memcg->vmpressure);
 	cancel_work_sync(&memcg->high_work);
 	mem_cgroup_remove_from_trees(memcg);
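
Also worth noting in the hunks above: mem_cgroup_css_alloc() does static_branch_inc() on memcg_bpf_enabled_key and mem_cgroup_css_free() the matching static_branch_dec(), so the key acts as a refcount that stays enabled while any memcg with BPF accounting exists, unless cgroup.memory=nobpf cleared it at boot. A consumer would typically fold the test into its GFP flags, roughly as below (the helper name is illustrative; real call sites would sit on the BPF side, not in this file):

/* Illustrative consumer of memcg_bpf_enabled_key: tag BPF allocations
 * with __GFP_ACCOUNT only while per-memcg BPF accounting is active.
 * The helper name is an assumption for illustration.
 */
#include <linux/gfp.h>

static inline gfp_t bpf_account_gfp(gfp_t flags)
{
	if (memcg_bpf_enabled())
		return flags | __GFP_ACCOUNT;
	return flags;
}
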
@@ -5692,7 +5717,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
  * @from: mem_cgroup which the page is moved from.
  * @to:	mem_cgroup which the page is moved to. @from != @to.
  *
- * The caller must make sure the page is not on LRU (isolate_page() is useful.)
+ * The page must be locked and not on the LRU.
  *
 * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
 * from old cgroup.
@@ -5709,20 +5734,13 @@ static int mem_cgroup_move_account(struct page *page,
 	int nid, ret;
 
 	VM_BUG_ON(from == to);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 	VM_BUG_ON(compound && !folio_test_large(folio));
 
-	/*
-	 * Prevent mem_cgroup_migrate() from looking at
-	 * page's memory cgroup of its source page while we change it.
-	 */
-	ret = -EBUSY;
-	if (!folio_trylock(folio))
-		goto out;
-
 	ret = -EINVAL;
 	if (folio_memcg(folio) != from)
-		goto out_unlock;
+		goto out;
 
 	pgdat = folio_pgdat(folio);
 	from_vec = mem_cgroup_lruvec(from, pgdat);
@@ -5809,8 +5827,6 @@ static int mem_cgroup_move_account(struct page *page,
 	mem_cgroup_charge_statistics(from, -nr_pages);
 	memcg_check_events(from, nid);
 	local_irq_enable();
-out_unlock:
-	folio_unlock(folio);
 out:
 	return ret;
 }
@@ -5859,6 +5875,29 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 	else if (is_swap_pte(ptent))
 		page = mc_handle_swap_pte(vma, ptent, &ent);
 
+	if (target && page) {
+		if (!trylock_page(page)) {
+			put_page(page);
+			return ret;
+		}
+		/*
+		 * page_mapped() must be stable during the move. This
+		 * pte is locked, so if it's present, the page cannot
+		 * become unmapped. If it isn't, we have only partial
+		 * control over the mapped state: the page lock will
+		 * prevent new faults against pagecache and swapcache,
+		 * so an unmapped page cannot become mapped. However,
+		 * if the page is already mapped elsewhere, it can
+		 * unmap, and there is nothing we can do about it.
+		 * Alas, skip moving the page in this case.
+		 */
+		if (!pte_present(ptent) && page_mapped(page)) {
+			unlock_page(page);
+			put_page(page);
+			return ret;
+		}
+	}
+
 	if (!page && !ent.val)
 		return ret;
 	if (page) {
@@ -5875,8 +5914,11 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
 			if (target)
 				target->page = page;
 		}
-		if (!ret || !target)
+		if (!ret || !target) {
+			if (target)
+				unlock_page(page);
 			put_page(page);
+		}
 	}
 	/*
 	 * There is a swap entry and a page doesn't exist or isn't charged.
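
The hunks above and below change the charge-moving contract: get_mctgt_type() and get_mctgt_type_thp() now return the target page referenced and locked, keeping page_mapped() stable until mem_cgroup_move_account(), which now asserts the folio lock instead of taking it itself. A condensed sketch of the updated caller shape (iteration and error paths elided; not the verbatim kernel code):

/* Sketch of the caller contract in mem_cgroup_move_charge_pte_range()
 * after this change: the page arrives locked from get_mctgt_type(), so
 * the caller unlocks it right before dropping the reference. Note that
 * isolate_lru_page() now returns true on success, hence the inverted
 * test relative to the old code.
 */
if (get_mctgt_type(vma, addr, ptent, &target) == MC_TARGET_PAGE) {
	struct page *page = target.page;

	if (!device && !isolate_lru_page(page))
		goto put;
	if (!mem_cgroup_move_account(page, false, mc.from, mc.to)) {
		mc.precharge--;
		mc.moved_charge++;
	}
	if (!device)
		putback_lru_page(page);
put:
	unlock_page(page);	/* get_mctgt_type() locked it */
	put_page(page);		/* ...and took the reference */
}
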
@@ -5916,6 +5958,10 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
 		ret = MC_TARGET_PAGE;
 		if (target) {
 			get_page(page);
+			if (!trylock_page(page)) {
+				put_page(page);
+				return MC_TARGET_NONE;
+			}
 			target->page = page;
 		}
 	}
@@ -6146,7 +6192,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
 		if (target_type == MC_TARGET_PAGE) {
 			page = target.page;
-			if (!isolate_lru_page(page)) {
+			if (isolate_lru_page(page)) {
 				if (!mem_cgroup_move_account(page, true,
 							     mc.from, mc.to)) {
 					mc.precharge -= HPAGE_PMD_NR;
@@ -6154,6 +6200,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 				}
 				putback_lru_page(page);
 			}
+			unlock_page(page);
 			put_page(page);
 		} else if (target_type == MC_TARGET_DEVICE) {
 			page = target.page;
@@ -6162,6 +6209,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 				mc.precharge -= HPAGE_PMD_NR;
 				mc.moved_charge += HPAGE_PMD_NR;
 			}
+			unlock_page(page);
 			put_page(page);
 		}
 		spin_unlock(ptl);
@@ -6194,7 +6242,7 @@ retry:
 			 */
 			if (PageTransCompound(page))
 				goto put;
-			if (!device && isolate_lru_page(page))
+			if (!device && !isolate_lru_page(page))
 				goto put;
 			if (!mem_cgroup_move_account(page, false,
 						mc.from, mc.to)) {
@@ -6204,7 +6252,8 @@ retry:
 			}
 			if (!device)
 				putback_lru_page(page);
-put:			/* get_mctgt_type() gets the page */
+put:			/* get_mctgt_type() gets & locks the page */
+			unlock_page(page);
 			put_page(page);
 			break;
 		case MC_TARGET_SWAP:
@@ -6429,8 +6478,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 		}
 
 		reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
-					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
-					NULL);
+					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
 
 		if (!reclaimed && !nr_retries--)
 			break;
@@ -6479,8 +6527,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 
 		if (nr_reclaims) {
 			if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
-					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
-					NULL))
+					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
 				nr_reclaims--;
 			continue;
 		}
@@ -6603,54 +6650,21 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
-enum {
-	MEMORY_RECLAIM_NODES = 0,
-	MEMORY_RECLAIM_NULL,
-};
-
-static const match_table_t if_tokens = {
-	{ MEMORY_RECLAIM_NODES, "nodes=%s" },
-	{ MEMORY_RECLAIM_NULL, NULL },
-};
-
 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
 	unsigned long nr_to_reclaim, nr_reclaimed = 0;
-	unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
-				       MEMCG_RECLAIM_PROACTIVE;
-	char *old_buf, *start;
-	substring_t args[MAX_OPT_ARGS];
-	int token;
-	char value[256];
-	nodemask_t nodemask = NODE_MASK_ALL;
-
-	buf = strstrip(buf);
-
-	old_buf = buf;
-	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
-	if (buf == old_buf)
-		return -EINVAL;
+	unsigned int reclaim_options;
+	int err;
 
 	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "", &nr_to_reclaim);
+	if (err)
+		return err;
 
-	while ((start = strsep(&buf, " ")) != NULL) {
-		if (!strlen(start))
-			continue;
-		token = match_token(start, if_tokens, args);
-		match_strlcpy(value, args, sizeof(value));
-		switch (token) {
-		case MEMORY_RECLAIM_NODES:
-			if (nodelist_parse(value, nodemask) < 0)
-				return -EINVAL;
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-
+	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
 	while (nr_reclaimed < nr_to_reclaim) {
 		unsigned long reclaimed;
@@ -6667,8 +6681,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 
 		reclaimed = try_to_free_mem_cgroup_pages(memcg,
 						nr_to_reclaim - nr_reclaimed,
-						GFP_KERNEL, reclaim_options,
-						&nodemask);
+						GFP_KERNEL, reclaim_options);
 
 		if (!reclaimed && !nr_retries--)
 			return -EAGAIN;
@@ -7310,6 +7323,8 @@ static int __init cgroup_memory(char *s)
 			cgroup_memory_nosocket = true;
 		if (!strcmp(token, "nokmem"))
 			cgroup_memory_nokmem = true;
+		if (!strcmp(token, "nobpf"))
+			cgroup_memory_nobpf = true;
 	}
 	return 1;
 }
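
With the nodes= parsing reverted, memory.reclaim again takes just a size, parsed by page_counter_memparse(), and try_to_free_mem_cgroup_pages() loses its nodemask argument throughout the file. A hypothetical userspace caller (the cgroup path is made up) would look like this; the write fails with EAGAIN when the reclaim target cannot be met:

/* Hypothetical userspace example: ask the kernel to proactively reclaim
 * ~64M from a cgroup v2 memory controller. The cgroup path is made up.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char req[] = "64M";
	int fd = open("/sys/fs/cgroup/test/memory.reclaim", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, req, strlen(req)) < 0)
		perror("write");	/* EAGAIN: reclaim target not met */
	close(fd);
	return 0;
}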