Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   133
1 file changed, 69 insertions, 64 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1ed40f9d3a27..fabce2b50c69 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -33,6 +33,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/vm_event_item.h>
 #include <linux/smp.h>
 #include <linux/page-flags.h>
@@ -3606,22 +3607,24 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 /*
  * Because page_memcg(head) is not set on tails, set it now.
  */
-void split_page_memcg(struct page *head, unsigned int nr)
+void split_page_memcg(struct page *head, int old_order, int new_order)
 {
         struct folio *folio = page_folio(head);
         struct mem_cgroup *memcg = folio_memcg(folio);
         int i;
+        unsigned int old_nr = 1 << old_order;
+        unsigned int new_nr = 1 << new_order;
 
         if (mem_cgroup_disabled() || !memcg)
                 return;
 
-        for (i = 1; i < nr; i++)
+        for (i = new_nr; i < old_nr; i += new_nr)
                 folio_page(folio, i)->memcg_data = folio->memcg_data;
 
         if (folio_memcg_kmem(folio))
-                obj_cgroup_get_many(__folio_objcg(folio), nr - 1);
+                obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1);
         else
-                css_get_many(&memcg->css, nr - 1);
+                css_get_many(&memcg->css, old_nr / new_nr - 1);
 }
 
 #ifdef CONFIG_SWAP
@@ -4800,7 +4803,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
         struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
         struct mem_cgroup *parent;
 
-        mem_cgroup_flush_stats(memcg);
+        mem_cgroup_flush_stats_ratelimited(memcg);
 
         *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
         *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
@@ -5621,7 +5624,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
         if (alloc_shrinker_info(memcg))
                 goto offline_kmem;
 
-        if (unlikely(mem_cgroup_is_root(memcg)))
+        if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
                 queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
                                    FLUSH_TIME);
         lru_gen_online_memcg(memcg);
@@ -5873,7 +5876,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 }
 
 union mc_target {
-        struct page     *page;
+        struct folio    *folio;
         swp_entry_t     ent;
 };
 
@@ -5965,23 +5968,22 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 }
 
 /**
- * mem_cgroup_move_account - move account of the page
- * @page: the page
+ * mem_cgroup_move_account - move account of the folio
+ * @folio: The folio.
  * @compound: charge the page as compound or small page
- * @from: mem_cgroup which the page is moved from.
- * @to: mem_cgroup which the page is moved to. @from != @to.
+ * @from: mem_cgroup which the folio is moved from.
+ * @to: mem_cgroup which the folio is moved to. @from != @to.
  *
- * The page must be locked and not on the LRU.
+ * The folio must be locked and not on the LRU.
  *
  * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
  * from old cgroup.
  */
-static int mem_cgroup_move_account(struct page *page,
+static int mem_cgroup_move_account(struct folio *folio,
                                    bool compound,
                                    struct mem_cgroup *from,
                                    struct mem_cgroup *to)
 {
-        struct folio *folio = page_folio(page);
         struct lruvec *from_vec, *to_vec;
         struct pglist_data *pgdat;
         unsigned int nr_pages = compound ? folio_nr_pages(folio) : 1;
@@ -6096,7 +6098,7 @@ out:
  * Return:
  * * MC_TARGET_NONE - If the pte is not a target for move charge.
  * * MC_TARGET_PAGE - If the page corresponding to this pte is a target for
- *   move charge. If @target is not NULL, the page is stored in target->page
+ *   move charge. If @target is not NULL, the folio is stored in target->folio
  *   with extra refcnt taken (Caller should release it).
  * * MC_TARGET_SWAP - If the swap entry corresponding to this pte is a
  *   target for charge migration.  If @target is not NULL, the entry is
@@ -6110,6 +6112,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                 unsigned long addr, pte_t ptent, union mc_target *target)
 {
         struct page *page = NULL;
+        struct folio *folio;
         enum mc_target_type ret = MC_TARGET_NONE;
         swp_entry_t ent = { .val = 0 };
 
@@ -6124,9 +6127,11 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
         else if (is_swap_pte(ptent))
                 page = mc_handle_swap_pte(vma, ptent, &ent);
 
+        if (page)
+                folio = page_folio(page);
         if (target && page) {
-                if (!trylock_page(page)) {
-                        put_page(page);
+                if (!folio_trylock(folio)) {
+                        folio_put(folio);
                         return ret;
                 }
                 /*
@@ -6141,8 +6146,8 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                  * Alas, skip moving the page in this case.
                  */
                 if (!pte_present(ptent) && page_mapped(page)) {
-                        unlock_page(page);
-                        put_page(page);
+                        folio_unlock(folio);
+                        folio_put(folio);
                         return ret;
                 }
         }
@@ -6155,18 +6160,18 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                  * mem_cgroup_move_account() checks the page is valid or
                  * not under LRU exclusion.
                  */
-                if (page_memcg(page) == mc.from) {
+                if (folio_memcg(folio) == mc.from) {
                         ret = MC_TARGET_PAGE;
-                        if (is_device_private_page(page) ||
-                            is_device_coherent_page(page))
+                        if (folio_is_device_private(folio) ||
+                            folio_is_device_coherent(folio))
                                 ret = MC_TARGET_DEVICE;
                         if (target)
-                                target->page = page;
+                                target->folio = folio;
                 }
                 if (!ret || !target) {
                         if (target)
-                                unlock_page(page);
-                        put_page(page);
+                                folio_unlock(folio);
+                        folio_put(folio);
                 }
         }
         /*
@@ -6192,6 +6197,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
                 unsigned long addr, pmd_t pmd, union mc_target *target)
 {
         struct page *page = NULL;
+        struct folio *folio;
         enum mc_target_type ret = MC_TARGET_NONE;
 
         if (unlikely(is_swap_pmd(pmd))) {
@@ -6201,17 +6207,18 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
         }
         page = pmd_page(pmd);
         VM_BUG_ON_PAGE(!page || !PageHead(page), page);
+        folio = page_folio(page);
         if (!(mc.flags & MOVE_ANON))
                 return ret;
-        if (page_memcg(page) == mc.from) {
+        if (folio_memcg(folio) == mc.from) {
                 ret = MC_TARGET_PAGE;
                 if (target) {
-                        get_page(page);
-                        if (!trylock_page(page)) {
-                                put_page(page);
+                        folio_get(folio);
+                        if (!folio_trylock(folio)) {
+                                folio_put(folio);
                                 return MC_TARGET_NONE;
                         }
-                        target->page = page;
+                        target->folio = folio;
                 }
         }
         return ret;
@@ -6431,7 +6438,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
         spinlock_t *ptl;
         enum mc_target_type target_type;
         union mc_target target;
-        struct page *page;
+        struct folio *folio;
 
         ptl = pmd_trans_huge_lock(pmd, vma);
         if (ptl) {
@@ -6441,26 +6448,26 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                 }
                 target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
                 if (target_type == MC_TARGET_PAGE) {
-                        page = target.page;
-                        if (isolate_lru_page(page)) {
-                                if (!mem_cgroup_move_account(page, true,
+                        folio = target.folio;
+                        if (folio_isolate_lru(folio)) {
+                                if (!mem_cgroup_move_account(folio, true,
                                                              mc.from, mc.to)) {
                                         mc.precharge -= HPAGE_PMD_NR;
                                         mc.moved_charge += HPAGE_PMD_NR;
                                 }
-                                putback_lru_page(page);
+                                folio_putback_lru(folio);
                         }
-                        unlock_page(page);
-                        put_page(page);
+                        folio_unlock(folio);
+                        folio_put(folio);
                 } else if (target_type == MC_TARGET_DEVICE) {
-                        page = target.page;
-                        if (!mem_cgroup_move_account(page, true,
+                        folio = target.folio;
+                        if (!mem_cgroup_move_account(folio, true,
                                                      mc.from, mc.to)) {
                                 mc.precharge -= HPAGE_PMD_NR;
                                 mc.moved_charge += HPAGE_PMD_NR;
                         }
-                        unlock_page(page);
-                        put_page(page);
+                        folio_unlock(folio);
+                        folio_put(folio);
                 }
                 spin_unlock(ptl);
                 return 0;
@@ -6483,28 +6490,28 @@ retry:
                         device = true;
                         fallthrough;
                 case MC_TARGET_PAGE:
-                        page = target.page;
+                        folio = target.folio;
                         /*
                          * We can have a part of the split pmd here. Moving it
                          * can be done but it would be too convoluted so simply
                          * ignore such a partial THP and keep it in original
                          * memcg. There should be somebody mapping the head.
                          */
-                        if (PageTransCompound(page))
+                        if (folio_test_large(folio))
                                 goto put;
-                        if (!device && !isolate_lru_page(page))
+                        if (!device && !folio_isolate_lru(folio))
                                 goto put;
-                        if (!mem_cgroup_move_account(page, false,
+                        if (!mem_cgroup_move_account(folio, false,
                                                 mc.from, mc.to)) {
                                 mc.precharge--;
                                 /* we uncharge from mc.from later. */
                                 mc.moved_charge++;
                         }
                         if (!device)
-                                putback_lru_page(page);
+                                folio_putback_lru(folio);
 put:                    /* get_mctgt_type() gets & locks the page */
-                        unlock_page(page);
-                        put_page(page);
+                        folio_unlock(folio);
+                        folio_put(folio);
                         break;
                 case MC_TARGET_SWAP:
                         ent = target.ent;
@@ -6977,6 +6984,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 
         reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
         while (nr_reclaimed < nr_to_reclaim) {
+                /* Will converge on zero, but reclaim enforces a minimum */
+                unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
                 unsigned long reclaimed;
 
                 if (signal_pending(current))
@@ -6991,8 +7000,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
                         lru_add_drain_all();
 
                 reclaimed = try_to_free_mem_cgroup_pages(memcg,
-                                        min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
-                                        GFP_KERNEL, reclaim_options);
+                                        batch_size, GFP_KERNEL, reclaim_options);
 
                 if (!reclaimed && !nr_retries--)
                         return -EAGAIN;
@@ -7505,21 +7513,14 @@ void __mem_cgroup_uncharge(struct folio *folio)
         uncharge_batch(&ug);
 }
 
-/**
- * __mem_cgroup_uncharge_list - uncharge a list of page
- * @page_list: list of pages to uncharge
- *
- * Uncharge a list of pages previously charged with
- * __mem_cgroup_charge().
- */
-void __mem_cgroup_uncharge_list(struct list_head *page_list)
+void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
         struct uncharge_gather ug;
-        struct folio *folio;
+        unsigned int i;
 
         uncharge_gather_clear(&ug);
-        list_for_each_entry(folio, page_list, lru)
-                uncharge_folio(folio, &ug);
+        for (i = 0; i < folios->nr; i++)
+                uncharge_folio(folios->folios[i], &ug);
         if (ug.memcg)
                 uncharge_batch(&ug);
 }
@@ -7971,9 +7972,13 @@ bool mem_cgroup_swap_full(struct folio *folio)
 
 static int __init setup_swap_account(char *s)
 {
-        pr_warn_once("The swapaccount= commandline option is deprecated. "
-                     "Please report your usecase to linux-mm@kvack.org if you "
-                     "depend on this functionality.\n");
+        bool res;
+
+        if (!kstrtobool(s, &res) && !res)
+                pr_warn_once("The swapaccount=0 commandline option is deprecated "
+                             "in favor of configuring swap control via cgroupfs. "
+                             "Please report your usecase to linux-mm@kvack.org if you "
+                             "depend on this functionality.\n");
         return 1;
 }
 __setup("swapaccount=", setup_swap_account);