Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	211
1 file changed, 118 insertions, 93 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6c5d0b28321..e5d52d6a24af 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -171,11 +171,22 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+static void set_task_reclaim_state(struct task_struct *task,
+				   struct reclaim_state *rs)
+{
+	/* Check for an overwrite */
+	WARN_ON_ONCE(rs && task->reclaim_state);
+
+	/* Check for the nulling of an already-nulled member */
+	WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+	task->reclaim_state = rs;
+}
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
-#ifdef CONFIG_MEMCG_KMEM
-
+#ifdef CONFIG_MEMCG
 /*
  * We allow subsystems to populate their shrinker-related
  * LRU lists before register_shrinker_prepared() is called
@@ -227,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
 	idr_remove(&shrinker_idr, id);
 	up_write(&shrinker_rwsem);
 }
-#else /* CONFIG_MEMCG_KMEM */
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
-{
-	return 0;
-}
-
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
 
-static void set_task_reclaim_state(struct task_struct *task,
-				   struct reclaim_state *rs)
-{
-	/* Check for an overwrite */
-	WARN_ON_ONCE(rs && task->reclaim_state);
-
-	/* Check for the nulling of an already-nulled member */
-	WARN_ON_ONCE(!rs && !task->reclaim_state);
-
-	task->reclaim_state = rs;
-}
-
-#ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
@@ -305,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat,
 }
 
 #else
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+	return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
@@ -591,7 +588,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	return freed;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			struct mem_cgroup *memcg, int priority)
 {
@@ -599,7 +596,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 	unsigned long ret, freed = 0;
 	int i;
 
-	if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+	if (!mem_cgroup_online(memcg))
 		return 0;
 
 	if (!down_read_trylock(&shrinker_rwsem))
@@ -625,6 +622,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			continue;
 		}
 
+		/* Call non-slab shrinkers even though kmem is disabled */
+		if (!memcg_kmem_enabled() &&
+		    !(shrinker->flags & SHRINKER_NONSLAB))
+			continue;
+
 		ret = do_shrink_slab(&sc, shrinker, priority);
 		if (ret == SHRINK_EMPTY) {
 			clear_bit(i, map->map);
@@ -661,13 +663,13 @@ unlock:
 	up_read(&shrinker_rwsem);
 	return freed;
 }
-#else /* CONFIG_MEMCG_KMEM */
+#else /* CONFIG_MEMCG */
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			struct mem_cgroup *memcg, int priority)
 {
 	return 0;
 }
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
 
 /**
  * shrink_slab - shrink slab caches
@@ -1121,7 +1123,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				      struct scan_control *sc,
 				      enum ttu_flags ttu_flags,
 				      struct reclaim_stat *stat,
-				      bool force_reclaim)
+				      bool ignore_references)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
@@ -1135,7 +1137,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		struct address_space *mapping;
 		struct page *page;
 		int may_enter_fs;
-		enum page_references references = PAGEREF_RECLAIM_CLEAN;
+		enum page_references references = PAGEREF_RECLAIM;
 		bool dirty, writeback;
 		unsigned int nr_pages;
 
@@ -1149,7 +1151,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		VM_BUG_ON_PAGE(PageActive(page), page);
 
-		nr_pages = 1 << compound_order(page);
+		nr_pages = compound_nr(page);
 
 		/* Account the number of base pages even though THP */
 		sc->nr_scanned += nr_pages;
@@ -1266,7 +1268,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
-		if (!force_reclaim)
+		if (!ignore_references)
 			references = page_check_references(page, sc);
 
 		switch (references) {
@@ -1487,10 +1489,9 @@ free_it:
 		 * Is there need to periodically free_page_list? It would
 		 * appear not as the counts should be low
 		 */
-		if (unlikely(PageTransHuge(page))) {
-			mem_cgroup_uncharge(page);
+		if (unlikely(PageTransHuge(page)))
 			(*get_compound_page_dtor(page))(page);
-		} else
+		else
 			list_add(&page->lru, &free_pages);
 
 		continue;
@@ -1705,7 +1706,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-		nr_pages = 1 << compound_order(page);
+		nr_pages = compound_nr(page);
 		total_scan += nr_pages;
 
 		if (page_zonenum(page) > sc->reclaim_idx) {
@@ -1911,7 +1912,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&pgdat->lru_lock);
-				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&pgdat->lru_lock);
 			} else
@@ -2145,6 +2145,62 @@ static void shrink_active_list(unsigned long nr_to_scan,
 			nr_deactivate, nr_rotated, sc->priority, file);
 }
 
+unsigned long reclaim_pages(struct list_head *page_list)
+{
+	int nid = -1;
+	unsigned long nr_reclaimed = 0;
+	LIST_HEAD(node_page_list);
+	struct reclaim_stat dummy_stat;
+	struct page *page;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.priority = DEF_PRIORITY,
+		.may_writepage = 1,
+		.may_unmap = 1,
+		.may_swap = 1,
+	};
+
+	while (!list_empty(page_list)) {
+		page = lru_to_page(page_list);
+		if (nid == -1) {
+			nid = page_to_nid(page);
+			INIT_LIST_HEAD(&node_page_list);
+		}
+
+		if (nid == page_to_nid(page)) {
+			ClearPageActive(page);
+			list_move(&page->lru, &node_page_list);
+			continue;
+		}
+
+		nr_reclaimed += shrink_page_list(&node_page_list,
+						NODE_DATA(nid),
+						&sc, 0,
+						&dummy_stat, false);
+		while (!list_empty(&node_page_list)) {
+			page = lru_to_page(&node_page_list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+
+		nid = -1;
+	}
+
+	if (!list_empty(&node_page_list)) {
+		nr_reclaimed += shrink_page_list(&node_page_list,
+						NODE_DATA(nid),
+						&sc, 0,
+						&dummy_stat, false);
+		while (!list_empty(&node_page_list)) {
+			page = lru_to_page(&node_page_list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+	}
+
+	return nr_reclaimed;
+}
+
 /*
  * The inactive anon list should be small enough that the VM never has
  * to do too much work.
@@ -2586,7 +2642,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  */
 static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 					unsigned long nr_reclaimed,
-					unsigned long nr_scanned,
 					struct scan_control *sc)
 {
 	unsigned long pages_for_compaction;
@@ -2597,40 +2652,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	if (!in_reclaim_compaction(sc))
 		return false;
 
-	/* Consider stopping depending on scan and reclaim activity */
-	if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
-		/*
-		 * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
-		 * full LRU list has been scanned and we are still failing
-		 * to reclaim pages. This full LRU scan is potentially
-		 * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
-		 */
-		if (!nr_reclaimed && !nr_scanned)
-			return false;
-	} else {
-		/*
-		 * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
-		 * fail without consequence, stop if we failed to reclaim
-		 * any pages from the last SWAP_CLUSTER_MAX number of
-		 * pages that were scanned. This will return to the
-		 * caller faster at the risk reclaim/compaction and
-		 * the resulting allocation attempt fails
-		 */
-		if (!nr_reclaimed)
-			return false;
-	}
-
 	/*
-	 * If we have not reclaimed enough pages for compaction and the
-	 * inactive lists are large enough, continue reclaiming
+	 * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX
+	 * number of pages that were scanned. This will return to the caller
+	 * with the risk reclaim/compaction and the resulting allocation attempt
+	 * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL
+	 * allocations through requiring that the full LRU list has been scanned
+	 * first, by assuming that zero delta of sc->nr_scanned means full LRU
+	 * scan, but that approximation was wrong, and there were corner cases
+	 * where always a non-zero amount of pages were scanned.
 	 */
-	pages_for_compaction = compact_gap(sc->order);
-	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-	if (get_nr_swap_pages() > 0)
-		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
-	if (sc->nr_reclaimed < pages_for_compaction &&
-			inactive_lru_pages > pages_for_compaction)
-		return true;
+	if (!nr_reclaimed)
+		return false;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
 	for (z = 0; z <= sc->reclaim_idx; z++) {
@@ -2647,7 +2680,17 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 			;
 		}
 	}
-	return true;
+
+	/*
+	 * If we have not reclaimed enough pages for compaction and the
+	 * inactive lists are large enough, continue reclaiming
+	 */
+	pages_for_compaction = compact_gap(sc->order);
+	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
+	if (get_nr_swap_pages() > 0)
+		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+	return inactive_lru_pages > pages_for_compaction;
 }
 
 static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
@@ -2664,10 +2707,6 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 
 	do {
 		struct mem_cgroup *root = sc->target_mem_cgroup;
-		struct mem_cgroup_reclaim_cookie reclaim = {
-			.pgdat = pgdat,
-			.priority = sc->priority,
-		};
 		unsigned long node_lru_pages = 0;
 		struct mem_cgroup *memcg;
 
@@ -2676,7 +2715,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		nr_reclaimed = sc->nr_reclaimed;
 		nr_scanned = sc->nr_scanned;
 
-		memcg = mem_cgroup_iter(root, NULL, &reclaim);
+		memcg = mem_cgroup_iter(root, NULL, NULL);
 		do {
 			unsigned long lru_pages;
 			unsigned long reclaimed;
@@ -2719,21 +2758,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 				   sc->nr_scanned - scanned,
 				   sc->nr_reclaimed - reclaimed);
 
-			/*
-			 * Kswapd have to scan all memory cgroups to fulfill
-			 * the overall scan target for the node.
-			 *
-			 * Limit reclaim, on the other hand, only cares about
-			 * nr_to_reclaim pages to be reclaimed and it will
-			 * retry with decreasing priority if one round over the
-			 * whole hierarchy is not sufficient.
-			 */
-			if (!current_is_kswapd() &&
-					sc->nr_reclaimed >= sc->nr_to_reclaim) {
-				mem_cgroup_iter_break(root, memcg);
-				break;
-			}
-		} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+		} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 
 		if (reclaim_state) {
 			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -2810,7 +2835,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 			wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
 	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-					 sc->nr_scanned - nr_scanned, sc));
+					 sc));
 
 	/*
 	 * Kswapd gives up on balancing particular nodes after too
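Note on the SHRINKER_NONSLAB hunk above: with the #ifdef switched from CONFIG_MEMCG_KMEM to CONFIG_MEMCG, shrink_slab_memcg() now skips only memcg-aware shrinkers whose objects are kmem-accounted slab memory when kmem accounting is disabled; a shrinker tracking non-slab, per-memcg objects can opt in with the new flag and still gets called. Below is a minimal sketch of how such a shrinker might be registered against the shrinker API of this kernel generation; it is illustrative only, the demo_* names are hypothetical, and the callbacks are stubs.

#include <linux/init.h>
#include <linux/shrinker.h>

/* Hypothetical callbacks for a per-memcg cache of non-slab objects. */
static unsigned long demo_count_objects(struct shrinker *shrink,
					struct shrink_control *sc)
{
	/* sc->memcg and sc->nid identify which cgroup/node is being shrunk */
	return SHRINK_EMPTY;	/* nothing cached; lets vmscan clear the map bit */
}

static unsigned long demo_scan_objects(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	/* would free up to sc->nr_to_scan objects and return how many */
	return SHRINK_STOP;
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count_objects,
	.scan_objects	= demo_scan_objects,
	.seeks		= DEFAULT_SEEKS,
	/*
	 * SHRINKER_MEMCG_AWARE: tracked in the per-memcg shrinker map that
	 * shrink_slab_memcg() walks.
	 * SHRINKER_NONSLAB: still invoked when !memcg_kmem_enabled(), which
	 * is exactly the new check added in the hunk above.
	 */
	.flags		= SHRINKER_MEMCG_AWARE | SHRINKER_NONSLAB,
};

static int __init demo_shrinker_init(void)
{
	return register_shrinker(&demo_shrinker);
}

Pairing SHRINKER_NONSLAB with SHRINKER_MEMCG_AWARE is what makes the combination useful: the shrinker is iterated per memcg, yet no longer depends on kmem accounting being enabled.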