Diffstat (limited to 'mm/workingset.c')
-rw-r--r--   mm/workingset.c   114
1 file changed, 77 insertions(+), 37 deletions(-)
diff --git a/mm/workingset.c b/mm/workingset.c
index fb1f9183d89a..241fa5d6b3b2 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -10,6 +10,7 @@
 #include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -334,48 +335,81 @@ out:
  * point where they would still be useful.
  */
 
-struct list_lru workingset_shadow_nodes;
+static struct list_lru shadow_nodes;
+
+void workingset_update_node(struct radix_tree_node *node, void *private)
+{
+	struct address_space *mapping = private;
+
+	/* Only regular page cache has shadow entries */
+	if (dax_mapping(mapping) || shmem_mapping(mapping))
+		return;
+
+	/*
+	 * Track non-empty nodes that contain only shadow entries;
+	 * unlink those that contain pages or are being freed.
+	 *
+	 * Avoid acquiring the list_lru lock when the nodes are
+	 * already where they should be. The list_empty() test is safe
+	 * as node->private_list is protected by &mapping->tree_lock.
+	 */
+	if (node->count && node->count == node->exceptional) {
+		if (list_empty(&node->private_list)) {
+			node->private_data = mapping;
+			list_lru_add(&shadow_nodes, &node->private_list);
+		}
+	} else {
+		if (!list_empty(&node->private_list))
+			list_lru_del(&shadow_nodes, &node->private_list);
+	}
+}
 
 static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 					struct shrink_control *sc)
 {
-	unsigned long shadow_nodes;
 	unsigned long max_nodes;
-	unsigned long pages;
+	unsigned long nodes;
+	unsigned long cache;
 
 	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
 	local_irq_disable();
-	shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
+	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 	local_irq_enable();
 
-	if (sc->memcg) {
-		pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
-						     LRU_ALL_FILE);
-	} else {
-		pages = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
-			node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
-	}
-
 	/*
-	 * Active cache pages are limited to 50% of memory, and shadow
-	 * entries that represent a refault distance bigger than that
-	 * do not have any effect.  Limit the number of shadow nodes
-	 * such that shadow entries do not exceed the number of active
-	 * cache pages, assuming a worst-case node population density
-	 * of 1/8th on average.
+	 * Approximate a reasonable limit for the radix tree nodes
+	 * containing shadow entries. We don't need to keep more
+	 * shadow entries than possible pages on the active list,
+	 * since refault distances bigger than that are dismissed.
+	 *
+	 * The size of the active list converges toward 100% of
+	 * overall page cache as memory grows, with only a tiny
+	 * inactive list. Assume the total cache size for that.
+	 *
+	 * Nodes might be sparsely populated, with only one shadow
+	 * entry in the extreme case. Obviously, we cannot keep one
+	 * node for every eligible shadow entry, so compromise on a
+	 * worst-case density of 1/8th. Below that, not all eligible
+	 * refaults can be detected anymore.
 	 *
 	 * On 64-bit with 7 radix_tree_nodes per page and 64 slots
 	 * each, this will reclaim shadow entries when they consume
-	 * ~2% of available memory:
+	 * ~1.8% of available memory:
 	 *
-	 * PAGE_SIZE / radix_tree_nodes / node_entries / PAGE_SIZE
+	 * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE
 	 */
-	max_nodes = pages >> (1 + RADIX_TREE_MAP_SHIFT - 3);
+	if (sc->memcg) {
+		cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
+						     LRU_ALL_FILE);
+	} else {
+		cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
+			node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
+	}
+	max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
 
-	if (shadow_nodes <= max_nodes)
+	if (nodes <= max_nodes)
 		return 0;
-
-	return shadow_nodes - max_nodes;
+	return nodes - max_nodes;
 }
 
 static enum lru_status shadow_lru_isolate(struct list_head *item,
@@ -418,23 +452,30 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * no pages, so we expect to be able to remove them all and
 	 * delete and free the empty node afterwards.
 	 */
-	BUG_ON(!workingset_node_shadows(node));
-	BUG_ON(workingset_node_pages(node));
-
+	if (WARN_ON_ONCE(!node->exceptional))
+		goto out_invalid;
+	if (WARN_ON_ONCE(node->count != node->exceptional))
+		goto out_invalid;
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
-			BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
+			if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
+				goto out_invalid;
+			if (WARN_ON_ONCE(!node->exceptional))
+				goto out_invalid;
+			if (WARN_ON_ONCE(!mapping->nrexceptional))
+				goto out_invalid;
 			node->slots[i] = NULL;
-			workingset_node_shadows_dec(node);
-			BUG_ON(!mapping->nrexceptional);
+			node->exceptional--;
+			node->count--;
 			mapping->nrexceptional--;
 		}
 	}
-	BUG_ON(workingset_node_shadows(node));
+	if (WARN_ON_ONCE(node->exceptional))
+		goto out_invalid;
 	inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
-	if (!__radix_tree_delete_node(&mapping->page_tree, node))
-		BUG();
+	__radix_tree_delete_node(&mapping->page_tree, node);
 
+out_invalid:
 	spin_unlock(&mapping->tree_lock);
 	ret = LRU_REMOVED_RETRY;
 out:
@@ -452,8 +493,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 
 	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
 	local_irq_disable();
-	ret =  list_lru_shrink_walk(&workingset_shadow_nodes, sc,
-				    shadow_lru_isolate, NULL);
+	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
 	local_irq_enable();
 	return ret;
 }
@@ -492,7 +532,7 @@ static int __init workingset_init(void)
 	pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
 	       timestamp_bits, max_order, bucket_order);
 
-	ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
+	ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
 	if (ret)
 		goto err;
 	ret = register_shrinker(&workingset_shadow_shrinker);
@@ -500,7 +540,7 @@ static int __init workingset_init(void)
 		goto err_list_lru;
 	return 0;
 err_list_lru:
-	list_lru_destroy(&workingset_shadow_nodes);
+	list_lru_destroy(&shadow_nodes);
err:
 	return ret;
 }
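
A note on the workingset_update_node() hunk above: a node is kept on the shadow-node LRU only while it is non-empty and contains nothing but shadow entries. Below is a minimal user-space sketch of that invariant, not kernel code; fake_node and its on_lru flag are hypothetical stand-ins for the real radix tree node fields and for node->private_list membership:

#include <stdbool.h>

/* Hypothetical stand-ins for the fields the kernel function inspects. */
struct fake_node {
	unsigned int count;		/* populated slots */
	unsigned int exceptional;	/* slots holding shadow entries */
	bool on_lru;			/* stands in for private_list state */
};

/* Mirrors the tracking rule: linked iff non-empty and all shadows. */
static void update_tracking(struct fake_node *node)
{
	if (node->count && node->count == node->exceptional)
		node->on_lru = true;	/* only shadows: reclaimable */
	else
		node->on_lru = false;	/* pages present or empty: unlink */
}

int main(void)
{
	struct fake_node node = { .count = 3, .exceptional = 3 };

	update_tracking(&node);	/* only shadows -> linked */
	node.count++;		/* a page is inserted */
	update_tracking(&node);	/* mixed contents -> unlinked */
	return node.on_lru;	/* exits 0: node is off the LRU */
}

The kernel version applies the same test but additionally skips DAX and shmem mappings, which never carry shadow entries, and avoids taking the list_lru lock when the node is already in the right state.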
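The rewritten limit calculation in count_shadow_nodes() packs its arithmetic into one comment. Here is a worked example with concrete numbers, assuming 4K pages, RADIX_TREE_MAP_SHIFT = 6 (64 slots per node) and the patch's figure of 7 radix_tree_nodes per page; the program and its constants are illustrative only:

#include <stdio.h>

#define PAGE_SIZE            4096UL
#define RADIX_TREE_MAP_SHIFT 6	/* 64 slots per node */
#define NODES_PER_PAGE       7	/* ~576-byte radix_tree_node */

int main(void)
{
	unsigned long cache = 1UL << 20;	/* 1M cache pages, ~4G */

	/* cache >> (6 - 3) == cache / 8: one node allowed per eight
	 * cache pages, the worst-case density of 1/8th above. */
	unsigned long max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);

	double node_mem  = (double)max_nodes * PAGE_SIZE / NODES_PER_PAGE;
	double cache_mem = (double)cache * PAGE_SIZE;

	printf("max_nodes = %lu\n", max_nodes);	/* 131072 */
	printf("node overhead = %.1f%% of cache\n",
	       100.0 * node_mem / cache_mem);	/* 1.8 */
	return 0;
}

One node of PAGE_SIZE / 7 bytes per 64 / 8 = 8 cache pages works out to 1/56 of cache memory, which is the ~1.8% the updated comment cites.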