Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           | 15
-rw-r--r--  mm/compaction.c      | 22
-rw-r--r--  mm/filemap.c         | 49
-rw-r--r--  mm/hugetlb.c         | 19
-rw-r--r--  mm/kmemleak.c        |  4
-rw-r--r--  mm/memcontrol.c      | 20
-rw-r--r--  mm/mremap.c          |  9
-rw-r--r--  mm/page-writeback.c  |  6
-rw-r--r--  mm/percpu.c          |  2
-rw-r--r--  mm/slab.c            |  6
-rw-r--r--  mm/slab.h            |  1
-rw-r--r--  mm/slab_common.c     | 13
-rw-r--r--  mm/slub.c            | 41
-rw-r--r--  mm/truncate.c        |  8
-rw-r--r--  mm/util.c            | 10
-rw-r--r--  mm/vmacache.c        |  8
-rw-r--r--  mm/vmscan.c          | 18
17 files changed, 158 insertions, 93 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index ebe5880c29d6..1b5a95f0fa01 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -581,3 +581,18 @@ config PGTABLE_MAPPING
 
 config GENERIC_EARLY_IOREMAP
 	bool
+
+config MAX_STACK_SIZE_MB
+	int "Maximum user stack size for 32-bit processes (MB)"
+	default 80
+	range 8 256 if METAG
+	range 8 2048
+	depends on STACK_GROWSUP && (!64BIT || COMPAT)
+	help
+	  This is the maximum stack size in Megabytes in the VM layout of 32-bit
+	  user processes when the stack grows upwards (currently only on parisc
+	  and metag arch). The stack will be located at the highest memory
+	  address minus the given value, unless the RLIMIT_STACK hard limit is
+	  changed to a smaller value in which case that is used.
+
+	  A sane initial value is 80 MB.
diff --git a/mm/compaction.c b/mm/compaction.c
index 37f976287068..627dc2e4320f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -671,16 +671,20 @@ static void isolate_freepages(struct zone *zone,
 				struct compact_control *cc)
 {
 	struct page *page;
-	unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn;
+	unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
 	int nr_freepages = cc->nr_freepages;
 	struct list_head *freelist = &cc->freepages;
 
 	/*
 	 * Initialise the free scanner. The starting point is where we last
-	 * scanned from (or the end of the zone if starting). The low point
-	 * is the end of the pageblock the migration scanner is using.
+	 * successfully isolated from, zone-cached value, or the end of the
+	 * zone when isolating for the first time. We need this aligned to
+	 * the pageblock boundary, because we do pfn -= pageblock_nr_pages
+	 * in the for loop.
+	 * The low boundary is the end of the pageblock the migration scanner
+	 * is using.
 	 */
-	pfn = cc->free_pfn;
+	pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
 	low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
 
 	/*
@@ -700,6 +704,7 @@ static void isolate_freepages(struct zone *zone,
 	for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
 					pfn -= pageblock_nr_pages) {
 		unsigned long isolated;
+		unsigned long end_pfn;
 
 		/*
 		 * This can iterate a massively long zone without finding any
@@ -734,13 +739,10 @@ static void isolate_freepages(struct zone *zone,
 		isolated = 0;
 
 		/*
-		 * As pfn may not start aligned, pfn+pageblock_nr_page
-		 * may cross a MAX_ORDER_NR_PAGES boundary and miss
-		 * a pfn_valid check. Ensure isolate_freepages_block()
-		 * only scans within a pageblock
+		 * Take care when isolating in last pageblock of a zone which
+		 * ends in the middle of a pageblock.
 		 */
-		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
-		end_pfn = min(end_pfn, z_end_pfn);
+		end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
 		isolated = isolate_freepages_block(cc, pfn, end_pfn,
 						   freelist, false);
 		nr_freepages += isolated;
diff --git a/mm/filemap.c b/mm/filemap.c
index 5020b280a771..000a220e2a41 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -906,8 +906,8 @@ EXPORT_SYMBOL(page_cache_prev_hole);
  * Looks up the page cache slot at @mapping & @offset.  If there is a
  * page cache page, it is returned with an increased refcount.
  *
- * If the slot holds a shadow entry of a previously evicted page, it
- * is returned.
+ * If the slot holds a shadow entry of a previously evicted page, or a
+ * swap entry from shmem/tmpfs, it is returned.
  *
  * Otherwise, %NULL is returned.
  */
@@ -928,9 +928,9 @@ repeat:
 			if (radix_tree_deref_retry(page))
 				goto repeat;
 			/*
-			 * Otherwise, shmem/tmpfs must be storing a swap entry
-			 * here as an exceptional entry: so return it without
-			 * attempting to raise page count.
+			 * A shadow entry of a recently evicted page,
+			 * or a swap entry from shmem/tmpfs.  Return
+			 * it without attempting to raise page count.
 			 */
 			goto out;
 		}
@@ -983,8 +983,8 @@ EXPORT_SYMBOL(find_get_page);
  * page cache page, it is returned locked and with an increased
  * refcount.
  *
- * If the slot holds a shadow entry of a previously evicted page, it
- * is returned.
+ * If the slot holds a shadow entry of a previously evicted page, or a
+ * swap entry from shmem/tmpfs, it is returned.
  *
  * Otherwise, %NULL is returned.
  *
@@ -1099,8 +1099,8 @@ EXPORT_SYMBOL(find_or_create_page);
  * with ascending indexes.  There may be holes in the indices due to
  * not-present pages.
  *
- * Any shadow entries of evicted pages are included in the returned
- * array.
+ * Any shadow entries of evicted pages, or swap entries from
+ * shmem/tmpfs, are included in the returned array.
  *
  * find_get_entries() returns the number of pages and shadow entries
  * which were found.
@@ -1128,9 +1128,9 @@ repeat:
 			if (radix_tree_deref_retry(page))
 				goto restart;
 			/*
-			 * Otherwise, we must be storing a swap entry
-			 * here as an exceptional entry: so return it
-			 * without attempting to raise page count.
+			 * A shadow entry of a recently evicted page,
+			 * or a swap entry from shmem/tmpfs.  Return
+			 * it without attempting to raise page count.
 			 */
 			goto export;
 		}
@@ -1198,9 +1198,9 @@ repeat:
 				goto restart;
 			}
 			/*
-			 * Otherwise, shmem/tmpfs must be storing a swap entry
-			 * here as an exceptional entry: so skip over it -
-			 * we only reach this from invalidate_mapping_pages().
+			 * A shadow entry of a recently evicted page,
+			 * or a swap entry from shmem/tmpfs.  Skip
+			 * over it.
 			 */
 			continue;
 		}
@@ -1265,9 +1265,9 @@ repeat:
 				goto restart;
 			}
 			/*
-			 * Otherwise, shmem/tmpfs must be storing a swap entry
-			 * here as an exceptional entry: so stop looking for
-			 * contiguous pages.
+			 * A shadow entry of a recently evicted page,
+			 * or a swap entry from shmem/tmpfs.  Stop
+			 * looking for contiguous pages.
 			 */
 			break;
 		}
@@ -1341,10 +1341,17 @@ repeat:
 				goto restart;
 			}
 			/*
-			 * This function is never used on a shmem/tmpfs
-			 * mapping, so a swap entry won't be found here.
+			 * A shadow entry of a recently evicted page.
+			 *
+			 * Those entries should never be tagged, but
+			 * this tree walk is lockless and the tags are
+			 * looked up in bulk, one radix tree node at a
+			 * time, so there is a sizable window for page
+			 * reclaim to evict a page we saw tagged.
+			 *
+			 * Skip over it.
 			 */
-			BUG();
+			continue;
 		}
 
 		if (!page_cache_get_speculative(page))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..c82290b9c1fc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1981,11 +1981,7 @@ static int __init hugetlb_init(void)
 {
 	int i;
 
-	/* Some platform decide whether they support huge pages at boot
-	 * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
-	 * there is no such support
-	 */
-	if (HPAGE_SHIFT == 0)
+	if (!hugepages_supported())
 		return 0;
 
 	if (!size_to_hstate(default_hstate_size)) {
@@ -2112,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 	unsigned long tmp;
 	int ret;
 
+	if (!hugepages_supported())
+		return -ENOTSUPP;
+
 	tmp = h->max_huge_pages;
 
 	if (write && h->order >= MAX_ORDER)
@@ -2165,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 	unsigned long tmp;
 	int ret;
 
+	if (!hugepages_supported())
+		return -ENOTSUPP;
+
 	tmp = h->nr_overcommit_huge_pages;
 
 	if (write && h->order >= MAX_ORDER)
@@ -2190,6 +2192,8 @@ out:
 void hugetlb_report_meminfo(struct seq_file *m)
 {
 	struct hstate *h = &default_hstate;
+	if (!hugepages_supported())
+		return;
 	seq_printf(m,
 			"HugePages_Total:   %5lu\n"
 			"HugePages_Free:    %5lu\n"
@@ -2206,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
 	struct hstate *h = &default_hstate;
+	if (!hugepages_supported())
+		return 0;
 	return sprintf(buf,
 		"Node %d HugePages_Total: %5u\n"
 		"Node %d HugePages_Free:  %5u\n"
@@ -2220,6 +2226,9 @@ void hugetlb_show_meminfo(void)
 	struct hstate *h;
 	int nid;
 
+	if (!hugepages_supported())
+		return;
+
 	for_each_node_state(nid, N_MEMORY)
 		for_each_hstate(h)
 			pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 91d67eaee050..8d2fcdfeff7f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1775,10 +1775,9 @@ void __init kmemleak_init(void)
 	int i;
 	unsigned long flags;
 
-	kmemleak_early_log = 0;
-
 #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
 	if (!kmemleak_skip_disable) {
+		kmemleak_early_log = 0;
 		kmemleak_disable();
 		return;
 	}
@@ -1796,6 +1795,7 @@ void __init kmemleak_init(void)
 
 	/* the kernel is still in UP mode, so disabling the IRQs is enough */
 	local_irq_save(flags);
+	kmemleak_early_log = 0;
 	if (kmemleak_error) {
 		local_irq_restore(flags);
 		return;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..c47dffdcb246 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6686,16 +6686,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		pgoff = pte_to_pgoff(ptent);
 
 	/* page is moved even if it's not RSS of this task(page-faulted). */
-	page = find_get_page(mapping, pgoff);
-
 #ifdef CONFIG_SWAP
 	/* shmem/tmpfs may report page out on swap: account for that too. */
-	if (radix_tree_exceptional_entry(page)) {
-		swp_entry_t swap = radix_to_swp_entry(page);
-		if (do_swap_account)
-			*entry = swap;
-		page = find_get_page(swap_address_space(swap), swap.val);
-	}
+	if (shmem_mapping(mapping)) {
+		page = find_get_entry(mapping, pgoff);
+		if (radix_tree_exceptional_entry(page)) {
+			swp_entry_t swp = radix_to_swp_entry(page);
+			if (do_swap_account)
+				*entry = swp;
+			page = find_get_page(swap_address_space(swp), swp.val);
+		}
+	} else
+		page = find_get_page(mapping, pgoff);
+#else
+	page = find_get_page(mapping, pgoff);
 #endif
 	return page;
 }
diff --git a/mm/mremap.c b/mm/mremap.c
index 0843feb66f3d..05f1180e9f21 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -194,10 +194,17 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			break;
 		if (pmd_trans_huge(*old_pmd)) {
 			int err = 0;
-			if (extent == HPAGE_PMD_SIZE)
+			if (extent == HPAGE_PMD_SIZE) {
+				VM_BUG_ON(vma->vm_file || !vma->anon_vma);
+				/* See comment in move_ptes() */
+				if (need_rmap_locks)
+					anon_vma_lock_write(vma->anon_vma);
 				err = move_huge_pmd(vma, new_vma, old_addr,
 						    new_addr, old_end,
 						    old_pmd, new_pmd);
+				if (need_rmap_locks)
+					anon_vma_unlock_write(vma->anon_vma);
+			}
 			if (err > 0) {
 				need_flush = true;
 				continue;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ef413492a149..a4317da60532 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
  * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
  *     => fast response on large errors; small oscillation near setpoint
  */
-static inline long long pos_ratio_polynom(unsigned long setpoint,
+static long long pos_ratio_polynom(unsigned long setpoint,
 					  unsigned long dirty,
 					  unsigned long limit)
 {
 	long long pos_ratio;
 	long x;
 
-	x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
 		    limit - setpoint + 1);
 	pos_ratio = x;
 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	x_intercept = bdi_setpoint + span;
 
 	if (bdi_dirty < x_intercept - span / 4) {
-		pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
+		pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
 				    x_intercept - bdi_setpoint + 1);
 	} else
 		pos_ratio /= 4;
diff --git a/mm/percpu.c b/mm/percpu.c
index 63e24fb4387b..2ddf9a990dbd 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -610,7 +610,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
 						sizeof(chunk->map[0]));
 	if (!chunk->map) {
-		kfree(chunk);
+		pcpu_mem_free(chunk, pcpu_chunk_struct_size);
 		return NULL;
 	}
 
diff --git a/mm/slab.c b/mm/slab.c
index 388cb1ae6fbc..19d92181ce24 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -166,7 +166,7 @@ typedef unsigned char freelist_idx_t;
 typedef unsigned short freelist_idx_t;
 #endif
 
-#define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE)
+#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
 
 /*
  * true if a page was allocated from pfmemalloc reserves for network-based
@@ -2572,13 +2572,13 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
 	return freelist;
 }
 
-static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx)
+static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
 {
 	return ((freelist_idx_t *)page->freelist)[idx];
 }
 
 static inline void set_free_obj(struct page *page,
-					unsigned char idx, freelist_idx_t val)
+					unsigned int idx, freelist_idx_t val)
 {
 	((freelist_idx_t *)(page->freelist))[idx] = val;
 }
diff --git a/mm/slab.h b/mm/slab.h
index 3045316b7c9d..6bd4c353704f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
+void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
 struct file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f3cfccf76dda..102cc6fca3d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
+void slab_kmem_cache_release(struct kmem_cache *s)
+{
+	kfree(s->name);
+	kmem_cache_free(kmem_cache, s);
+}
+
 void kmem_cache_destroy(struct kmem_cache *s)
 {
 	get_online_cpus();
@@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		rcu_barrier();
 
 	memcg_free_cache_params(s);
-	kfree(s->name);
-	kmem_cache_free(kmem_cache, s);
+#ifdef SLAB_SUPPORTS_SYSFS
+	sysfs_slab_remove(s);
+#else
+	slab_kmem_cache_release(s);
+#endif
 	goto out_put_cpus;
 
 out_unlock:
diff --git a/mm/slub.c b/mm/slub.c
index 5e234f1f8853..2b1ce697fc4b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void sysfs_slab_remove(struct kmem_cache *);
 static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
-
 static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
@@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 
 int __kmem_cache_shutdown(struct kmem_cache *s)
 {
-	int rc = kmem_cache_close(s);
-
-	if (!rc) {
-		/*
-		 * Since slab_attr_store may take the slab_mutex, we should
-		 * release the lock while removing the sysfs entry in order to
-		 * avoid a deadlock. Because this is pretty much the last
-		 * operation we do and the lock will be released shortly after
-		 * that in slab_common.c, we could just move sysfs_slab_remove
-		 * to a later point in common code. We should do that when we
-		 * have a common sysfs framework for all allocators.
-		 */
-		mutex_unlock(&slab_mutex);
-		sysfs_slab_remove(s);
-		mutex_lock(&slab_mutex);
-	}
-
-	return rc;
+	return kmem_cache_close(s);
 }
 
 /********************************************************************
@@ -5071,15 +5051,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 #ifdef CONFIG_MEMCG_KMEM
 	int i;
 	char *buffer = NULL;
+	struct kmem_cache *root_cache;
 
-	if (!is_root_cache(s))
+	if (is_root_cache(s))
 		return;
 
+	root_cache = s->memcg_params->root_cache;
+
 	/*
 	 * This mean this cache had no attribute written. Therefore, no point
 	 * in copying default values around
 	 */
-	if (!s->max_attr_size)
+	if (!root_cache->max_attr_size)
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
@@ -5101,7 +5084,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 		 */
 		if (buffer)
 			buf = buffer;
-		else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
 			buf = mbuf;
 		else {
 			buffer = (char *) get_zeroed_page(GFP_KERNEL);
@@ -5110,7 +5093,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 			buf = buffer;
 		}
 
-		attr->show(s->memcg_params->root_cache, buf);
+		attr->show(root_cache, buf);
 		attr->store(s, buf, strlen(buf));
 	}
 
@@ -5119,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
 #endif
 }
 
+static void kmem_cache_release(struct kobject *k)
+{
+	slab_kmem_cache_release(to_slab(k));
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
@@ -5126,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = {
 
 static struct kobj_type slab_ktype = {
 	.sysfs_ops = &slab_sysfs_ops,
+	.release = kmem_cache_release,
 };
 
 static int uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -5252,7 +5241,7 @@ out_put_kobj:
 	goto out;
 }
 
-static void sysfs_slab_remove(struct kmem_cache *s)
+void sysfs_slab_remove(struct kmem_cache *s)
 {
 	if (slab_state < FULL)
 		/*
diff --git a/mm/truncate.c b/mm/truncate.c
index e5cc39ab0751..6a78c814bebf 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -484,14 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	unsigned long count = 0;
 	int i;
 
-	/*
-	 * Note: this function may get called on a shmem/tmpfs mapping:
-	 * pagevec_lookup() might then return 0 prematurely (because it
-	 * got a gangful of swap entries); but it's hardly worth worrying
-	 * about - it can rarely have anything to free from such a mapping
-	 * (most pages are dirty), and already skips over any difficulties.
-	 */
-
 	pagevec_init(&pvec, 0);
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
diff --git a/mm/util.c b/mm/util.c
index f380af7ea779..d5ea733c5082 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -10,6 +10,7 @@
 #include <linux/swapops.h>
 #include <linux/mman.h>
 #include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
@@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_mmap);
 
+void kvfree(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
 struct address_space *page_mapping(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
diff --git a/mm/vmacache.c b/mm/vmacache.c
index d4224b397c0e..1037a3bab505 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -81,10 +81,12 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 	for (i = 0; i < VMACACHE_SIZE; i++) {
 		struct vm_area_struct *vma = current->vmacache[i];
 
-		if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
-			BUG_ON(vma->vm_mm != mm);
+		if (!vma)
+			continue;
+		if (WARN_ON_ONCE(vma->vm_mm != mm))
+			break;
+		if (vma->vm_start <= addr && vma->vm_end > addr)
 			return vma;
-		}
 	}
 
 	return NULL;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3f56c8deb3c0..32c661d66a45 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1916,6 +1916,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		get_lru_size(lruvec, LRU_INACTIVE_FILE);
 
 	/*
+	 * Prevent the reclaimer from falling into the cache trap: as
+	 * cache pages start out inactive, every cache fault will tip
+	 * the scan balance towards the file LRU.  And as the file LRU
+	 * shrinks, so does the window for rotation from references.
+	 * This means we have a runaway feedback loop where a tiny
+	 * thrashing file LRU becomes infinitely more attractive than
+	 * anon pages.  Try to detect this based on file LRU size.
+	 */
+	if (global_reclaim(sc)) {
+		unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+
+		if (unlikely(file + free <= high_wmark_pages(zone))) {
+			scan_balance = SCAN_ANON;
+			goto out;
+		}
+	}
+
+	/*
 	 * There is enough inactive page cache, do not reclaim
 	 * anything from the anonymous working set right now.
 	 */
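Note: the mm/util.c hunk above defines and exports a new kvfree() helper that frees either a kmalloc()ed or a vmalloc()ed pointer. As a brief illustration only (not part of the patch; alloc_buf()/free_buf() are hypothetical names, and kvfree()'s declaration lives in a header outside mm/ that is not shown in this diffstat), a caller that falls back from kmalloc() to vmalloc() for large buffers can now use a single free path:

	#include <linux/slab.h>     /* kmalloc(), kfree(), GFP_KERNEL */
	#include <linux/vmalloc.h>  /* vmalloc() */
	#include <linux/mm.h>       /* kvfree() declaration (not in this diff) */

	/* Hypothetical caller: try physically contiguous memory first,
	 * fall back to vmalloc() for large or fragmented allocations. */
	static void *alloc_buf(size_t size)
	{
		void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

		return p ? p : vmalloc(size);
	}

	static void free_buf(void *p)
	{
		/* kvfree() picks kfree() or vfree() via is_vmalloc_addr(),
		 * so the caller no longer needs to remember which
		 * allocator produced the pointer. */
		kvfree(p);
	}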