Diffstat (limited to 'mm')
72 files changed, 265 insertions, 136 deletions
| diff --git a/mm/Makefile b/mm/Makefile index e3ac3aeb533b..4659b93cba43 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0  #  # Makefile for the linux memory manager.  # diff --git a/mm/bootmem.c b/mm/bootmem.c index 9fedb27c6451..6aef64254203 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  bootmem - A boot-time physical memory allocator and configurator   * @@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,  	trace_cma_alloc(pfn, page, count, align); -	if (ret) { +	if (ret && !(gfp_mask & __GFP_NOWARN)) {  		pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",  			__func__, count, ret);  		cma_debug_show_areas(cma); @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */  #ifndef __MM_CMA_H__  #define __MM_CMA_H__ diff --git a/mm/cma_debug.c b/mm/cma_debug.c index c03ccbc405a0..275df8b5b22e 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * CMA DebugFS Interface   * diff --git a/mm/compaction.c b/mm/compaction.c index fb548e4c7bd4..85395dc6eb13 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * linux/mm/compaction.c   * @@ -1999,17 +2000,14 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)  	if (pgdat->kcompactd_max_order < order)  		pgdat->kcompactd_max_order = order; -	/* -	 * Pairs with implicit barrier in wait_event_freezable() -	 * such that wakeups are not missed in the lockless -	 * waitqueue_active() call. -	 */ -	smp_acquire__after_ctrl_dep(); -  	if (pgdat->kcompactd_classzone_idx > classzone_idx)  		pgdat->kcompactd_classzone_idx = classzone_idx; -	if (!waitqueue_active(&pgdat->kcompactd_wait)) +	/* +	 * Pairs with implicit barrier in wait_event_freezable() +	 * such that wakeups are not missed. +	 */ +	if (!wq_has_sleeper(&pgdat->kcompactd_wait))  		return;  	if (!kcompactd_node_suitable(pgdat)) diff --git a/mm/debug.c b/mm/debug.c index 5715448ab0b5..6726bec731c9 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * mm/debug.c   * diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c index 1aef3d562e52..f3b2c9d3ece2 100644 --- a/mm/debug_page_ref.c +++ b/mm/debug_page_ref.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/mm_types.h>  #include <linux/tracepoint.h> diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index b1dd4a948fc0..d04ac1ec0559 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Provide common bits of early_ioremap() support for architectures needing   * temporary mappings during boot before ioremap() is available. 
diff --git a/mm/fadvise.c b/mm/fadvise.c index 702f239cd6db..ec70d6e4b86d 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * mm/fadvise.c   * diff --git a/mm/failslab.c b/mm/failslab.c index b0fac98cd938..8087d976a809 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/fault-inject.h>  #include <linux/slab.h>  #include <linux/mm.h> diff --git a/mm/filemap.c b/mm/filemap.c index 870971e20967..594d73fef8b4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -620,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file)  		trace_file_check_and_advance_wb_err(file, old);  		spin_unlock(&file->f_lock);  	} + +	/* +	 * We're mostly using this function as a drop in replacement for +	 * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect +	 * that the legacy code would have had on these flags. +	 */ +	clear_bit(AS_EIO, &mapping->flags); +	clear_bit(AS_ENOSPC, &mapping->flags);  	return err;  }  EXPORT_SYMBOL(file_check_and_advance_wb_err); @@ -2926,9 +2934,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)  	 * we're writing.  Either one is a pretty crazy thing to do,  	 * so we don't support it 100%.  If this invalidation  	 * fails, tough, the write still worked... +	 * +	 * Most of the time we do not need this since dio_complete() will do +	 * the invalidation for us. However there are some file systems that +	 * do not end up with dio_complete() being called, so let's not break +	 * them by removing it completely  	 */ -	invalidate_inode_pages2_range(mapping, -				pos >> PAGE_SHIFT, end); +	if (mapping->nrpages) +		invalidate_inode_pages2_range(mapping, +					pos >> PAGE_SHIFT, end);  	if (written > 0) {  		pos += written; diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 72ebec18629c..2f98df0d460e 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/kernel.h>  #include <linux/errno.h>  #include <linux/err.h> diff --git a/mm/highmem.c b/mm/highmem.c index 50b4ca6787f0..59db3223a5d6 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * High memory handling common code and variables.   
* diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 269b5df58543..1981ed697dab 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,  				pmd = pmd_swp_mksoft_dirty(pmd);  			set_pmd_at(src_mm, addr, src_pmd, pmd);  		} +		add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); +		atomic_long_inc(&dst_mm->nr_ptes); +		pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);  		set_pmd_at(dst_mm, addr, dst_pmd, pmd);  		ret = 0;  		goto out_unlock; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 424b0ef08a60..2d2ff5e8bf2b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,  			    unsigned long src_addr,  			    struct page **pagep)  { +	struct address_space *mapping; +	pgoff_t idx; +	unsigned long size;  	int vm_shared = dst_vma->vm_flags & VM_SHARED;  	struct hstate *h = hstate_vma(dst_vma);  	pte_t _dst_pte; @@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,  	__SetPageUptodate(page);  	set_page_huge_active(page); +	mapping = dst_vma->vm_file->f_mapping; +	idx = vma_hugecache_offset(h, dst_vma, dst_addr); +  	/*  	 * If shared, add to page cache  	 */  	if (vm_shared) { -		struct address_space *mapping = dst_vma->vm_file->f_mapping; -		pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr); +		size = i_size_read(mapping->host) >> huge_page_shift(h); +		ret = -EFAULT; +		if (idx >= size) +			goto out_release_nounlock; +		/* +		 * Serialization between remove_inode_hugepages() and +		 * huge_add_to_page_cache() below happens through the +		 * hugetlb_fault_mutex_table that here must be hold by +		 * the caller. +		 */  		ret = huge_add_to_page_cache(page, mapping, idx);  		if (ret)  			goto out_release_nounlock; @@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,  	ptl = huge_pte_lockptr(h, dst_mm, dst_pte);  	spin_lock(ptl); +	/* +	 * Recheck the i_size after holding PT lock to make sure not +	 * to leave any page mapped (as page_mapped()) beyond the end +	 * of the i_size (remove_inode_hugepages() is strict about +	 * enforcing that). If we bail out here, we'll also leave a +	 * page in the radix tree in the vm_shared case beyond the end +	 * of the i_size, but remove_inode_hugepages() will take care +	 * of it as soon as we drop the hugetlb_fault_mutex_table. 
+	 */ +	size = i_size_read(mapping->host) >> huge_page_shift(h); +	ret = -EFAULT; +	if (idx >= size) +		goto out_release_unlock; +  	ret = -EEXIST;  	if (!huge_pte_none(huge_ptep_get(dst_pte)))  		goto out_release_unlock; diff --git a/mm/init-mm.c b/mm/init-mm.c index 975e49f00f34..f94d5d15ebc0 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/mm_types.h>  #include <linux/rbtree.h>  #include <linux/rwsem.h> diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 2976a9ee104f..3289db38bc87 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0  KASAN_SANITIZE := n  UBSAN_SANITIZE_kasan.o := n  KCOV_INSTRUMENT := n diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1229298cce64..c70851a9a6a4 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */  #ifndef __MM_KASAN_KASAN_H  #define __MM_KASAN_KASAN_H diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c01f177a1120..43cb3043311b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt  #include <linux/mm.h> diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index 2d5959c5f7c5..800d64b854ea 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/gfp.h>  #include <linux/mm_types.h>  #include <linux/mm.h> @@ -1990,6 +1990,7 @@ static void stable_tree_append(struct rmap_item *rmap_item,   */  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)  { +	struct mm_struct *mm = rmap_item->mm;  	struct rmap_item *tree_rmap_item;  	struct page *tree_page = NULL;  	struct stable_node *stable_node; @@ -2062,9 +2063,11 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)  	if (ksm_use_zero_pages && (checksum == zero_checksum)) {  		struct vm_area_struct *vma; -		vma = find_mergeable_vma(rmap_item->mm, rmap_item->address); +		down_read(&mm->mmap_sem); +		vma = find_mergeable_vma(mm, rmap_item->address);  		err = try_to_merge_one_page(vma, page,  					    ZERO_PAGE(rmap_item->address)); +		up_read(&mm->mmap_sem);  		/*  		 * In case of failure, the page was not really empty, so we  		 * need to continue. Otherwise we're done. 
diff --git a/mm/list_lru.c b/mm/list_lru.c index 7a40fa2be858..f141f0c80ff3 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)  {  	int size = memcg_nr_cache_ids; -	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL); +	nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);  	if (!nlru->memcg_lrus)  		return -ENOMEM;  	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { -		kfree(nlru->memcg_lrus); +		kvfree(nlru->memcg_lrus);  		return -ENOMEM;  	} @@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)  static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)  {  	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); -	kfree(nlru->memcg_lrus); +	kvfree(nlru->memcg_lrus);  }  static int memcg_update_list_lru_node(struct list_lru_node *nlru, @@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,  	BUG_ON(old_size > new_size);  	old = nlru->memcg_lrus; -	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL); +	new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL);  	if (!new)  		return -ENOMEM;  	if (__memcg_init_list_lru_node(new, old_size, new_size)) { -		kfree(new); +		kvfree(new);  		return -ENOMEM;  	} @@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,  	nlru->memcg_lrus = new;  	spin_unlock_irq(&nlru->lock); -	kfree(old); +	kvfree(old);  	return 0;  } diff --git a/mm/madvise.c b/mm/madvise.c index 21261ff0466f..375cf32087e4 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *	linux/mm/madvise.c   * @@ -625,18 +626,26 @@ static int madvise_inject_error(int behavior,  {  	struct page *page;  	struct zone *zone; +	unsigned int order;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; -	for (; start < end; start += PAGE_SIZE << -				compound_order(compound_head(page))) { + +	for (; start < end; start += PAGE_SIZE << order) {  		int ret;  		ret = get_user_pages_fast(start, 1, 0, &page);  		if (ret != 1)  			return ret; +		/* +		 * When soft offlining hugepages, after migrating the page +		 * we dissolve it, therefore in the second loop "page" will +		 * no longer be a compound page, and order will be 0. +		 */ +		order = compound_order(compound_head(page)); +  		if (PageHWPoison(page)) {  			put_page(page);  			continue; @@ -749,6 +758,9 @@ madvise_behavior_valid(int behavior)   *  MADV_DONTFORK - omit this area from child's address space when forking:   *		typically, to avoid COWing pages pinned by get_user_pages().   *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. + *  MADV_WIPEONFORK - present the child process with zero-filled memory in this + *              range after a fork. + *  MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK   *  MADV_HWPOISON - trigger memory error handler as if the given memory range   *		were corrupted by unrecoverable hardware memory failure.   *  MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. @@ -769,7 +781,9 @@ madvise_behavior_valid(int behavior)   *  zero    - success   *  -EINVAL - start + len < 0, start is not page-aligned,   *		"behavior" is not a valid value, or application - *		is attempting to release locked or shared pages. + *		is attempting to release locked or shared pages, + *		or the specified address range includes file, Huge TLB, + *		MAP_SHARED or VMPFNMAP range.   
*  -ENOMEM - addresses in the specified range are not currently   *		mapped, or are outside the AS of the process.   *  -EIO    - an I/O error occurred while paging in data. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 15af3da5af02..661f046ad318 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1777,6 +1777,10 @@ static void drain_local_stock(struct work_struct *dummy)  	struct memcg_stock_pcp *stock;  	unsigned long flags; +	/* +	 * The only protection from memory hotplug vs. drain_stock races is +	 * that we always operate on local CPU stock here with IRQ disabled +	 */  	local_irq_save(flags);  	stock = this_cpu_ptr(&memcg_stock); @@ -1821,27 +1825,33 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)  	/* If someone's already draining, avoid adding running more workers. */  	if (!mutex_trylock(&percpu_charge_mutex))  		return; -	/* Notify other cpus that system-wide "drain" is running */ -	get_online_cpus(); +	/* +	 * Notify other cpus that system-wide "drain" is running +	 * We do not care about races with the cpu hotplug because cpu down +	 * as well as workers from this path always operate on the local +	 * per-cpu data. CPU up doesn't touch memcg_stock at all. +	 */  	curcpu = get_cpu();  	for_each_online_cpu(cpu) {  		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);  		struct mem_cgroup *memcg;  		memcg = stock->cached; -		if (!memcg || !stock->nr_pages) +		if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))  			continue; -		if (!mem_cgroup_is_descendant(memcg, root_memcg)) +		if (!mem_cgroup_is_descendant(memcg, root_memcg)) { +			css_put(&memcg->css);  			continue; +		}  		if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {  			if (cpu == curcpu)  				drain_local_stock(&stock->work);  			else  				schedule_work_on(cpu, &stock->work);  		} +		css_put(&memcg->css);  	}  	put_cpu(); -	put_online_cpus();  	mutex_unlock(&percpu_charge_mutex);  } @@ -5648,7 +5658,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)  static void uncharge_page(struct page *page, struct uncharge_gather *ug)  {  	VM_BUG_ON_PAGE(PageLRU(page), page); -	VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); +	VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) && +			!PageHWPoison(page) , page);  	if (!page->mem_cgroup)  		return; @@ -5817,21 +5828,6 @@ void mem_cgroup_sk_alloc(struct sock *sk)  	if (!mem_cgroup_sockets_enabled)  		return; -	/* -	 * Socket cloning can throw us here with sk_memcg already -	 * filled. It won't however, necessarily happen from -	 * process context. So the test for root memcg given -	 * the current task's memcg won't help us in this case. -	 * -	 * Respecting the original socket's memcg is a better -	 * decision in this case. -	 */ -	if (sk->sk_memcg) { -		BUG_ON(mem_cgroup_is_root(sk->sk_memcg)); -		css_get(&sk->sk_memcg->css); -		return; -	} -  	rcu_read_lock();  	memcg = mem_cgroup_from_task(current);  	if (memcg == root_mem_cgroup) diff --git a/mm/memory.c b/mm/memory.c index ec4e15494901..a728bed16c20 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -845,7 +845,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,  		 * vm_normal_page() so that we do not have to special case all  		 * call site of vm_normal_page().  		 
*/ -		if (likely(pfn < highest_memmap_pfn)) { +		if (likely(pfn <= highest_memmap_pfn)) {  			struct page *page = pfn_to_page(pfn);  			if (is_device_public_page(page)) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e882cb6da994..d4b5f29906b9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -328,6 +328,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,  		if (err && (err != -EEXIST))  			break;  		err = 0; +		cond_resched();  	}  	vmemmap_populate_print_last();  out: @@ -337,7 +338,7 @@ EXPORT_SYMBOL_GPL(__add_pages);  #ifdef CONFIG_MEMORY_HOTREMOVE  /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ -static int find_smallest_section_pfn(int nid, struct zone *zone, +static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,  				     unsigned long start_pfn,  				     unsigned long end_pfn)  { @@ -362,7 +363,7 @@ static int find_smallest_section_pfn(int nid, struct zone *zone,  }  /* find the biggest valid pfn in the range [start_pfn, end_pfn). */ -static int find_biggest_section_pfn(int nid, struct zone *zone, +static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,  				    unsigned long start_pfn,  				    unsigned long end_pfn)  { @@ -550,7 +551,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,  		return ret;  	scn_nr = __section_nr(ms); -	start_pfn = section_nr_to_pfn(scn_nr); +	start_pfn = section_nr_to_pfn((unsigned long)scn_nr);  	__remove_zone(zone, start_pfn);  	sparse_remove_one_section(zone, ms, map_offset); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 006ba625c0b8..a2af6d58a68f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,  	struct page *page;  	page = __alloc_pages(gfp, order, nid); -	if (page && page_to_nid(page) == nid) -		inc_zone_page_state(page, NUMA_INTERLEAVE_HIT); +	if (page && page_to_nid(page) == nid) { +		preempt_disable(); +		__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); +		preempt_enable(); +	}  	return page;  } diff --git a/mm/mempool.c b/mm/mempool.c index 1c0294858527..c4a23cdae3f0 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  linux/mm/mempool.c   * diff --git a/mm/memtest.c b/mm/memtest.c index 8eaa4c3a5f65..f53ace709ccd 100644 --- a/mm/memtest.c +++ b/mm/memtest.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/kernel.h>  #include <linux/types.h>  #include <linux/init.h> diff --git a/mm/migrate.c b/mm/migrate.c index 6954c1435833..1236449b4777 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Memory Migration functionality - linux/mm/migrate.c   * @@ -2146,8 +2147,9 @@ static int migrate_vma_collect_hole(unsigned long start,  	unsigned long addr;  	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { -		migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE; +		migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;  		migrate->dst[migrate->npages] = 0; +		migrate->npages++;  		migrate->cpages++;  	} diff --git a/mm/mincore.c b/mm/mincore.c index c5687c45c326..fc37afe226e6 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *	linux/mm/mincore.c   * diff --git a/mm/mlock.c b/mm/mlock.c index dfc6f1912176..46af369c13e5 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *	linux/mm/mlock.c   
* diff --git a/mm/mmzone.c b/mm/mmzone.c index a51c0a67ea3d..4686fdc23bb9 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * linux/mm/mmzone.c   * diff --git a/mm/mprotect.c b/mm/mprotect.c index 6d3e2f082290..ec39f730a0bf 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  mm/mprotect.c   * diff --git a/mm/mremap.c b/mm/mremap.c index cfec004c4ff9..049470aa1e3e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *	mm/mremap.c   * diff --git a/mm/msync.c b/mm/msync.c index 24e612fefa04..ef30a429623a 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *	linux/mm/msync.c   * diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 3637809a18d0..9b02fda0886b 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  bootmem - A boot-time physical memory allocator and configurator   * diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 99736e026712..dee0f75c3013 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -40,6 +40,7 @@  #include <linux/ratelimit.h>  #include <linux/kthread.h>  #include <linux/init.h> +#include <linux/mmu_notifier.h>  #include <asm/tlb.h>  #include "internal.h" @@ -495,6 +496,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)  	}  	/* +	 * If the mm has notifiers then we would need to invalidate them around +	 * unmap_page_range and that is risky because notifiers can sleep and +	 * what they do is basically undeterministic.  So let's have a short +	 * sleep to give the oom victim some more time. +	 * TODO: we really want to get rid of this ugly hack and make sure that +	 * notifiers cannot block for unbounded amount of time and add +	 * mmu_notifier_invalidate_range_{start,end} around unmap_page_range +	 */ +	if (mm_has_notifiers(mm)) { +		up_read(&mm->mmap_sem); +		schedule_timeout_idle(HZ); +		goto unlock_oom; +	} + +	/*  	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't  	 * work on the mm anymore. 
The check for MMF_OOM_SKIP must run  	 * under mmap_sem for reading because it serializes against the diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c841af88836a..77e4d3c5c57b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1190,7 +1190,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,  }  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -static void init_reserved_page(unsigned long pfn) +static void __meminit init_reserved_page(unsigned long pfn)  {  	pg_data_t *pgdat;  	int nid, zid; @@ -5367,6 +5367,7 @@ not_early:  			__init_single_page(page, pfn, zone, nid);  			set_pageblock_migratetype(page, MIGRATE_MOVABLE); +			cond_resched();  		} else {  			__init_single_pfn(pfn, zone, nid);  		} diff --git a/mm/page_counter.c b/mm/page_counter.c index 7c6a63d2c27f..2a8df3ad60a4 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Lockless hierarchical page accounting & limiting   * diff --git a/mm/page_ext.c b/mm/page_ext.c index 32f18911deda..4f0367d472c4 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/mm.h>  #include <linux/mmzone.h>  #include <linux/bootmem.h> diff --git a/mm/page_idle.c b/mm/page_idle.c index 4bd03a8d809e..0a49374e6931 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/init.h>  #include <linux/bootmem.h>  #include <linux/fs.h> diff --git a/mm/page_io.c b/mm/page_io.c index 21502d341a67..5d882de3fbfd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  linux/mm/page_io.c   * diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 757410d9f758..44f213935bf6 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * linux/mm/page_isolation.c   */ diff --git a/mm/page_owner.c b/mm/page_owner.c index 57abca62d4db..4f44b95b9d1e 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/debugfs.h>  #include <linux/mm.h>  #include <linux/slab.h> diff --git a/mm/page_poison.c b/mm/page_poison.c index be19e989ccff..e83fd44867de 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/kernel.h>  #include <linux/string.h>  #include <linux/mm.h> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 6a03946469a9..d22b84310f6d 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/mm.h>  #include <linux/rmap.h>  #include <linux/hugetlb.h> @@ -6,17 +7,6 @@  #include "internal.h" -static inline bool check_pmd(struct page_vma_mapped_walk *pvmw) -{ -	pmd_t pmde; -	/* -	 * Make sure we don't re-load pmd between present and !trans_huge check. -	 * We need a consistent view. 
-	 */ -	pmde = READ_ONCE(*pvmw->pmd); -	return pmd_present(pmde) && !pmd_trans_huge(pmde); -} -  static inline bool not_found(struct page_vma_mapped_walk *pvmw)  {  	page_vma_mapped_walk_done(pvmw); @@ -116,6 +106,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)  	pgd_t *pgd;  	p4d_t *p4d;  	pud_t *pud; +	pmd_t pmde;  	/* The only possible pmd mapping has been handled on last iteration */  	if (pvmw->pmd && !pvmw->pte) @@ -148,7 +139,13 @@ restart:  	if (!pud_present(*pud))  		return false;  	pvmw->pmd = pmd_offset(pud, pvmw->address); -	if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) { +	/* +	 * Make sure the pmd value isn't cached in a register by the +	 * compiler and used as a stale value after we've observed a +	 * subsequent update. +	 */ +	pmde = READ_ONCE(*pvmw->pmd); +	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {  		pvmw->ptl = pmd_lock(mm, pvmw->pmd);  		if (likely(pmd_trans_huge(*pvmw->pmd))) {  			if (pvmw->flags & PVMW_MIGRATION) @@ -167,17 +164,15 @@ restart:  						return not_found(pvmw);  					return true;  				} -			} else -				WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); +			}  			return not_found(pvmw);  		} else {  			/* THP pmd was split under us: handle on pte level */  			spin_unlock(pvmw->ptl);  			pvmw->ptl = NULL;  		} -	} else { -		if (!check_pmd(pvmw)) -			return false; +	} else if (!pmd_present(pmde)) { +		return false;  	}  	if (!map_pte(pvmw))  		goto next_pte; diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 1a4197965415..8bd4afa83cb8 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/mm.h>  #include <linux/highmem.h>  #include <linux/sched.h> diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 7065faf74b46..b1739dc06b73 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */  #ifndef _MM_PERCPU_INTERNAL_H  #define _MM_PERCPU_INTERNAL_H diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 6142484e88f7..7a58460bfd27 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -73,7 +73,7 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,  		     last_alloc + 1 : 0;  	as_len = 0; -	start = chunk->start_offset; +	start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;  	/*  	 * If a bit is set in the allocation map, the bound_map identifies diff --git a/mm/percpu.c b/mm/percpu.c index 59d44d61f5f1..a0e0c82c1e4c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -353,6 +353,8 @@ static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,  					block->contig_hint_start);  			return;  		} +		/* reset to satisfy the second predicate above */ +		block_off = 0;  		*bits = block->right_free;  		*bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free; @@ -407,6 +409,8 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,  			*bit_off = pcpu_block_off_to_off(i, block->first_free);  			return;  		} +		/* reset to satisfy the second predicate above */ +		block_off = 0;  		*bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free,  				 align); @@ -1325,7 +1329,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)   * @gfp: allocation flags   *   * Allocate percpu area of @size bytes aligned at @align.  If @gfp doesn't - * contain %GFP_KERNEL, the allocation is atomic. + * contain %GFP_KERNEL, the allocation is atomic. 
If @gfp has __GFP_NOWARN + * then no warning will be triggered on invalid or failed allocation + * requests.   *   * RETURNS:   * Percpu pointer to the allocated area on success, NULL on failure. @@ -1333,10 +1339,11 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)  static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,  				 gfp_t gfp)  { +	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; +	bool do_warn = !(gfp & __GFP_NOWARN);  	static int warn_limit = 10;  	struct pcpu_chunk *chunk;  	const char *err; -	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;  	int slot, off, cpu, ret;  	unsigned long flags;  	void __percpu *ptr; @@ -1357,7 +1364,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,  	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||  		     !is_power_of_2(align))) { -		WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n", +		WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",  		     size, align);  		return NULL;  	} @@ -1478,7 +1485,7 @@ fail_unlock:  fail:  	trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); -	if (!is_atomic && warn_limit) { +	if (!is_atomic && do_warn && warn_limit) {  		pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",  			size, align, is_atomic, err);  		dump_stack(); @@ -1503,7 +1510,9 @@ fail:   *   * Allocate zero-filled percpu area of @size bytes aligned at @align.  If   * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can - * be called from any context but is a lot more likely to fail. + * be called from any context but is a lot more likely to fail. If @gfp + * has __GFP_NOWARN then no warning will be triggered on invalid or failed + * allocation requests.   *   * RETURNS:   * Percpu pointer to the allocated area on success, NULL on failure. diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 1175f6a24fdb..1e4ee763c190 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  mm/pgtable-generic.c   * diff --git a/mm/quicklist.c b/mm/quicklist.c index daf6ff6e199a..5e98ac78e410 100644 --- a/mm/quicklist.c +++ b/mm/quicklist.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Quicklist support.   * diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 6bb4deb12e78..d908c8769b48 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -14,7 +14,7 @@  #include <linux/uaccess.h>  #include <asm/sections.h> -const int rodata_test_data = 0xC3; +static const int rodata_test_data = 0xC3;  void rodata_test(void)  { diff --git a/mm/slab.c b/mm/slab.c index 04dec48c3ed7..b7095884fd93 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * linux/mm/slab.c   * Written by Mark Hemment, 1996/97. 
diff --git a/mm/slab.h b/mm/slab.h index 073362816acc..028cdc7df67e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */  #ifndef MM_SLAB_H  #define MM_SLAB_H  /* diff --git a/mm/slab_common.c b/mm/slab_common.c index 904a83be82de..0d7fe71ff5e4 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Slab allocator functions that are independent of the allocator strategy   * @@ -165,9 +166,9 @@ static int init_memcg_params(struct kmem_cache *s,  	if (!memcg_nr_cache_ids)  		return 0; -	arr = kzalloc(sizeof(struct memcg_cache_array) + -		      memcg_nr_cache_ids * sizeof(void *), -		      GFP_KERNEL); +	arr = kvzalloc(sizeof(struct memcg_cache_array) + +		       memcg_nr_cache_ids * sizeof(void *), +		       GFP_KERNEL);  	if (!arr)  		return -ENOMEM; @@ -178,15 +179,23 @@ static int init_memcg_params(struct kmem_cache *s,  static void destroy_memcg_params(struct kmem_cache *s)  {  	if (is_root_cache(s)) -		kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); +		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches)); +} + +static void free_memcg_params(struct rcu_head *rcu) +{ +	struct memcg_cache_array *old; + +	old = container_of(rcu, struct memcg_cache_array, rcu); +	kvfree(old);  }  static int update_memcg_params(struct kmem_cache *s, int new_array_size)  {  	struct memcg_cache_array *old, *new; -	new = kzalloc(sizeof(struct memcg_cache_array) + -		      new_array_size * sizeof(void *), GFP_KERNEL); +	new = kvzalloc(sizeof(struct memcg_cache_array) + +		       new_array_size * sizeof(void *), GFP_KERNEL);  	if (!new)  		return -ENOMEM; @@ -198,7 +207,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)  	rcu_assign_pointer(s->memcg_params.memcg_caches, new);  	if (old) -		kfree_rcu(old, rcu); +		call_rcu(&old->rcu, free_memcg_params);  	return 0;  } diff --git a/mm/slob.c b/mm/slob.c index a8bd6fa11a66..10249160b693 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * SLOB Allocator: Simple List Of Blocks   * diff --git a/mm/slub.c b/mm/slub.c index 163352c537ab..1efbb8123037 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * SLUB: A slab allocator that limits cache line use instead of queuing   * objects in per cpu and per node lists. diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index d1a39b8051e0..478ce6d4a2c4 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Virtual Memory Map support   * diff --git a/mm/sparse.c b/mm/sparse.c index 83b3bf6461af..4900707ae146 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * sparse memory mappings.   
*/ diff --git a/mm/swap.c b/mm/swap.c index 9295ae960d66..a77d68f2c1b6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,  			    void *arg)  {  	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && -	    !PageUnevictable(page)) { +	    !PageSwapCache(page) && !PageUnevictable(page)) {  		bool active = PageActive(page);  		del_page_from_lru_list(page, lruvec, @@ -665,7 +665,7 @@ void deactivate_file_page(struct page *page)  void mark_page_lazyfree(struct page *page)  {  	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && -	    !PageUnevictable(page)) { +	    !PageSwapCache(page) && !PageUnevictable(page)) {  		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);  		get_page(page); diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index fcd2740f4ed7..45affaef3bc6 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  #include <linux/swap_cgroup.h>  #include <linux/vmalloc.h>  #include <linux/mm.h> diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 13a174006b91..d81cfc5a43d5 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Manage cache of swap slots to be used for and returned from   * swap. diff --git a/mm/swap_state.c b/mm/swap_state.c index 71ce2d1ccbf7..326439428daf 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  linux/mm/swap_state.c   * @@ -39,10 +40,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];  static unsigned int nr_swapper_spaces[MAX_SWAPFILES];  bool swap_vma_readahead = true; -#define SWAP_RA_MAX_ORDER_DEFAULT	3 - -static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT; -  #define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)  #define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)  #define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK @@ -242,6 +239,17 @@ int add_to_swap(struct page *page)  		 * clear SWAP_HAS_CACHE flag.  		 */  		goto fail; +	/* +	 * Normally the page will be dirtied in unmap because its pte should be +	 * dirty. A special case is MADV_FREE page. The page'e pte could have +	 * dirty bit cleared but the page's SwapBacked bit is still set because +	 * clearing the dirty bit and SwapBacked bit has no lock protected. For +	 * such page, unmap will not set dirty bit for it, so page reclaim will +	 * not write the page out. This can cause data corruption when the page +	 * is swap in later. Always setting the dirty bit for the page solves +	 * the problem. 
+	 */ +	set_page_dirty(page);  	return 1; @@ -653,6 +661,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,  	pte_t *tpte;  #endif +	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), +			     SWAP_RA_ORDER_CEILING); +	if (max_win == 1) { +		swap_ra->win = 1; +		return NULL; +	} +  	faddr = vmf->address;  	entry = pte_to_swp_entry(vmf->orig_pte);  	if ((unlikely(non_swap_entry(entry)))) @@ -661,12 +676,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,  	if (page)  		return page; -	max_win = 1 << READ_ONCE(swap_ra_max_order); -	if (max_win == 1) { -		swap_ra->win = 1; -		return NULL; -	} -  	fpfn = PFN_DOWN(faddr);  	swap_ra_info = GET_SWAP_RA_VAL(vma);  	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); @@ -775,32 +784,8 @@ static struct kobj_attribute vma_ra_enabled_attr =  	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,  	       vma_ra_enabled_store); -static ssize_t vma_ra_max_order_show(struct kobject *kobj, -				     struct kobj_attribute *attr, char *buf) -{ -	return sprintf(buf, "%d\n", swap_ra_max_order); -} -static ssize_t vma_ra_max_order_store(struct kobject *kobj, -				      struct kobj_attribute *attr, -				      const char *buf, size_t count) -{ -	int err, v; - -	err = kstrtoint(buf, 10, &v); -	if (err || v > SWAP_RA_ORDER_CEILING || v <= 0) -		return -EINVAL; - -	swap_ra_max_order = v; - -	return count; -} -static struct kobj_attribute vma_ra_max_order_attr = -	__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show, -	       vma_ra_max_order_store); -  static struct attribute *swap_attrs[] = {  	&vma_ra_enabled_attr.attr, -	&vma_ra_max_order_attr.attr,  	NULL,  }; diff --git a/mm/swapfile.c b/mm/swapfile.c index bf91dc9e7a79..e47a21e64764 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void)  	p->flags = SWP_USED;  	spin_unlock(&swap_lock);  	spin_lock_init(&p->lock); +	spin_lock_init(&p->cont_lock);  	return p;  } @@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)  	head = vmalloc_to_page(si->swap_map + offset);  	offset &= ~PAGE_MASK; +	spin_lock(&si->cont_lock);  	/*  	 * Page allocation does not initialize the page's lru field,  	 * but it does always reset its private field. @@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)  		 * a continuation page, free our allocation and use this one.  		 */  		if (!(count & COUNT_CONTINUED)) -			goto out; +			goto out_unlock_cont;  		map = kmap_atomic(list_page) + offset;  		count = *map; @@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)  		 * free our allocation and use this one.  		 
*/  		if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) -			goto out; +			goto out_unlock_cont;  	}  	list_add_tail(&page->lru, &head->lru);  	page = NULL;			/* now it's attached, don't free it */ +out_unlock_cont: +	spin_unlock(&si->cont_lock);  out:  	unlock_cluster(ci);  	spin_unlock(&si->lock); @@ -3604,6 +3608,7 @@ static bool swap_count_continued(struct swap_info_struct *si,  	struct page *head;  	struct page *page;  	unsigned char *map; +	bool ret;  	head = vmalloc_to_page(si->swap_map + offset);  	if (page_private(head) != SWP_CONTINUED) { @@ -3611,6 +3616,7 @@ static bool swap_count_continued(struct swap_info_struct *si,  		return false;		/* need to add count continuation */  	} +	spin_lock(&si->cont_lock);  	offset &= ~PAGE_MASK;  	page = list_entry(head->lru.next, struct page, lru);  	map = kmap_atomic(page) + offset; @@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si,  		if (*map == SWAP_CONT_MAX) {  			kunmap_atomic(map);  			page = list_entry(page->lru.next, struct page, lru); -			if (page == head) -				return false;	/* add count continuation */ +			if (page == head) { +				ret = false;	/* add count continuation */ +				goto out; +			}  			map = kmap_atomic(page) + offset;  init_map:		*map = 0;		/* we didn't zero the page */  		} @@ -3645,7 +3653,7 @@ init_map:		*map = 0;		/* we didn't zero the page */  			kunmap_atomic(map);  			page = list_entry(page->lru.prev, struct page, lru);  		} -		return true;			/* incremented */ +		ret = true;			/* incremented */  	} else {				/* decrementing */  		/* @@ -3671,8 +3679,11 @@ init_map:		*map = 0;		/* we didn't zero the page */  			kunmap_atomic(map);  			page = list_entry(page->lru.prev, struct page, lru);  		} -		return count == COUNT_CONTINUED; +		ret = count == COUNT_CONTINUED;  	} +out: +	spin_unlock(&si->cont_lock); +	return ret;  }  /* diff --git a/mm/vmacache.c b/mm/vmacache.c index 7ffa0ee341b5..db7596eb6132 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Copyright (C) 2014 Davidlohr Bueso.   
*/ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8a43db6284eb..673942094328 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,  	for (i = 0; i < area->nr_pages; i++) {  		struct page *page; -		if (fatal_signal_pending(current)) { -			area->nr_pages = i; -			goto fail_no_warn; -		} -  		if (node == NUMA_NO_NODE)  			page = alloc_page(alloc_mask|highmem_mask);  		else @@ -1723,7 +1718,6 @@ fail:  	warn_alloc(gfp_mask, NULL,  			  "vmalloc: allocation failure, allocated %ld of %ld bytes",  			  (area->nr_pages*PAGE_SIZE), area->size); -fail_no_warn:  	vfree(area->addr);  	return NULL;  } diff --git a/mm/vmscan.c b/mm/vmscan.c index 13d711dd8776..eb2f0315b8c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   *  linux/mm/vmscan.c   * diff --git a/mm/workingset.c b/mm/workingset.c index 7119cd745ace..b997c9de28f6 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0  /*   * Workingset detection   * diff --git a/mm/z3fold.c b/mm/z3fold.c index 486550df32be..b2ba2ba585f3 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -250,6 +250,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)  	WARN_ON(!list_empty(&zhdr->buddy));  	set_bit(PAGE_STALE, &page->private); +	clear_bit(NEEDS_COMPACTING, &page->private);  	spin_lock(&pool->lock);  	if (!list_empty(&page->lru))  		list_del(&page->lru); @@ -303,7 +304,6 @@ static void free_pages_work(struct work_struct *w)  		list_del(&zhdr->buddy);  		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))  			continue; -		clear_bit(NEEDS_COMPACTING, &page->private);  		spin_unlock(&pool->stale_lock);  		cancel_work_sync(&zhdr->work);  		free_z3fold_page(page); @@ -624,10 +624,8 @@ lookup:  	 * stale pages list. cancel_work_sync() can sleep so we must make  	 * sure it won't be called in case we're in atomic context.  	 */ -	if (zhdr && (can_sleep || !work_pending(&zhdr->work) || -	    !unlikely(work_busy(&zhdr->work)))) { +	if (zhdr && (can_sleep || !work_pending(&zhdr->work))) {  		list_del(&zhdr->buddy); -		clear_bit(NEEDS_COMPACTING, &page->private);  		spin_unlock(&pool->stale_lock);  		if (can_sleep)  			cancel_work_sync(&zhdr->work); @@ -875,16 +873,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)  				goto next;  		}  next: +		spin_lock(&pool->lock);  		if (test_bit(PAGE_HEADLESS, &page->private)) {  			if (ret == 0) { +				spin_unlock(&pool->lock);  				free_z3fold_page(page);  				return 0;  			}  		} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {  			atomic64_dec(&pool->pages_nr); +			spin_unlock(&pool->lock);  			return 0;  		} -		spin_lock(&pool->lock);  		/*  		 * Add to the beginning of LRU. |
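The cma_alloc() and pcpu_alloc() hunks above both gate their failure diagnostics on __GFP_NOWARN, so callers that expect and tolerate failure can allocate silently. Below is a minimal userspace sketch of that pattern, not kernel code: the allocator, the EX_NOWARN flag, and the 1 MiB cutoff are all invented for illustration.

#include <stdio.h>
#include <stdlib.h>

#define EX_NOWARN 0x1u			/* stand-in for __GFP_NOWARN */

static void *ex_alloc(size_t size, unsigned int flags)
{
	/* Refuse anything over 1 MiB so the failure path is reachable. */
	void *p = size <= (1u << 20) ? malloc(size) : NULL;

	/* Warn on failure only if the caller did not opt out, as cma_alloc() now does. */
	if (!p && !(flags & EX_NOWARN))
		fprintf(stderr, "ex_alloc: failed, req-size: %zu bytes\n", size);

	return p;
}

int main(void)
{
	void *a = ex_alloc(16u << 20, 0);		/* fails and warns */
	void *b = ex_alloc(16u << 20, EX_NOWARN);	/* fails silently  */

	free(a);
	free(b);
	return 0;
}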
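The hugetlb_mcopy_atomic_pte() hunk rechecks i_size after taking the page-table lock so a racing truncate cannot leave a page mapped beyond end-of-file. The sketch below shows the same check-then-recheck-under-lock shape with pthreads; it is an illustration only, and every name in it (ex_file, ex_install_page, the lock and size fields) is invented rather than taken from the kernel.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct ex_file {
	pthread_mutex_t lock;	/* stands in for the page-table lock */
	size_t size;		/* stands in for i_size, in pages */
};

/* Returns true if idx was still inside the file when the insert committed. */
static bool ex_install_page(struct ex_file *f, size_t idx)
{
	/* Cheap early check; the kernel uses i_size_read() for this step. */
	if (idx >= f->size)
		return false;

	pthread_mutex_lock(&f->lock);

	/* Recheck under the lock: a concurrent truncate may have shrunk it. */
	if (idx >= f->size) {
		pthread_mutex_unlock(&f->lock);
		return false;
	}

	/* ... commit the insertion while the lock is still held ... */

	pthread_mutex_unlock(&f->lock);
	return true;
}

int main(void)
{
	struct ex_file f = { PTHREAD_MUTEX_INITIALIZER, 4 };

	printf("idx 2: %d\n", ex_install_page(&f, 2));	/* inside the file */
	printf("idx 9: %d\n", ex_install_page(&f, 9));	/* beyond the end  */
	return 0;
}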