Diffstat (limited to 'mm/memory.c')
| -rw-r--r-- | mm/memory.c | 108 |
1 file changed, 85 insertions, 23 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 6e9903d3f096..3a7779d9891d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -71,6 +71,8 @@
 #include <linux/dax.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>

 #include <trace/events/kmem.h>
@@ -1098,7 +1100,7 @@ again:
 		}

 		entry = pte_to_swp_entry(ptent);
-		if (non_swap_entry(entry) && is_device_private_entry(entry)) {
+		if (is_device_private_entry(entry)) {
 			struct page *page = device_private_entry_to_page(entry);

 			if (unlikely(details && details->check_mapping)) {
@@ -1800,7 +1802,7 @@ out_unlock:
  * @pfn: source kernel pfn
  * @pgprot: pgprot flags for the inserted page
  *
- * This is exactly like vmf_insert_pfn(), except that it allows drivers to
+ * This is exactly like vmf_insert_pfn(), except that it allows drivers
  * to override pgprot on a per-page basis.
  *
  * This only makes sense for IO mappings, and it makes no sense for
@@ -1936,7 +1938,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
  * @pfn: source kernel pfn
  * @pgprot: pgprot flags for the inserted page
  *
- * This is exactly like vmf_insert_mixed(), except that it allows drivers to
+ * This is exactly like vmf_insert_mixed(), except that it allows drivers
  * to override pgprot on a per-page basis.
  *
  * Typically this function should be used by drivers to set caching- and
@@ -2082,7 +2084,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 /**
  * remap_pfn_range - remap kernel memory to userspace
  * @vma: user vma to map to
- * @addr: target user address to start at
+ * @addr: target page aligned user address to start at
  * @pfn: page frame number of kernel physical memory address
  * @size: size of mapping area
  * @prot: page protection flags for this mapping
@@ -2101,6 +2103,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	unsigned long remap_pfn = pfn;
 	int err;

+	if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
+		return -EINVAL;
+
 	/*
 	 * Physically remapped pages are special. Tell the
 	 * rest of the world about it:
@@ -2205,7 +2210,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
 	pte_t *pte;
 	int err = 0;
-	spinlock_t *uninitialized_var(ptl);
+	spinlock_t *ptl;

 	if (create) {
 		pte = (mm == &init_mm) ?
@@ -2406,8 +2411,6 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = vmf->address;

-	debug_dma_assert_idle(src);
-
 	if (likely(src)) {
 		copy_user_highpage(dst, src, addr, vma);
 		return true;
@@ -2712,7 +2715,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		 */
 		ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
 		page_add_new_anon_rmap(new_page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(new_page, vma);
+		lru_cache_add_inactive_or_unevictable(new_page, vma);
 		/*
 		 * We call the notify macro here because, when using secondary
 		 * mmu page tables (such as kvm shadow page tables), we want the
@@ -3095,6 +3098,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	int locked;
 	int exclusive = 0;
 	vm_fault_t ret = 0;
+	void *shadow = NULL;

 	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 		goto out;
@@ -3124,8 +3128,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	if (!page) {
 		struct swap_info_struct *si = swp_swap_info(entry);

-		if (si->flags & SWP_SYNCHRONOUS_IO &&
-				__swap_count(entry) == 1) {
+		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
+		    __swap_count(entry) == 1) {
 			/* skip swapcache */
 			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
 							vmf->address);
@@ -3146,13 +3150,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 					goto out_page;
 				}

-				/*
-				 * XXX: Move to lru_cache_add() when it
-				 * supports new vs putback
-				 */
-				spin_lock_irq(&page_pgdat(page)->lru_lock);
-				lru_note_cost_page(page);
-				spin_unlock_irq(&page_pgdat(page)->lru_lock);
+				shadow = get_shadow_from_swap_cache(entry);
+				if (shadow)
+					workingset_refault(page, shadow);

 				lru_cache_add(page);
 				swap_readpage(page, true);
@@ -3263,10 +3263,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	/* ksm created a completely new copy */
 	if (unlikely(page != swapcache && swapcache)) {
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 	} else {
 		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
-		activate_page(page);
 	}

 	swap_free(entry);
@@ -3411,7 +3410,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)

 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, vmf->address, false);
-	lru_cache_add_active_or_unevictable(page, vma);
+	lru_cache_add_inactive_or_unevictable(page, vma);
 setpte:
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
@@ -3669,7 +3668,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
 	} else {
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, false);
@@ -4357,6 +4356,67 @@ retry_pud:
 	return handle_pte_fault(&vmf);
 }

+/**
+ * mm_account_fault - Do page fault accountings
+ *
+ * @regs: the pt_regs struct pointer.  When set to NULL, will skip accounting
+ *        of perf event counters, but we'll still do the per-task accounting to
+ *        the task who triggered this page fault.
+ * @address: the faulted address.
+ * @flags: the fault flags.
+ * @ret: the fault retcode.
+ *
+ * This will take care of most of the page fault accountings.  Meanwhile, it
+ * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter
+ * updates.  However note that the handling of PERF_COUNT_SW_PAGE_FAULTS should
+ * still be in per-arch page fault handlers at the entry of page fault.
+ */
+static inline void mm_account_fault(struct pt_regs *regs,
+				    unsigned long address, unsigned int flags,
+				    vm_fault_t ret)
+{
+	bool major;
+
+	/*
+	 * We don't do accounting for some specific faults:
+	 *
+	 * - Unsuccessful faults (e.g. when the address wasn't valid).  That
+	 *   includes arch_vma_access_permitted() failing before reaching here.
+	 *   So this is not a "this many hardware page faults" counter.  We
+	 *   should use the hw profiling for that.
+	 *
+	 * - Incomplete faults (VM_FAULT_RETRY).  They will only be counted
+	 *   once they're completed.
+	 */
+	if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY))
+		return;
+
+	/*
+	 * We define the fault as a major fault when the final successful fault
+	 * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't
+	 * handle it immediately previously).
+	 */
+	major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED);
+
+	if (major)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+
+	/*
+	 * If the fault is done for GUP, regs will be NULL.  We only do the
+	 * accounting for the per thread fault counters who triggered the
+	 * fault, and we skip the perf event updates.
+	 */
+	if (!regs)
+		return;
+
+	if (major)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	else
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+}
+
 /*
  * By the time we get here, we already hold the mm semaphore
  *
@@ -4364,7 +4424,7 @@ retry_pud:
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
 vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+			   unsigned int flags, struct pt_regs *regs)
 {
 	vm_fault_t ret;
@@ -4405,6 +4465,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 			mem_cgroup_oom_synchronize(false);
 	}

+	mm_account_fault(regs, address, flags, ret);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
@@ -4678,7 +4740,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		void *maddr;
 		struct page *page = NULL;

-		ret = get_user_pages_remote(tsk, mm, addr, 1,
+		ret = get_user_pages_remote(mm, addr, 1,
 				gup_flags, &page, &vma, NULL);
 		if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
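The handle_mm_fault() signature change above means an architecture's fault handler now hands its pt_regs to the core and drops its own maj_flt/min_flt and PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] bookkeeping; only the overall PERF_COUNT_SW_PAGE_FAULTS event stays in arch code, as the new comment says. A minimal sketch of what such a call site could look like after this series; the handler name, locking shape, and error handling here are illustrative, not taken from this diff:

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>

/* Illustrative arch fault handler; the VM_FAULT_RETRY loop is omitted. */
static void example_do_page_fault(struct pt_regs *regs, unsigned long address)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	vm_fault_t fault;

	/* PERF_COUNT_SW_PAGE_FAULTS stays in the arch handler, per the new comment. */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	mmap_read_lock(mm);
	vma = find_vma(mm, address);
	if (!vma || vma->vm_start > address) {
		mmap_read_unlock(mm);
		return;	/* deliver SIGSEGV, not shown */
	}

	/*
	 * regs is non-NULL, so mm_account_fault() bumps current->maj_flt or
	 * min_flt and emits PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN]; the arch
	 * handler no longer does that by hand.
	 */
	fault = handle_mm_fault(vma, address, FAULT_FLAG_DEFAULT, regs);
	if (fault & VM_FAULT_ERROR) {
		/* OOM/SIGBUS/SIGSEGV handling, not shown */
	}

	mmap_read_unlock(mm);
}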
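The new WARN_ON_ONCE(!PAGE_ALIGNED(addr)) in remap_pfn_range() only affects callers that pass an unaligned target address; the usual driver pattern of mapping from vma->vm_start keeps working, since ->mmap is always handed a page-aligned range. A hypothetical driver mmap method for illustration; the names and the physical base are assumptions, not from this patch:

#include <linux/fs.h>
#include <linux/mm.h>

/* Assumed page-aligned physical base of some device memory (illustrative). */
#define EXAMPLE_PHYS_BASE	0x90000000UL

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/*
	 * vma->vm_start is page aligned, so the new PAGE_ALIGNED() check in
	 * remap_pfn_range() passes; an unaligned addr now warns once and
	 * fails with -EINVAL instead of being mapped anyway.
	 */
	return remap_pfn_range(vma, vma->vm_start,
			       EXAMPLE_PHYS_BASE >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}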
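get_user_pages_remote() likewise loses its task_struct argument in this series: callers pass only the target mm, and per-task fault accounting for GUP now happens in handle_mm_fault() with regs == NULL (per-task counters only, no perf events, per the mm_account_fault() comment). A sketch of an updated caller with illustrative names; as before, the caller must hold the mmap lock of the target mm:

#include <linux/mm.h>

/* Pin one page of a foreign mm at addr; caller holds mmap_read_lock(mm). */
static long example_pin_remote_page(struct mm_struct *mm, unsigned long addr,
				    struct page **page)
{
	/* Before this series: get_user_pages_remote(tsk, mm, addr, 1, ...) */
	return get_user_pages_remote(mm, addr, 1, FOLL_FORCE | FOLL_WRITE,
				     page, NULL, NULL);
}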