Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	225
1 file changed, 141 insertions, 84 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..19f47d7b9b86 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 
-/*
- * See the comment near struct mmu_table_batch.
- */
-
 static void tlb_remove_table_smp_sync(void *arg)
 {
-	/* Simply deliver the interrupt */
+	struct mm_struct __maybe_unused *mm = arg;
+	/*
+	 * On most architectures this does nothing. Simply delivering the
+	 * interrupt is enough to prevent races with software page table
+	 * walking like that done in get_user_pages_fast.
+	 *
+	 * See the comment near struct mmu_table_batch.
+	 */
+	tlb_flush_remove_tables_local(mm);
 }
 
-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
 {
 	/*
 	 * This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
 	 * It is however sufficient for software page-table walkers that rely on
 	 * IRQ disabling. See the comment near struct mmu_table_batch.
 	 */
-	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+	smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
 	__tlb_remove_table(table);
 }
 
@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
+	tlb_flush_remove_tables(tlb->mm);
+
 	if (*batch) {
 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
 		*batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	if (*batch == NULL) {
 		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		if (*batch == NULL) {
-			tlb_remove_table_one(table);
+			tlb_remove_table_one(table, tlb);
 			return;
 		}
 		(*batch)->nr = 0;
@@ -853,6 +859,10 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 				return NULL;
 			}
 		}
+
+		if (pte_devmap(pte))
+			return NULL;
+
 		print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
@@ -917,6 +927,8 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
+	if (pmd_devmap(pmd))
+		return NULL;
 	if (is_zero_pfn(pfn))
 		return NULL;
 	if (unlikely(pfn > highest_memmap_pfn))
@@ -1417,11 +1429,9 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 	do {
 		next = pmd_addr_end(addr, end);
 		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
-			if (next - addr != HPAGE_PMD_SIZE) {
-				VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
-				    !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+			if (next - addr != HPAGE_PMD_SIZE)
 				__split_huge_pmd(vma, pmd, addr, false, NULL);
-			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
+			else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				goto next;
 			/* fall through */
 		}
@@ -1603,20 +1613,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, start, end);
-	for ( ; vma && vma->vm_start < end; vma = vma->vm_next) {
+	for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
 		unmap_single_vma(&tlb, vma, start, end, NULL);
-
-		/*
-		 * zap_page_range does not specify whether mmap_sem should be
-		 * held for read or write. That allows parallel zap_page_range
-		 * operations to unmap a PTE and defer a flush meaning that
-		 * this call observes pte_none and fails to flush the TLB.
-		 * Rather than adding a complex API, ensure that no stale
-		 * TLB entries exist when this call returns.
-		 */
-		flush_tlb_range(vma, start, end);
-	}
-
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	tlb_finish_mmu(&tlb, start, end);
 }
@@ -1886,6 +1884,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
 
+	if (!pfn_modify_allowed(pfn, pgprot))
+		return -EACCES;
+
 	track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 	ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
@@ -1921,6 +1922,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 
 	track_pfn_insert(vma, &pgprot, pfn);
 
+	if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+		return -EACCES;
+
 	/*
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
@@ -1982,6 +1986,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
 	pte_t *pte;
 	spinlock_t *ptl;
+	int err = 0;
 
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
@@ -1989,12 +1994,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
+		if (!pfn_modify_allowed(pfn, prot)) {
+			err = -EACCES;
+			break;
+		}
 		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
-	return 0;
+	return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -2003,6 +2012,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pmd = pmd_alloc(mm, pud, addr);
@@ -2011,9 +2021,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 	do {
 		next = pmd_addr_end(addr, end);
-		if (remap_pte_range(mm, pmd, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pte_range(mm, pmd, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
@@ -2024,6 +2035,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 {
 	pud_t *pud;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pud = pud_alloc(mm, p4d, addr);
@@ -2031,9 +2043,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (remap_pmd_range(mm, pud, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pmd_range(mm, pud, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
@@ -2044,6 +2057,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 {
 	p4d_t *p4d;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	p4d = p4d_alloc(mm, pgd, addr);
@@ -2051,9 +2065,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 		return -ENOMEM;
 	do {
 		next = p4d_addr_end(addr, end);
-		if (remap_pud_range(mm, p4d, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pud_range(mm, p4d, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
 }
@@ -2503,7 +2518,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 		cow_user_page(new_page, old_page, vmf->address, vma);
 	}
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_new;
 
 	__SetPageUptodate(new_page);
@@ -3003,8 +3018,8 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-				&memcg, false)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+					&memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -3165,7 +3180,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+					false))
 		goto oom_free_page;
 
 	/*
@@ -3372,7 +3388,7 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 	if (write)
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
+	add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
 	/*
 	 * deposit and withdraw with pmd lock held
@@ -3661,7 +3677,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 	if (!vmf->cow_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+	if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
 				&vmf->memcg, false)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;
@@ -4125,7 +4141,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 * space.  Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_oom_enable();
+		mem_cgroup_enter_user_fault();
 
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
@@ -4133,7 +4149,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		ret = __handle_mm_fault(vma, address, flags);
 
 	if (flags & FAULT_FLAG_USER) {
-		mem_cgroup_oom_disable();
+		mem_cgroup_exit_user_fault();
 		/*
 		 * The task may have entered a memcg OOM situation but
 		 * if the allocation error was handled gracefully (no
@@ -4397,6 +4413,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 		return -EINVAL;
 
 	maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
+	if (!maddr)
+		return -ENOMEM;
+
 	if (write)
 		memcpy_toio(maddr + offset, buf, len);
 	else
@@ -4568,71 +4587,93 @@ EXPORT_SYMBOL(__might_fault);
 #endif
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
-static void clear_gigantic_page(struct page *page,
-				unsigned long addr,
-				unsigned int pages_per_huge_page)
-{
-	int i;
-	struct page *p = page;
-
-	might_sleep();
-	for (i = 0; i < pages_per_huge_page;
-	     i++, p = mem_map_next(p, page, i)) {
-		cond_resched();
-		clear_user_highpage(p, addr + i * PAGE_SIZE);
-	}
-}
-void clear_huge_page(struct page *page,
-		     unsigned long addr_hint, unsigned int pages_per_huge_page)
+/*
+ * Process all subpages of the specified huge page with the specified
+ * operation.  The target subpage will be processed last to keep its
+ * cache lines hot.
+ */
+static inline void process_huge_page(
+	unsigned long addr_hint, unsigned int pages_per_huge_page,
+	void (*process_subpage)(unsigned long addr, int idx, void *arg),
+	void *arg)
 {
 	int i, n, base, l;
 	unsigned long addr = addr_hint &
 		~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
 
-	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
-		clear_gigantic_page(page, addr, pages_per_huge_page);
-		return;
-	}
-
-	/* Clear sub-page to access last to keep its cache lines hot */
+	/* Process target subpage last to keep its cache lines hot */
 	might_sleep();
 	n = (addr_hint - addr) / PAGE_SIZE;
 	if (2 * n <= pages_per_huge_page) {
-		/* If sub-page to access in first half of huge page */
+		/* If target subpage in first half of huge page */
 		base = 0;
 		l = n;
-		/* Clear sub-pages at the end of huge page */
+		/* Process subpages at the end of huge page */
 		for (i = pages_per_huge_page - 1; i >= 2 * n; i--) {
 			cond_resched();
-			clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+			process_subpage(addr + i * PAGE_SIZE, i, arg);
 		}
 	} else {
-		/* If sub-page to access in second half of huge page */
+		/* If target subpage in second half of huge page */
 		base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
 		l = pages_per_huge_page - n;
-		/* Clear sub-pages at the begin of huge page */
+		/* Process subpages at the begin of huge page */
 		for (i = 0; i < base; i++) {
 			cond_resched();
-			clear_user_highpage(page + i, addr + i * PAGE_SIZE);
+			process_subpage(addr + i * PAGE_SIZE, i, arg);
 		}
 	}
 	/*
-	 * Clear remaining sub-pages in left-right-left-right pattern
-	 * towards the sub-page to access
+	 * Process remaining subpages in left-right-left-right pattern
+	 * towards the target subpage
	 */
 	for (i = 0; i < l; i++) {
 		int left_idx = base + i;
 		int right_idx = base + 2 * l - 1 - i;
 
 		cond_resched();
-		clear_user_highpage(page + left_idx,
-				    addr + left_idx * PAGE_SIZE);
+		process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg);
 		cond_resched();
-		clear_user_highpage(page + right_idx,
-				    addr + right_idx * PAGE_SIZE);
+		process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg);
 	}
 }
 
+static void clear_gigantic_page(struct page *page,
+				unsigned long addr,
+				unsigned int pages_per_huge_page)
+{
+	int i;
+	struct page *p = page;
+
+	might_sleep();
+	for (i = 0; i < pages_per_huge_page;
+	     i++, p = mem_map_next(p, page, i)) {
+		cond_resched();
+		clear_user_highpage(p, addr + i * PAGE_SIZE);
+	}
+}
+
+static void clear_subpage(unsigned long addr, int idx, void *arg)
+{
+	struct page *page = arg;
+
+	clear_user_highpage(page + idx, addr);
+}
+
+void clear_huge_page(struct page *page,
+		     unsigned long addr_hint, unsigned int pages_per_huge_page)
+{
+	unsigned long addr = addr_hint &
+		~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
+
+	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
+		clear_gigantic_page(page, addr, pages_per_huge_page);
+		return;
+	}
+
+	process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page);
+}
+
 static void copy_user_gigantic_page(struct page *dst, struct page *src,
 				    unsigned long addr,
 				    struct vm_area_struct *vma,
@@ -4652,11 +4693,31 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
 	}
 }
 
+struct copy_subpage_arg {
+	struct page *dst;
+	struct page *src;
+	struct vm_area_struct *vma;
+};
+
+static void copy_subpage(unsigned long addr, int idx, void *arg)
+{
+	struct copy_subpage_arg *copy_arg = arg;
+
+	copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx,
+			   addr, copy_arg->vma);
+}
+
 void copy_user_huge_page(struct page *dst, struct page *src,
-			 unsigned long addr, struct vm_area_struct *vma,
+			 unsigned long addr_hint, struct vm_area_struct *vma,
 			 unsigned int pages_per_huge_page)
 {
-	int i;
+	unsigned long addr = addr_hint &
+		~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
+	struct copy_subpage_arg arg = {
+		.dst = dst,
+		.src = src,
+		.vma = vma,
+	};
 
 	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
 		copy_user_gigantic_page(dst, src, addr, vma,
@@ -4664,11 +4725,7 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 		return;
 	}
 
-	might_sleep();
-	for (i = 0; i < pages_per_huge_page; i++) {
-		cond_resched();
-		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
-	}
+	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
 long copy_huge_page_from_user(struct page *dst_page,
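Note on the hunks starting at @@ -4568,71 +4587,93 @@: they factor the subpage-ordering policy of clear_huge_page() out into process_huge_page(), which takes a per-subpage callback so copy_user_huge_page() can reuse the same order. The sketch below is a minimal user-space illustration of just that ordering, not kernel code: subpages are modelled as plain indices, the callback signature is simplified, and the print callback stands in for clear_subpage()/copy_subpage(). It shows the half of the huge page away from the target being processed first, then left-right pairs converging on the target subpage, which is touched last so its cache lines stay hot for the access that triggered the clear or copy.

#include <stdio.h>

/* Same ordering logic as process_huge_page() in the patch, on bare indices. */
static void process_order(int pages_per_huge_page, int n,
			  void (*process_subpage)(int idx))
{
	int i, base, l;

	if (2 * n <= pages_per_huge_page) {
		/* Target subpage in first half: process the tail end first */
		base = 0;
		l = n;
		for (i = pages_per_huge_page - 1; i >= 2 * n; i--)
			process_subpage(i);
	} else {
		/* Target subpage in second half: process the head first */
		base = pages_per_huge_page - 2 * (pages_per_huge_page - n);
		l = pages_per_huge_page - n;
		for (i = 0; i < base; i++)
			process_subpage(i);
	}
	/* Remaining subpages left-right-left-right towards the target */
	for (i = 0; i < l; i++) {
		process_subpage(base + i);
		process_subpage(base + 2 * l - 1 - i);
	}
}

static void show(int idx)
{
	printf("%d ", idx);
}

int main(void)
{
	/* 8 subpages, target index 5: prints 0 1 2 7 3 6 4 5 (target last) */
	process_order(8, 5, show);
	printf("\n");
	return 0;
}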