Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	150
1 files changed, 92 insertions, 58 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 87d935333f0d..a56e3ba816b2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -305,6 +305,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return 0;
+		batch = tlb->active;
 	}
 	VM_BUG_ON(batch->nr > batch->max);
@@ -1289,13 +1290,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	return addr;
 }
 
-#ifdef CONFIG_PREEMPT
-# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
-#else
-/* No preempt: go for improved straight-line efficiency */
-# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
-#endif
-
 /**
  * unmap_vmas - unmap a range of memory covered by a list of vma's
  * @tlb: address of the caller's struct mmu_gather
@@ -1309,10 +1303,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  *
  * Unmap all pages in the vma list.
  *
- * We aim to not hold locks for too long (for scheduling latency reasons).
- * So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
- * return the ending mmu_gather to the caller.
- *
  * Only addresses between `start' and `end' will be unmapped.
  *
  * The VMA list must be sorted in ascending virtual address order.
@@ -1815,7 +1805,63 @@ next_page:
 }
 EXPORT_SYMBOL(__get_user_pages);
 
-/**
+/*
+ * fixup_user_fault() - manually resolve a user page fault
+ * @tsk:	the task_struct to use for page fault accounting, or
+ *		NULL if faults are not to be recorded.
+ * @mm:		mm_struct of target mm
+ * @address:	user address
+ * @fault_flags:flags to pass down to handle_mm_fault()
+ *
+ * This is meant to be called in the specific scenario where for locking reasons
+ * we try to access user memory in atomic context (within a pagefault_disable()
+ * section), this returns -EFAULT, and we want to resolve the user fault before
+ * trying again.
+ *
+ * Typically this is meant to be used by the futex code.
+ *
+ * The main difference with get_user_pages() is that this function will
+ * unconditionally call handle_mm_fault() which will in turn perform all the
+ * necessary SW fixup of the dirty and young bits in the PTE, while
+ * handle_mm_fault() only guarantees to update these in the struct page.
+ *
+ * This is important for some architectures where those bits also gate the
+ * access permission to the page because they are maintained in software.  On
+ * such architectures, gup() will not be enough to make a subsequent access
+ * succeed.
+ *
+ * This should be called with the mm_sem held for read.
+ */
+int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long address, unsigned int fault_flags)
+{
+	struct vm_area_struct *vma;
+	int ret;
+
+	vma = find_extend_vma(mm, address);
+	if (!vma || address < vma->vm_start)
+		return -EFAULT;
+
+	ret = handle_mm_fault(mm, vma, address, fault_flags);
+	if (ret & VM_FAULT_ERROR) {
+		if (ret & VM_FAULT_OOM)
+			return -ENOMEM;
+		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
+			return -EHWPOISON;
+		if (ret & VM_FAULT_SIGBUS)
+			return -EFAULT;
+		BUG();
+	}
+	if (tsk) {
+		if (ret & VM_FAULT_MAJOR)
+			tsk->maj_flt++;
+		else
+			tsk->min_flt++;
+	}
+	return 0;
+}
+
+/*
  * get_user_pages() - pin user pages in memory
  * @tsk:	the task_struct to use for page fault accounting, or
  *		NULL if faults are not to be recorded.
@@ -2798,30 +2844,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
-{
-	struct address_space *mapping = inode->i_mapping;
-
-	/*
-	 * If the underlying filesystem is not going to provide
-	 * a way to truncate a range of blocks (punch a hole) -
-	 * we should return failure right now.
-	 */
-	if (!inode->i_op->truncate_range)
-		return -ENOSYS;
-
-	mutex_lock(&inode->i_mutex);
-	down_write(&inode->i_alloc_sem);
-	unmap_mapping_range(mapping, offset, (end - offset), 1);
-	truncate_inode_pages_range(mapping, offset, end);
-	unmap_mapping_range(mapping, offset, (end - offset), 1);
-	inode->i_op->truncate_range(inode, offset, end);
-	up_write(&inode->i_alloc_sem);
-	mutex_unlock(&inode->i_mutex);
-
-	return 0;
-}
-
 /*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3127,14 +3149,34 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *page_table;
 	spinlock_t *ptl;
 	struct page *page;
+	struct page *cow_page;
 	pte_t entry;
 	int anon = 0;
-	int charged = 0;
 	struct page *dirty_page = NULL;
 	struct vm_fault vmf;
 	int ret;
 	int page_mkwrite = 0;
 
+	/*
+	 * If we do COW later, allocate page befor taking lock_page()
+	 * on the file cache page. This will reduce lock holding time.
+	 */
+	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+
+		if (unlikely(anon_vma_prepare(vma)))
+			return VM_FAULT_OOM;
+
+		cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+		if (!cow_page)
+			return VM_FAULT_OOM;
+
+		if (mem_cgroup_newpage_charge(cow_page, mm, GFP_KERNEL)) {
+			page_cache_release(cow_page);
+			return VM_FAULT_OOM;
+		}
+	} else
+		cow_page = NULL;
+
 	vmf.virtual_address = (void __user *)(address & PAGE_MASK);
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
@@ -3143,12 +3185,13 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	ret = vma->vm_ops->fault(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
 			    VM_FAULT_RETRY)))
-		return ret;
+		goto uncharge_out;
 
 	if (unlikely(PageHWPoison(vmf.page))) {
 		if (ret & VM_FAULT_LOCKED)
 			unlock_page(vmf.page);
-		return VM_FAULT_HWPOISON;
+		ret = VM_FAULT_HWPOISON;
+		goto uncharge_out;
 	}
 
 	/*
@@ -3166,23 +3209,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = vmf.page;
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!(vma->vm_flags & VM_SHARED)) {
+			page = cow_page;
 			anon = 1;
-			if (unlikely(anon_vma_prepare(vma))) {
-				ret = VM_FAULT_OOM;
-				goto out;
-			}
-			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-						vma, address);
-			if (!page) {
-				ret = VM_FAULT_OOM;
-				goto out;
-			}
-			if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
-				ret = VM_FAULT_OOM;
-				page_cache_release(page);
-				goto out;
-			}
-			charged = 1;
 			copy_user_highpage(page, vmf.page, address, vma);
 			__SetPageUptodate(page);
 		} else {
@@ -3251,8 +3279,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, page_table);
 	} else {
-		if (charged)
-			mem_cgroup_uncharge_page(page);
+		if (cow_page)
+			mem_cgroup_uncharge_page(cow_page);
 		if (anon)
 			page_cache_release(page);
 		else
@@ -3261,7 +3289,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	pte_unmap_unlock(page_table, ptl);
 
-out:
 	if (dirty_page) {
 		struct address_space *mapping = page->mapping;
 
@@ -3291,6 +3318,13 @@ out:
 unwritable_page:
	page_cache_release(page);
 	return ret;
+uncharge_out:
+	/* fs's fault handler get error */
+	if (cow_page) {
+		mem_cgroup_uncharge_page(cow_page);
+		page_cache_release(cow_page);
+	}
+	return ret;
 }
 
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
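
For context on how the new fixup_user_fault() helper is meant to be used, the kernel-doc above describes the pattern: an access to user memory inside a pagefault_disable() section fails, and the caller then resolves the fault in a sleepable context before retrying. Below is a rough, hypothetical caller sketch in that spirit, not part of the patch: probe_futex_word() and its glue are invented for illustration, while fixup_user_fault(), FAULT_FLAG_WRITE, pagefault_disable()/pagefault_enable() and the mmap_sem rules come from the patch and the kernel APIs of that era (~3.1).

/* Hypothetical illustration only -- not part of the diff above. */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/uaccess.h>

/*
 * Futex-style probe of a user word: try the access atomically first,
 * and if the page is not present, fault it in with fixup_user_fault()
 * and ask the caller to retry.
 */
static int probe_futex_word(u32 __user *uaddr, u32 *dest)
{
	struct mm_struct *mm = current->mm;
	int ret;

	/* Fast path: no sleeping allowed, so page faults are disabled. */
	pagefault_disable();
	ret = __copy_from_user_inatomic(dest, uaddr, sizeof(u32));
	pagefault_enable();
	if (!ret)
		return 0;	/* page was present, value copied */

	/*
	 * Slow path: fault the page in by hand. FAULT_FLAG_WRITE mirrors
	 * the futex case, where the word must end up writable and the
	 * dirty/young bits must be fixed up even on archs that track
	 * them in software.
	 */
	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
			       FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	return ret ? ret : -EAGAIN;	/* -EAGAIN: retry the fast path */
}

The futex code's own caller is expected to look much like the slow path here: take mmap_sem for read, call fixup_user_fault() with FAULT_FLAG_WRITE, drop the lock, and retry the atomic access.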