Diffstat (limited to 'mm/mmap.c')
-rw-r--r--	mm/mmap.c	240
1 file changed, 155 insertions, 85 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 8f1000bc35df..547b40531791 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -193,8 +193,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *brkvma, *next = NULL;
 	unsigned long min_brk;
-	bool populate;
-	bool downgraded = false;
+	bool populate = false;
 	LIST_HEAD(uf);
 	struct vma_iterator vmi;
 
@@ -236,13 +235,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto success;
 	}
 
-	/*
-	 * Always allow shrinking brk.
-	 * do_vma_munmap() may downgrade mmap_lock to read.
-	 */
+	/* Always allow shrinking brk. */
 	if (brk <= mm->brk) {
-		int ret;
-
 		/* Search one past newbrk */
 		vma_iter_init(&vmi, mm, newbrk);
 		brkvma = vma_find(&vmi, oldbrk);
@@ -250,19 +244,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 			goto out; /* mapping intersects with an existing non-brk vma. */
 		/*
 		 * mm->brk must be protected by write mmap_lock.
-		 * do_vma_munmap() may downgrade the lock, so update it
+		 * do_vma_munmap() will drop the lock on success, so update it
 		 * before calling do_vma_munmap().
 		 */
 		mm->brk = brk;
-		ret = do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true);
-		if (ret == 1) {
-			downgraded = true;
-			goto success;
-		} else if (!ret)
-			goto success;
-
-		mm->brk = origbrk;
-		goto out;
+		if (do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true))
+			goto out;
+
+		goto success_unlocked;
 	}
 
 	if (check_brk_limits(oldbrk, newbrk - oldbrk))
@@ -283,19 +272,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 	mm->brk = brk;
+	if (mm->def_flags & VM_LOCKED)
+		populate = true;
 
 success:
-	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
-	if (downgraded)
-		mmap_read_unlock(mm);
-	else
-		mmap_write_unlock(mm);
+	mmap_write_unlock(mm);
+success_unlocked:
 	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
 		mm_populate(oldbrk, newbrk - oldbrk);
 	return brk;
 
 out:
+	mm->brk = origbrk;
 	mmap_write_unlock(mm);
 	return origbrk;
 }
@@ -1948,7 +1937,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
-int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *next;
@@ -2040,6 +2029,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
+ * mmap_lock held for writing.
  */
 int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 {
@@ -2048,16 +2038,20 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *prev;
 	int error = 0;
 
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		return -EFAULT;
+
 	address &= PAGE_MASK;
-	if (address < mmap_min_addr)
+	if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
 		return -EPERM;
 
 	/* Enforce stack_guard_gap */
 	prev = mas_prev(&mas, 0);
 	/* Check that both stack segments have the same anon_vma? */
-	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
-			vma_is_accessible(prev)) {
-		if (address - prev->vm_end < stack_guard_gap)
+	if (prev) {
+		if (!(prev->vm_flags & VM_GROWSDOWN) &&
+		    vma_is_accessible(prev) &&
+		    (address - prev->vm_end < stack_guard_gap))
 			return -ENOMEM;
 	}
@@ -2137,13 +2131,12 @@ static int __init cmdline_parse_stack_guard_gap(char *p)
 __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
 
 #ifdef CONFIG_STACK_GROWSUP
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
 {
 	return expand_upwards(vma, address);
 }
 
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma, *prev;
 
@@ -2151,20 +2144,23 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	vma = find_vma_prev(mm, addr, &prev);
 	if (vma && (vma->vm_start <= addr))
 		return vma;
-	if (!prev || expand_stack(prev, addr))
+	if (!prev)
+		return NULL;
+	if (expand_stack_locked(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED)
 		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
 	return prev;
 }
 #else
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
 {
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+		return -EINVAL;
 	return expand_downwards(vma, address);
 }
 
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma;
 	unsigned long start;
@@ -2175,10 +2171,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 		return NULL;
 	if (vma->vm_start <= addr)
 		return vma;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		return NULL;
 	start = vma->vm_start;
-	if (expand_stack(vma, addr))
+	if (expand_stack_locked(vma, addr))
 		return NULL;
 	if (vma->vm_flags & VM_LOCKED)
 		populate_vma_page_range(vma, addr, start, NULL);
@@ -2186,7 +2180,91 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	return vma;
 }
 #endif
-EXPORT_SYMBOL_GPL(find_extend_vma);
+
+/*
+ * IA64 has some horrid mapping rules: it can expand both up and down,
+ * but with various special rules.
+ *
+ * We'll get rid of this architecture eventually, so the ugliness is
+ * temporary.
+ */
+#ifdef CONFIG_IA64
+static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr)
+{
+	return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) &&
+		REGION_OFFSET(addr) < RGN_MAP_LIMIT;
+}
+
+/*
+ * IA64 stacks grow down, but there's a special register backing store
+ * that can grow up. Only sequentially, though, so the new address must
+ * match vm_end.
+ */
+static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (!vma_expand_ok(vma, addr))
+		return -EFAULT;
+	if (vma->vm_end != (addr & PAGE_MASK))
+		return -EFAULT;
+	return expand_upwards(vma, addr);
+}
+
+static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (!vma_expand_ok(vma, addr))
+		return -EFAULT;
+	return expand_downwards(vma, addr);
+}
+
+#elif defined(CONFIG_STACK_GROWSUP)
+
+#define vma_expand_up(vma,addr) expand_upwards(vma, addr)
+#define vma_expand_down(vma, addr) (-EFAULT)
+
+#else
+
+#define vma_expand_up(vma,addr) (-EFAULT)
+#define vma_expand_down(vma, addr) expand_downwards(vma, addr)
+
+#endif
+
+/*
+ * expand_stack(): legacy interface for page faulting. Don't use unless
+ * you have to.
+ *
+ * This is called with the mm locked for reading, drops the lock, takes
+ * the lock for writing, tries to look up a vma again, expands it if
+ * necessary, and downgrades the lock to reading again.
+ *
+ * If no vma is found or it can't be expanded, it returns NULL and has
+ * dropped the lock.
+ */
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma, *prev;
+
+	mmap_read_unlock(mm);
+	if (mmap_write_lock_killable(mm))
+		return NULL;
+
+	vma = find_vma_prev(mm, addr, &prev);
+	if (vma && vma->vm_start <= addr)
+		goto success;
+
+	if (prev && !vma_expand_up(prev, addr)) {
+		vma = prev;
+		goto success;
+	}
+
+	if (vma && !vma_expand_down(vma, addr))
+		goto success;
+
+	mmap_write_unlock(mm);
+	return NULL;
+
+success:
+	mmap_write_downgrade(mm);
+	return vma;
+}
 
 /*
  * Ok - we have the memory areas we should free on a maple tree so release them,
@@ -2339,14 +2417,16 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
  * @start: The aligned start address to munmap.
  * @end: The aligned end address to munmap.
  * @uf: The userfaultfd list_head
- * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
+ * @unlock: Set to true to drop the mmap_lock.  Unlocking only happens on
+ * success.
  *
- * If @downgrade is true, check return code for potential release of the lock.
+ * Return: 0 on success and drops the lock if so directed, error and leaves the
+ * lock held otherwise.
  */
 static int
 do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		    struct mm_struct *mm, unsigned long start,
-		    unsigned long end, struct list_head *uf, bool downgrade)
+		    unsigned long end, struct list_head *uf, bool unlock)
 {
 	struct vm_area_struct *prev, *next = NULL;
 	struct maple_tree mt_detach;
@@ -2400,7 +2480,8 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		}
 		vma_start_write(next);
 		mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
-		if (mas_store_gfp(&mas_detach, next, GFP_KERNEL))
+		error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
+		if (error)
 			goto munmap_gather_failed;
 		vma_mark_detached(next, true);
 		if (next->vm_flags & VM_LOCKED)
@@ -2453,41 +2534,31 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		BUG_ON(count != test_count);
 	}
 #endif
-	/* Point of no return */
-	error = -ENOMEM;
 	vma_iter_set(vmi, start);
-	if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL))
+	error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
+	if (error)
 		goto clear_tree_failed;
 
+	/* Point of no return */
 	mm->locked_vm -= locked_vm;
 	mm->map_count -= count;
-	/*
-	 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
-	 * VM_GROWSUP VMA. Such VMAs can change their size under
-	 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
-	 */
-	if (downgrade) {
-		if (next && (next->vm_flags & VM_GROWSDOWN))
-			downgrade = false;
-		else if (prev && (prev->vm_flags & VM_GROWSUP))
-			downgrade = false;
-		else
-			mmap_write_downgrade(mm);
-	}
+	if (unlock)
+		mmap_write_downgrade(mm);
 
 	/*
 	 * We can free page tables without write-locking mmap_lock because VMAs
 	 * were isolated before we downgraded mmap_lock.
 	 */
-	unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade);
+	unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock);
 	/* Statistics and freeing VMAs */
 	mas_set(&mas_detach, start);
 	remove_mt(mm, &mas_detach);
 	__mt_destroy(&mt_detach);
-
-
 	validate_mm(mm);
-	return downgrade ? 1 : 0;
+	if (unlock)
+		mmap_read_unlock(mm);
+
+	return 0;
 
 clear_tree_failed:
 userfaultfd_error:
@@ -2510,18 +2581,18 @@ map_count_exceeded:
  * @start: The start address to munmap
  * @len: The length of the range to munmap
  * @uf: The userfaultfd list_head
- * @downgrade: set to true if the user wants to attempt to write_downgrade the
- * mmap_lock
+ * @unlock: set to true if the user wants to drop the mmap_lock on success
 *
 * This function takes a @mas that is either pointing to the previous VMA or set
 * to MA_START and sets it up to remove the mapping(s).  The @len will be
 * aligned and any arch_unmap work will be performed.
 *
- * Returns: -EINVAL on failure, 1 on success and unlock, 0 otherwise.
+ * Return: 0 on success and drops the lock if so directed, error and leaves the
+ * lock held otherwise.
 */
 int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 		  unsigned long start, size_t len, struct list_head *uf,
-		  bool downgrade)
+		  bool unlock)
 {
 	unsigned long end;
 	struct vm_area_struct *vma;
@@ -2538,10 +2609,13 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 
 	/* Find the first overlapping VMA */
 	vma = vma_find(vmi, end);
-	if (!vma)
+	if (!vma) {
+		if (unlock)
+			mmap_write_unlock(mm);
 		return 0;
+	}
 
-	return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
+	return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
 }
 
 /* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
@@ -2549,6 +2623,8 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 * @start: The start address to munmap
 * @len: The length to be munmapped.
 * @uf: The userfaultfd list_head
+ *
+ * Return: 0 on success, error otherwise.
 */
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	      struct list_head *uf)
@@ -2809,7 +2885,7 @@ unacct_error:
 	return error;
 }
 
-static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
+static int __vm_munmap(unsigned long start, size_t len, bool unlock)
 {
 	int ret;
 	struct mm_struct *mm = current->mm;
@@ -2819,16 +2895,8 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade);
-	/*
-	 * Returning 1 indicates mmap_lock is downgraded.
-	 * But 1 is not legal return value of vm_munmap() and munmap(), reset
-	 * it to 0 before return.
-	 */
-	if (ret == 1) {
-		mmap_read_unlock(mm);
-		ret = 0;
-	} else
+	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, unlock);
+	if (ret || !unlock)
 		mmap_write_unlock(mm);
 
 	userfaultfd_unmap_complete(mm, &uf);
@@ -2938,21 +3006,23 @@ out:
 * @start: the start of the address to unmap
 * @end: The end of the address to unmap
 * @uf: The userfaultfd list_head
- * @downgrade: Attempt to downgrade or not
+ * @unlock: Drop the lock on success
 *
- * Returns: 0 on success and not downgraded, 1 on success and downgraded.
 * unmaps a VMA mapping when the vma iterator is already in position.
 * Does not handle alignment.
+ *
+ * Return: 0 on success and drops the lock if so directed, error on failure and
+ * will still hold the lock.
 */
 int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		  unsigned long start, unsigned long end,
-		  struct list_head *uf, bool downgrade)
+		unsigned long start, unsigned long end, struct list_head *uf,
+		bool unlock)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret;
 
 	arch_unmap(mm, start, end);
-	ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
+	ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
 	validate_mm(mm);
 	return ret;
 }
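
For context, a minimal caller sketch (not part of the patch above) of how a page-fault path would use the reworked expand_stack(). The helper name fault_find_vma() is hypothetical; the locking contract it relies on is the one documented in the new expand_stack() comment: entered with mmap_lock held for reading, returns with the lock downgraded back to read mode on success, or returns NULL with the lock dropped entirely on failure.

/*
 * Hypothetical caller sketch, not from this commit.  Assumes the fault
 * handler already did mmap_read_lock(mm), as the usual fault paths do.
 */
static struct vm_area_struct *fault_find_vma(struct mm_struct *mm,
					     unsigned long addr)
{
	struct vm_area_struct *vma = find_vma(mm, addr);

	/* Fast path: address is inside an existing vma, read lock kept. */
	if (vma && vma->vm_start <= addr)
		return vma;

	/*
	 * Slow path: expand_stack() drops the read lock, retakes it for
	 * writing, re-walks the tree, expands the stack vma if the rules
	 * allow it, and downgrades back to read mode.  On failure it
	 * returns NULL with the mmap_lock no longer held at all, so this
	 * caller must not unlock again on that path.
	 */
	return expand_stack(mm, addr);
}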
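
Similarly, a sketch of the new munmap calling convention established here (it mirrors the __vm_munmap() hunk above; the wrapper name is made up for illustration): with unlock == true, do_vmi_munmap() returns 0 and has already dropped the mmap_lock, while any error return leaves the lock held. This replaces the old protocol where a return of 1 signalled a downgraded lock.

/* Hypothetical wrapper, illustration only. */
static int munmap_dropping_lock(struct mm_struct *mm, unsigned long start,
				size_t len)
{
	int ret;
	LIST_HEAD(uf);
	VMA_ITERATOR(vmi, mm, start);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	/* unlock == true: on success the mmap_lock is already dropped. */
	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, true);
	if (ret)
		mmap_write_unlock(mm);	/* error path: the lock is still held */

	userfaultfd_unmap_complete(mm, &uf);
	return ret;
}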