Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	78
1 file changed, 30 insertions, 48 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d729ded0..bb11c474857e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2855,40 +2855,6 @@ out_release:
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-	address &= PAGE_MASK;
-	if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-		struct vm_area_struct *prev = vma->vm_prev;
-
-		/*
-		 * Is there a mapping abutting this one below?
-		 *
-		 * That's only ok if it's the same stack mapping
-		 * that has gotten split..
-		 */
-		if (prev && prev->vm_end == address)
-			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-		return expand_downwards(vma, address - PAGE_SIZE);
-	}
-	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-		struct vm_area_struct *next = vma->vm_next;
-
-		/* As VM_GROWSDOWN but s/below/above/ */
-		if (next && next->vm_start == address + PAGE_SIZE)
-			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-		return expand_upwards(vma, address + PAGE_SIZE);
-	}
-	return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_SHARED)
 		return VM_FAULT_SIGBUS;
 
-	/* Check if we need to add a guard page to the stack */
-	if (check_stack_guard_page(vma, vmf->address) < 0)
-		return VM_FAULT_SIGSEGV;
-
 	/*
 	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
 	 * pte_offset_map() on pmds where a huge pmd might be created
@@ -3029,6 +2991,17 @@ static int __do_fault(struct vm_fault *vmf)
 	return ret;
 }
 
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
 static int pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -3052,18 +3025,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 map_pte:
 	/*
 	 * If a huge pmd materialized under us just retry later.  Use
-	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
-	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
-	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
-	 * in a different thread of this mm, in turn leading to a misleading
-	 * pmd_trans_huge() retval.  All we have to ensure is that it is a
-	 * regular pmd that we can walk with pte_offset_map() and we can do that
-	 * through an atomic read in C, which is what pmd_trans_unstable()
-	 * provides.
+	 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+	 * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+	 * under us and then back to pmd_none, as a result of MADV_DONTNEED
+	 * running immediately after a huge pmd fault in a different thread of
+	 * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+	 * All we have to ensure is that it is a regular pmd that we can walk
+	 * with pte_offset_map() and we can do that through an atomic read in
+	 * C, which is what pmd_trans_unstable() provides.
 	 */
-	if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+	if (pmd_devmap_trans_unstable(vmf->pmd))
 		return VM_FAULT_NOPAGE;
 
+	/*
+	 * At this point we know that our vmf->pmd points to a page of ptes
+	 * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+	 * for the duration of the fault.  If a racing MADV_DONTNEED runs and
+	 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+	 * be valid and we will re-check to make sure the vmf->pte isn't
+	 * pte_none() under vmf->ptl protection when we return to
+	 * alloc_set_pte().
+	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
 	return 0;
@@ -3690,7 +3672,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 		vmf->pte = NULL;
 	} else {
 		/* See comment in pte_alloc_one_map() */
-		if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+		if (pmd_devmap_trans_unstable(vmf->pmd))
			return 0;
 		/*
 		 * A regular pmd is established and it can't morph into a huge
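The ordering note in the new pmd_devmap_trans_unstable() helper comes down to C's short-circuit evaluation of ||: because pmd_devmap() is tested first, a devmap pmd never reaches pmd_trans_unstable(), whose pmd_none_or_trans_huge_or_clear_bad() path would call pmd_clear_bad() and spam dmesg before still returning 1. The open-coded checks this patch replaces tested pmd_trans_unstable() first, which is exactly the noisy order. A minimal userspace sketch of that ordering point, using made-up stand-ins (struct toy_pmd, is_devmap(), is_unstable_noisy()) rather than the real kernel helpers:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a pmd entry; not the kernel's pmd_t. */
struct toy_pmd {
	bool devmap;
	bool trans_unstable;
};

/* Stand-in for pmd_devmap(): a pure predicate, no side effects. */
static bool is_devmap(const struct toy_pmd *pmd)
{
	return pmd->devmap;
}

/*
 * Stand-in for pmd_trans_unstable(): like the real helper it returns the
 * right answer for a devmap entry, but only after logging a "bad pmd"
 * style warning (stderr here, dmesg in the kernel).
 */
static bool is_unstable_noisy(const struct toy_pmd *pmd)
{
	if (pmd->devmap)
		fprintf(stderr, "toy pmd_clear_bad(): noisy but harmless\n");
	return pmd->devmap || pmd->trans_unstable;
}

/*
 * Mirrors pmd_devmap_trans_unstable(): the devmap test comes first, so
 * || short-circuits and the noisy helper is never called for devmap pmds.
 */
static bool devmap_trans_unstable(const struct toy_pmd *pmd)
{
	return is_devmap(pmd) || is_unstable_noisy(pmd);
}

int main(void)
{
	struct toy_pmd devmap_pmd = { .devmap = true, .trans_unstable = false };

	/* Prints 1 with nothing on stderr. */
	printf("%d\n", devmap_trans_unstable(&devmap_pmd));

	/* The reversed order also prints 1, but only after the warning. */
	printf("%d\n", is_unstable_noisy(&devmap_pmd) || is_devmap(&devmap_pmd));
	return 0;
}

Both orderings give the same result for a devmap pmd; the only difference is the log noise, which is why the patch folds both call sites into one helper with the quiet ordering.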