Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	91
1 file changed, 81 insertions, 10 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ec49d9ef1eef..3edb759c5c7d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls.  It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed.  It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
 	VMA_NEEDS_RESV,
 	VMA_COMMIT_RESV,
 	VMA_END_RESV,
+	VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
 				struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
 		region_abort(resv, idx, idx + 1);
 		ret = 0;
 		break;
+	case VMA_ADD_RESV:
+		if (vma->vm_flags & VM_MAYSHARE)
+			ret = region_add(resv, idx, idx + 1);
+		else {
+			region_abort(resv, idx, idx + 1);
+			ret = region_del(resv, idx, idx + 1);
+		}
+		break;
 	default:
 		BUG();
 	}
@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
 	(void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths.  In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed.  If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page.  When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map.  Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address,
+			struct page *page)
+{
+	if (unlikely(PagePrivate(page))) {
+		long rc = vma_needs_reservation(h, vma, address);
+
+		if (unlikely(rc < 0)) {
+			/*
+			 * Rare out of memory condition in reserve map
+			 * manipulation.  Clear PagePrivate so that
+			 * global reserve count will not be incremented
+			 * by free_huge_page.  This will make it appear
+			 * as though the reservation for this page was
+			 * consumed.  This may prevent the task from
+			 * faulting in the page at a later time.  This
+			 * is better than inconsistent global huge page
+			 * accounting of reserve counts.
+			 */
+			ClearPagePrivate(page);
+		} else if (rc) {
+			rc = vma_add_reservation(h, vma, address);
+			if (unlikely(rc < 0))
+				/*
+				 * See above comment about rare out of
+				 * memory condition.
+				 */
+				ClearPagePrivate(page);
+		} else
+			vma_end_reservation(h, vma, address);
+	}
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
@@ -3222,6 +3286,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	BUG_ON(start & ~huge_page_mask(h));
 	BUG_ON(end & ~huge_page_mask(h));
 
+	/*
+	 * This is a hugetlb vma, all the pte entries should point
+	 * to huge page.
+	 */
+	tlb_remove_check_page_size_change(tlb, sz);
 	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	address = start;
@@ -3272,7 +3341,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		}
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
-		tlb_remove_tlb_entry(tlb, ptep, address);
+		tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
 		if (huge_pte_dirty(pte))
 			set_page_dirty(page);
 
@@ -3386,15 +3455,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address, pte_t *ptep, pte_t pte,
-			struct page *pagecache_page, spinlock_t *ptl)
+		       unsigned long address, pte_t *ptep,
+		       struct page *pagecache_page, spinlock_t *ptl)
 {
+	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
 	int ret = 0, outside_reserve = 0;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
+	pte = huge_ptep_get(ptep);
 	old_page = pte_page(pte);
 
 retry_avoidcopy:
@@ -3498,6 +3569,7 @@ retry_avoidcopy:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+	restore_reserve_on_error(h, vma, address, new_page);
 	put_page(new_page);
 out_release_old:
 	put_page(old_page);
@@ -3646,8 +3718,7 @@ retry:
 		vma_end_reservation(h, vma, address);
 	}
 
-	ptl = huge_pte_lockptr(h, mm, ptep);
-	spin_lock(ptl);
+	ptl = huge_pte_lock(h, mm, ptep);
 	size = i_size_read(mapping->host) >> huge_page_shift(h);
 	if (idx >= size)
 		goto backout;
@@ -3668,7 +3739,7 @@ retry:
 	hugetlb_count_add(pages_per_huge_page(h), mm);
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
+		ret = hugetlb_cow(mm, vma, address, ptep, page, ptl);
 	}
 
 	spin_unlock(ptl);
@@ -3680,6 +3751,7 @@ backout:
 	spin_unlock(ptl);
backout_unlocked:
 	unlock_page(page);
+	restore_reserve_on_error(h, vma, address, page);
 	put_page(page);
 	goto out;
 }
@@ -3822,8 +3894,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!huge_pte_write(entry)) {
-			ret = hugetlb_cow(mm, vma, address, ptep, entry,
-					pagecache_page, ptl);
+			ret = hugetlb_cow(mm, vma, address, ptep,
+					  pagecache_page, ptl);
 			goto out_put_page;
 		}
 		entry = huge_pte_mkdirty(entry);
@@ -4264,8 +4336,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!spte)
 		goto out;
 
-	ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
-	spin_lock(ptl);
+	ptl = huge_pte_lock(hstate_vma(vma), mm, spte);
 	if (pud_none(*pud)) {
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
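
For readers following the reservation accounting, the decision tree in
restore_reserve_on_error above can be exercised on its own.  The sketch
below is a minimal userspace model, not kernel code: the stub_* functions
are hypothetical stand-ins for the real vma_needs_reservation and
vma_add_reservation helpers in the diff, and the printed strings merely
name the action the kernel function would take.

#include <stdio.h>
#include <stdbool.h>

/*
 * Hypothetical stand-ins for the kernel helpers.  Return-value
 * convention follows the patch: rc < 0 means the reserve map could not
 * be manipulated (rare out-of-memory), rc > 0 means an entry is missing
 * and must be restored, rc == 0 means an entry already exists.
 */
static long stub_vma_needs_reservation(void) { return 1; }
static long stub_vma_add_reservation(void)   { return 0; }

/*
 * Model of the restore_reserve_on_error decision tree: given whether
 * PagePrivate was set on the newly allocated page (i.e. whether
 * alloc_huge_page consumed a reservation), report which accounting
 * action the error path takes before the page is freed.
 */
static void restore_reserve_on_error_model(bool page_private)
{
	long rc;

	if (!page_private) {
		puts("no reservation was consumed; nothing to restore");
		return;
	}

	rc = stub_vma_needs_reservation();
	if (rc < 0) {
		/*
		 * Reserve map manipulation failed: clear PagePrivate so
		 * free_huge_page will not bump the global reserve count,
		 * keeping global and per-map accounting consistent.
		 */
		puts("ClearPagePrivate(page)");
	} else if (rc > 0) {
		/*
		 * Entry missing from the reserve map: restore it so the
		 * map matches the adjustment free_huge_page will make.
		 */
		if (stub_vma_add_reservation() < 0)
			puts("ClearPagePrivate(page)  /* map OOM on add */");
		else
			puts("reserve map entry restored");
	} else {
		/*
		 * Entry already present: just end the in-progress
		 * region_chg transaction started by vma_needs_reservation.
		 */
		puts("vma_end_reservation(h, vma, address)");
	}
}

int main(void)
{
	restore_reserve_on_error_model(true);
	restore_reserve_on_error_model(false);
	return 0;
}

The design choice mirrors the comment in the patch: when the reserve map
itself cannot be updated, dropping PagePrivate sacrifices one task's
future reservation rather than leaving the global reserve count
permanently out of sync with the map.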