Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	100
1 file changed, 68 insertions(+), 32 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index e18c57bdc75c..32e9b7aec366 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -300,15 +300,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->end);
-
-	if (!tlb->page_size)
-		tlb->page_size = page_size;
-	else {
-		if (page_size != tlb->page_size)
-			return true;
-	}
+	VM_WARN_ON(tlb->page_size != page_size);
 
 	batch = tlb->active;
+	/*
+	 * Add the page and check if we are full. If so
+	 * force a flush.
+	 */
+	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
@@ -316,7 +315,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 	}
 	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
-	batch->pages[batch->nr++] = page;
 	return false;
 }
 
@@ -528,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb,
 		end -= PMD_SIZE;
 	if (addr > end - 1)
 		return;
-
+	/*
+	 * We add page table cache pages with PAGE_SIZE
+	 * (see pte_free_tlb()); flush the tlb if we need to.
+	 */
+	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -1118,8 +1120,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	pte_t *start_pte;
 	pte_t *pte;
 	swp_entry_t entry;
-	struct page *pending_page = NULL;
 
+	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 again:
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
@@ -1172,7 +1174,6 @@ again:
 				print_bad_pte(vma, addr, ptent, page);
 			if (unlikely(__tlb_remove_page(tlb, page))) {
 				force_flush = 1;
-				pending_page = page;
 				addr += PAGE_SIZE;
 				break;
 			}
@@ -1213,11 +1214,6 @@ again:
 	if (force_flush) {
 		force_flush = 0;
 		tlb_flush_mmu_free(tlb);
-		if (pending_page) {
-			/* remove the page with new size */
-			__tlb_remove_pte_page(tlb, pending_page);
-			pending_page = NULL;
-		}
 		if (addr != end)
 			goto again;
 	}
@@ -1240,7 +1236,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next - addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
 				    !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
-				split_huge_pmd(vma, pmd, addr);
+				__split_huge_pmd(vma, pmd, addr, false, NULL);
 			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				goto next;
 			/* fall through */
@@ -1637,8 +1633,8 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
 
-	if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
-		return -EINVAL;
+
+	track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 	ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
 
@@ -1655,8 +1651,8 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
 
-	if (track_pfn_insert(vma, &pgprot, pfn))
-		return -EINVAL;
+
+	track_pfn_insert(vma, &pgprot, pfn);
 
 	/*
 	 * If we don't have pte special, then we have to use the pfn_valid()
@@ -2939,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return true;
 }
 
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+	struct vm_area_struct *vma = fe->vma;
+
+	pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+	/*
+	 * We are going to consume the prealloc table,
+	 * count that as nr_ptes.
+	 */
+	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	fe->prealloc_pte = 0;
+}
+
 static int do_set_pmd(struct fault_env *fe, struct page *page)
 {
 	struct vm_area_struct *vma = fe->vma;
@@ -2953,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = VM_FAULT_FALLBACK;
 	page = compound_head(page);
 
+	/*
+	 * Archs like ppc64 need additional space to store information
+	 * related to pte entry. Use the preallocated table for that.
+	 */
+	if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+		fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+		if (!fe->prealloc_pte)
+			return VM_FAULT_OOM;
+		smp_wmb(); /* See comment in __pte_alloc() */
+	}
+
 	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 	if (unlikely(!pmd_none(*fe->pmd)))
 		goto out;
@@ -2966,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
+	/*
+	 * deposit and withdraw with pmd lock held
+	 */
+	if (arch_needs_pgtable_deposit())
+		deposit_prealloc_pte(fe);
 
 	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
@@ -2975,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
+	/*
+	 * If we are going to fall back to a pte mapping, do a
+	 * withdraw with the pmd lock held.
+	 */
+	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+		fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+							       fe->pmd);
 	spin_unlock(fe->ptl);
 	return ret;
 }
@@ -3014,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 		ret = do_set_pmd(fe, page);
 		if (ret != VM_FAULT_FALLBACK)
-			return ret;
+			goto fault_handled;
 	}
 
 	if (!fe->pte) {
 		ret = pte_alloc_one_map(fe);
 		if (ret)
-			return ret;
+			goto fault_handled;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*fe->pte)))
-		return VM_FAULT_NOPAGE;
+	if (unlikely(!pte_none(*fe->pte))) {
+		ret = VM_FAULT_NOPAGE;
+		goto fault_handled;
+	}
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3045,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, fe->address, fe->pte);
+	ret = 0;
 
-	return 0;
+fault_handled:
+	/* preallocated pagetable is unused: free it */
+	if (fe->prealloc_pte) {
+		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+		fe->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static unsigned long fault_around_bytes __read_mostly =
@@ -3145,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
 
-	/* preallocated pagetable is unused: free it */
-	if (fe->prealloc_pte) {
-		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-		fe->prealloc_pte = 0;
-	}
 	/* Huge page is mapped? Page fault is solved */
 	if (pmd_trans_huge(*fe->pmd)) {
 		ret = VM_FAULT_NOPAGE;
@@ -3454,7 +3490,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
 	/* COW handled on pte level: split pmd */
 	VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma);
-	split_huge_pmd(fe->vma, fe->pmd, fe->address);
+	__split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL);
 
 	return VM_FAULT_FALLBACK;
 }
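A note on the first two hunks: __tlb_remove_page_size() now stores the page into the active batch before checking whether the batch is full, so a full batch only forces a flush when tlb_next_batch() cannot supply another one. The userspace sketch below illustrates that queue-then-check pattern; struct batch, BATCH_MAX and remove_page() are mock stand-ins for the kernel's mmu_gather machinery, not its real API.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define BATCH_MAX 4	/* tiny on purpose; the kernel sizes batches in pages */

/* Mock of one mmu_gather batch: pages queued for a deferred TLB flush. */
struct batch {
	size_t nr;
	void *pages[BATCH_MAX];
};

/*
 * Queue-then-check, mirroring the patched __tlb_remove_page_size():
 * the page is always stored first; the return value tells the caller
 * that a flush is required because no more batch space is available.
 */
static bool remove_page(struct batch *b, void *page)
{
	b->pages[b->nr++] = page;	/* add the page ... */
	if (b->nr == BATCH_MAX)		/* ... and check if we are full */
		return true;		/* no tlb_next_batch() here: force a flush */
	return false;
}

int main(void)
{
	struct batch b = { .nr = 0 };
	int pages[10];

	for (int i = 0; i < 10; i++) {
		if (remove_page(&b, &pages[i])) {
			printf("flush after %zu queued pages\n", b.nr);
			b.nr = 0;	/* stand-in for tlb_flush_mmu_free() */
		}
	}
	return 0;
}

The ordering is what makes the zap_pte_range() hunks possible: because the page is already queued when a flush is forced, the old pending_page bookkeeping that re-queued a page after the flush becomes unnecessary and is deleted.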
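The do_set_pmd()/alloc_set_pte() hunks all serve one lifecycle: preallocate a page table before taking the pmd lock, deposit it with pgtable_trans_huge_deposit() once the huge pmd is installed, withdraw it again if the fault falls back to ptes, and free whatever is still unconsumed at the single fault_handled exit. The following is a simplified userspace sketch of that invariant, with malloc()/free() standing in for pte_alloc_one()/pte_free() and plain int return values standing in for the VM_FAULT_* codes.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Mock fault state: only the fields the lifecycle needs. */
struct fault {
	void *prealloc_pte;	/* preallocated page table, if any */
	bool huge_ok;		/* can the fault be mapped with a huge pmd? */
};

/*
 * Mirrors the patched flow: allocate up front, hand the table over
 * (deposit) on the huge-pmd path, and free anything still unconsumed
 * at the common exit -- the invariant the fault_handled label enforces.
 */
static int handle_fault(struct fault *fe)
{
	int ret;

	/* pte_alloc_one() stand-in, done before "taking the pmd lock" */
	if (!fe->prealloc_pte) {
		fe->prealloc_pte = malloc(64);
		if (!fe->prealloc_pte)
			return -1;		/* VM_FAULT_OOM */
	}

	if (fe->huge_ok) {
		/* deposit_prealloc_pte() stand-in: the table is consumed */
		printf("deposited table %p\n", fe->prealloc_pte);
		fe->prealloc_pte = NULL;
		ret = 0;
	} else {
		ret = 1;			/* VM_FAULT_FALLBACK */
	}

	/*
	 * fault_handled: preallocated pagetable is unused: free it.
	 * (Simplified: on the fallback path the kernel may instead
	 * consume the table for the pte mapping via pte_alloc_one_map().)
	 */
	free(fe->prealloc_pte);
	fe->prealloc_pte = NULL;
	return ret;
}

int main(void)
{
	struct fault huge = { .huge_ok = true };
	struct fault fallback = { .huge_ok = false };

	printf("huge: %d, fallback: %d\n",
	       handle_fault(&huge), handle_fault(&fallback));
	return 0;
}

This also explains why the free-if-unused block moves out of do_fault_around() and into the common alloc_set_pte() exit: with do_set_pmd() now able to consume the preallocated table on archs where arch_needs_pgtable_deposit() is true, a single cleanup point covers every path.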