Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	96
1 file changed, 71 insertions, 25 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d88fef9..f8ffd9412ec5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					unsigned long haddr, pmd_t *pmd,
 					struct page *page)
 {
+	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	spinlock_t *ptl;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+	if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+		return VM_FAULT_OOM;
+
 	pgtable = pte_alloc_one(mm, haddr);
-	if (unlikely(!pgtable))
+	if (unlikely(!pgtable)) {
+		mem_cgroup_cancel_charge(page, memcg);
 		return VM_FAULT_OOM;
+	}
 
 	clear_huge_page(page, haddr, HPAGE_PMD_NR);
 	/*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_none(*pmd))) {
 		spin_unlock(ptl);
-		mem_cgroup_uncharge_page(page);
+		mem_cgroup_cancel_charge(page, memcg);
 		put_page(page);
 		pte_free(mm, pgtable);
 	} else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr);
+		mem_cgroup_commit_charge(page, memcg, false);
+		lru_cache_add_active_or_unevictable(page, vma);
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		set_pmd_at(mm, haddr, pmd, entry);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
-		put_page(page);
-		count_vm_event(THP_FAULT_FALLBACK);
-		return VM_FAULT_FALLBACK;
-	}
 	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
-		mem_cgroup_uncharge_page(page);
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct page *page,
 					unsigned long haddr)
 {
+	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					       __GFP_OTHER_NODE,
 					       vma, address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_charge_anon(pages[i], mm,
-						       GFP_KERNEL))) {
+			     mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
+						   &memcg))) {
 			if (pages[i])
 				put_page(pages[i]);
-			mem_cgroup_uncharge_start();
 			while (--i >= 0) {
-				mem_cgroup_uncharge_page(pages[i]);
+				memcg = (void *)page_private(pages[i]);
+				set_page_private(pages[i], 0);
+				mem_cgroup_cancel_charge(pages[i], memcg);
 				put_page(pages[i]);
 			}
-			mem_cgroup_uncharge_end();
 			kfree(pages);
 			ret |= VM_FAULT_OOM;
 			goto out;
 		}
+		set_page_private(pages[i], (unsigned long)memcg);
 	}
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 		pte_t *pte, entry;
 		entry = mk_pte(pages[i], vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		memcg = (void *)page_private(pages[i]);
+		set_page_private(pages[i], 0);
 		page_add_new_anon_rmap(pages[i], vma, haddr);
+		mem_cgroup_commit_charge(pages[i], memcg, false);
+		lru_cache_add_active_or_unevictable(pages[i], vma);
 		pte = pte_offset_map(&_pmd, haddr);
 		VM_BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
 out_free_pages:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	mem_cgroup_uncharge_start();
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		mem_cgroup_uncharge_page(pages[i]);
+		memcg = (void *)page_private(pages[i]);
+		set_page_private(pages[i], 0);
+		mem_cgroup_cancel_charge(pages[i], memcg);
 		put_page(pages[i]);
 	}
-	mem_cgroup_uncharge_end();
 	kfree(pages);
 	goto out;
 }
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int ret = 0;
 	struct page *page = NULL, *new_page;
+	struct mem_cgroup *memcg;
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm,
+					   GFP_TRANSHUGE, &memcg))) {
 		put_page(new_page);
 		if (page) {
 			split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
 		put_user_huge_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 		spin_unlock(ptl);
-		mem_cgroup_uncharge_page(new_page);
+		mem_cgroup_cancel_charge(new_page, memcg);
 		put_page(new_page);
 		goto out_mn;
 	} else {
@@ -1170,6 +1181,8 @@ alloc:
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
+		mem_cgroup_commit_charge(new_page, memcg, false);
+		lru_cache_add_active_or_unevictable(new_page, vma);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache_pmd(vma, address, pmd);
 		if (!page) {
@@ -1681,7 +1694,7 @@ static void __split_huge_page_refcount(struct page *page,
 			   &page_tail->_count);
 
 		/* after clearing PageTail the gup refcount can be released */
-		smp_mb();
+		smp_mb__after_atomic();
 
 		/*
 		 * retain hwpoison flag of the poisoned tail page:
@@ -1775,21 +1788,24 @@ static int __split_huge_page_map(struct page *page,
 	if (pmd) {
 		pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 		pmd_populate(mm, &_pmd, pgtable);
+		if (pmd_write(*pmd))
+			BUG_ON(page_mapcount(page) != 1);
 
 		haddr = address;
 		for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
 			pte_t *pte, entry;
 			BUG_ON(PageCompound(page+i));
+			/*
+			 * Note that pmd_numa is not transferred deliberately
+			 * to avoid any possibility that pte_numa leaks to
+			 * a PROT_NONE VMA by accident.
+			 */
 			entry = mk_pte(page + i, vma->vm_page_prot);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 			if (!pmd_write(*pmd))
 				entry = pte_wrprotect(entry);
-			else
-				BUG_ON(page_mapcount(page) != 1);
 			if (!pmd_young(*pmd))
 				entry = pte_mkold(entry);
-			if (pmd_numa(*pmd))
-				entry = pte_mknuma(entry);
 			pte = pte_offset_map(&_pmd, haddr);
 			BUG_ON(!pte_none(*pte));
 			set_pte_at(mm, haddr, pte, entry);
@@ -2233,6 +2249,30 @@ static void khugepaged_alloc_sleep(void)
 
 static int khugepaged_node_load[MAX_NUMNODES];
 
+static bool khugepaged_scan_abort(int nid)
+{
+	int i;
+
+	/*
+	 * If zone_reclaim_mode is disabled, then no extra effort is made to
+	 * allocate memory locally.
+	 */
+	if (!zone_reclaim_mode)
+		return false;
+
+	/* If there is a count for this node already, it must be acceptable */
+	if (khugepaged_node_load[nid])
+		return false;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		if (!khugepaged_node_load[i])
+			continue;
+		if (node_distance(nid, i) > RECLAIM_DISTANCE)
+			return true;
+	}
+	return false;
+}
+
 #ifdef CONFIG_NUMA
 static int khugepaged_find_target_node(void)
 {
@@ -2389,6 +2429,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated;
 	unsigned long hstart, hend;
+	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -2399,7 +2440,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!new_page)
 		return;
 
-	if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
+	if (unlikely(mem_cgroup_try_charge(new_page, mm,
+					   GFP_TRANSHUGE, &memcg)))
 		return;
 
 	/*
@@ -2486,6 +2528,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address);
+	mem_cgroup_commit_charge(new_page, memcg, false);
+	lru_cache_add_active_or_unevictable(new_page, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
@@ -2499,7 +2543,7 @@ out_up_write:
 	return;
 
 out:
-	mem_cgroup_uncharge_page(new_page);
+	mem_cgroup_cancel_charge(new_page, memcg);
 	goto out_up_write;
 }
 
@@ -2545,6 +2589,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		 * hit record.
 		 */
		node = page_to_nid(page);
+		if (khugepaged_scan_abort(node))
+			goto out_unmap;
 		khugepaged_node_load[node]++;
 		VM_BUG_ON_PAGE(PageCompound(page), page);
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
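The recurring change in the hunks above is the conversion from the old mem_cgroup_charge_anon()/mem_cgroup_uncharge_page() calls to the three-step try/commit/cancel charge API. The sketch below is a rough illustration of that pattern, not part of this commit: the surrounding function and the example_prepare_mapping() helper are hypothetical stand-ins for the real page-table setup, and only the mem_cgroup_*, page_add_new_anon_rmap() and lru_cache_add_active_or_unevictable() calls are taken from the diff itself.

/* Illustration only -- hypothetical sketch of the charge pattern above. */
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/swap.h>

/* Hypothetical stand-in for the page-table setup (e.g. pte_alloc_one()). */
static int example_prepare_mapping(struct mm_struct *mm, unsigned long addr);

static int example_charge_and_map(struct mm_struct *mm,
				  struct vm_area_struct *vma,
				  unsigned long addr, struct page *page)
{
	struct mem_cgroup *memcg;

	/* 1. Reserve the charge before the page is visible to anyone. */
	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
		return VM_FAULT_OOM;

	if (example_prepare_mapping(mm, addr)) {
		/* 2. Nothing was published yet: just undo the reservation. */
		mem_cgroup_cancel_charge(page, memcg);
		return VM_FAULT_OOM;
	}

	/* 3. Publish the page: rmap first, then commit the charge and add
	 *    the page to the LRU, in the same order as the hunks above;
	 *    set_pte_at()/set_pmd_at() would follow here in the real code. */
	page_add_new_anon_rmap(page, vma, addr);
	mem_cgroup_commit_charge(page, memcg, false);
	lru_cache_add_active_or_unevictable(page, vma);
	return 0;
}

In the hunks above the same split is visible: every failure path taken before the page is mapped now calls mem_cgroup_cancel_charge() where it previously called mem_cgroup_uncharge_page(), and the commit plus LRU insertion happen only on the success path, right after the new rmap is established.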