Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--   mm/mempolicy.c | 69
1 file changed, 52 insertions, 17 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 65e0874fce17..4ae967bcf954 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -68,7 +68,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/mempolicy.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
 #include <linux/kernel.h>
@@ -655,6 +655,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	return 1;
 }
 
+static const struct mm_walk_ops queue_pages_walk_ops = {
+	.hugetlb_entry		= queue_pages_hugetlb,
+	.pmd_entry		= queue_pages_pte_range,
+	.test_walk		= queue_pages_test_walk,
+};
+
 /*
  * Walk through page tables and collect pages to be migrated.
  *
@@ -679,15 +685,8 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		.nmask = nodes,
 		.prev = NULL,
 	};
-	struct mm_walk queue_pages_walk = {
-		.hugetlb_entry = queue_pages_hugetlb,
-		.pmd_entry = queue_pages_pte_range,
-		.test_walk = queue_pages_test_walk,
-		.mm = mm,
-		.private = &qp,
-	};
 
-	return walk_page_range(start, end, &queue_pages_walk);
+	return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
 }
 
 /*
@@ -1180,8 +1179,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1406,6 +1405,7 @@ static long kernel_mbind(unsigned long start, unsigned long len,
 	int err;
 	unsigned short mode_flags;
 
+	start = untagged_addr(start);
 	mode_flags = mode & MPOL_MODE_FLAGS;
 	mode &= ~MPOL_MODE_FLAGS;
 	if (mode >= MPOL_MAX)
@@ -1513,10 +1513,6 @@ static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
 	if (nodes_empty(*new))
 		goto out_put;
 
-	nodes_and(*new, *new, node_states[N_MEMORY]);
-	if (nodes_empty(*new))
-		goto out_put;
-
 	err = security_task_movememory(task);
 	if (err)
 		goto out_put;
@@ -1563,6 +1559,8 @@ static int kernel_get_mempolicy(int __user *policy,
 	int uninitialized_var(pval);
 	nodemask_t nodes;
 
+	addr = untagged_addr(addr);
+
 	if (nmask != NULL && maxnode < nr_node_ids)
 		return -EINVAL;
 
@@ -1734,7 +1732,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2083,6 +2081,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
+ *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2093,7 +2092,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2111,6 +2110,42 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+
+			/*
+			 * If hugepage allocations are configured to always
+			 * synchronous compact or the vma has been madvised
+			 * to prefer hugepage backing, retry allowing remote
+			 * memory as well.
+			 */
+			if (!page && (gfp & __GFP_DIRECT_RECLAIM))
+				page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_NORETRY, order);
+
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
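The first three hunks convert mempolicy's page-table walker to the split pagewalk API from <linux/pagewalk.h>: the callbacks move into a shared const struct mm_walk_ops, and walk_page_range() takes the mm and the private cookie directly instead of a caller-built struct mm_walk. Below is a minimal caller-side sketch of that API, assuming the post-refactor signatures used in this diff; count_present_ptes(), count_present_pte() and struct present_count are hypothetical names for illustration, and the walk is assumed to run under a read-held mmap_sem as in this kernel era.

#include <linux/pagewalk.h>
#include <linux/mm.h>
#include <linux/rwsem.h>

/* Hypothetical per-walk state, reached through walk->private. */
struct present_count {
	unsigned long npages;
};

static int count_present_pte(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct present_count *pc = walk->private;

	if (pte_present(*pte))
		pc->npages++;
	return 0;	/* 0 continues the walk; a negative value aborts it */
}

/* Callbacks are const and shared; no per-call struct mm_walk to fill in. */
static const struct mm_walk_ops count_present_ops = {
	.pte_entry	= count_present_pte,
};

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	struct present_count pc = { 0 };

	down_read(&mm->mmap_sem);	/* walk_page_range() expects mmap_sem held */
	walk_page_range(mm, start, end, &count_present_ops, &pc);
	up_read(&mm->mmap_sem);

	return pc.npages;
}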
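The remaining hunks thread a hugepage flag through alloc_pages_vma() so THP allocations under a non-interleave policy are tried node-locally with __GFP_THISNODE first and only then retried with remote nodes allowed; new_page() switches to alloc_hugepage_vma(), which on trees where this patch applies is expected to be a gfp.h wrapper that passes numa_node_id() and hugepage=true. A caller-side sketch follows; try_thp_at() is a hypothetical name patterned on the new_page() hunk above, not part of this diff.

#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/mm.h>

/*
 * Hypothetical caller patterned on new_page() above. alloc_hugepage_vma()
 * is expected to expand to
 *	alloc_pages_vma(gfp, order, vma, addr, numa_node_id(), true)
 * so the hugepage branch added to alloc_pages_vma() in this diff decides
 * whether the allocation stays node-local or may fall back to remote nodes.
 */
static struct page *try_thp_at(struct vm_area_struct *vma, unsigned long addr)
{
	struct page *thp;

	thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, addr, HPAGE_PMD_ORDER);
	if (!thp)
		return NULL;		/* caller falls back to base pages */

	prep_transhuge_page(thp);	/* initialise compound/THP metadata */
	return thp;
}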