Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--	mm/vmalloc.c	112
1 files changed, 79 insertions, 33 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d77830ff604c..d2a00ad4e1dd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1195,18 +1195,14 @@ find_vmap_lowest_match(unsigned long size,
 {
 	struct vmap_area *va;
 	struct rb_node *node;
-	unsigned long length;
 
 	/* Start from the root. */
 	node = free_vmap_area_root.rb_node;
 
-	/* Adjust the search size for alignment overhead. */
-	length = size + align - 1;
-
 	while (node) {
 		va = rb_entry(node, struct vmap_area, rb_node);
 
-		if (get_subtree_max_size(node->rb_left) >= length &&
+		if (get_subtree_max_size(node->rb_left) >= size &&
 				vstart < va->va_start) {
 			node = node->rb_left;
 		} else {
@@ -1216,9 +1212,9 @@ find_vmap_lowest_match(unsigned long size,
 			/*
 			 * Does not make sense to go deeper towards the right
 			 * sub-tree if it does not have a free block that is
-			 * equal or bigger to the requested search length.
+			 * equal or bigger to the requested search size.
 			 */
-			if (get_subtree_max_size(node->rb_right) >= length) {
+			if (get_subtree_max_size(node->rb_right) >= size) {
 				node = node->rb_right;
 				continue;
 			}
@@ -1226,15 +1222,23 @@ find_vmap_lowest_match(unsigned long size,
 			/*
 			 * OK. We roll back and find the first right sub-tree,
 			 * that will satisfy the search criteria. It can happen
-			 * only once due to "vstart" restriction.
+			 * due to "vstart" restriction or an alignment overhead
+			 * that is bigger than PAGE_SIZE.
 			 */
 			while ((node = rb_parent(node))) {
 				va = rb_entry(node, struct vmap_area, rb_node);
 				if (is_within_this_va(va, size, align, vstart))
 					return va;
 
-				if (get_subtree_max_size(node->rb_right) >= length &&
+				if (get_subtree_max_size(node->rb_right) >= size &&
 						vstart <= va->va_start) {
+					/*
+					 * Shift the vstart forward. Please note, we update it with
+					 * parent's start address adding "1" because we do not want
+					 * to enter same sub-tree after it has already been checked
+					 * and no suitable free block found there.
+					 */
+					vstart = va->va_start + 1;
 					node = node->rb_right;
 					break;
 				}
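The hunks above stop padding the search size with "align - 1": that padding rejected subtrees that did contain a suitably aligned block, while descending by size alone can land in a subtree whose blocks are big enough but never aligned. The patch handles the second case by rolling back and bumping vstart past the subtree that was already checked. The standalone sketch below, in the spirit of is_within_this_va() but with invented names (free_block, fits) rather than kernel API, shows the fit test that makes the retry necessary: a block can hold "size" bytes yet become unusable once its start is rounded up to "align".

	#include <stdio.h>
	#include <stdbool.h>

	struct free_block { unsigned long start, end; };

	/* Does an aligned start inside [start, end) still leave "size" bytes? */
	static bool fits(const struct free_block *b, unsigned long size,
			 unsigned long align, unsigned long vstart)
	{
		unsigned long base = b->start > vstart ? b->start : vstart;
		unsigned long aligned = (base + align - 1) & ~(align - 1);

		return aligned < b->end && b->end - aligned >= size;
	}

	int main(void)
	{
		/* Two 8 KiB blocks; only the second sits on a 64 KiB boundary. */
		struct free_block blocks[] = {
			{ 0x1000, 0x3000 },	/* big enough, wrong alignment */
			{ 0x10000, 0x12000 },	/* big enough, fits as-is */
		};

		for (int i = 0; i < 2; i++)
			printf("block %d fits size=0x2000 align=0x10000: %d\n",
			       i, fits(&blocks[i], 0x2000, 0x10000, 0));
		return 0;
	}

Running it prints 0 for the first block and 1 for the second, which is exactly the situation where the tree walk has to give up on a subtree and retry with a shifted vstart.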
@@ -1265,7 +1269,7 @@ find_vmap_lowest_linear_match(unsigned long size,
 }
 
 static void
-find_vmap_lowest_match_check(unsigned long size)
+find_vmap_lowest_match_check(unsigned long size, unsigned long align)
 {
 	struct vmap_area *va_1, *va_2;
 	unsigned long vstart;
@@ -1274,8 +1278,8 @@ find_vmap_lowest_match_check(unsigned long size)
 	get_random_bytes(&rnd, sizeof(rnd));
 	vstart = VMALLOC_START + rnd;
 
-	va_1 = find_vmap_lowest_match(size, 1, vstart);
-	va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
+	va_1 = find_vmap_lowest_match(size, align, vstart);
+	va_2 = find_vmap_lowest_linear_match(size, align, vstart);
 
 	if (va_1 != va_2)
 		pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@@ -1454,7 +1458,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
 		return vend;
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
-	find_vmap_lowest_match_check(size);
+	find_vmap_lowest_match_check(size, align);
#endif
 
 	return nva_start_addr;
@@ -2272,15 +2276,22 @@ void __init vm_area_add_early(struct vm_struct *vm)
  */
 void __init vm_area_register_early(struct vm_struct *vm, size_t align)
 {
-	static size_t vm_init_off __initdata;
-	unsigned long addr;
+	unsigned long addr = ALIGN(VMALLOC_START, align);
+	struct vm_struct *cur, **p;
 
-	addr = ALIGN(VMALLOC_START + vm_init_off, align);
-	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
+	BUG_ON(vmap_initialized);
 
-	vm->addr = (void *)addr;
+	for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) {
+		if ((unsigned long)cur->addr - addr >= vm->size)
+			break;
+		addr = ALIGN((unsigned long)cur->addr + cur->size, align);
+	}
 
-	vm_area_add_early(vm);
+	BUG_ON(addr > VMALLOC_END - vm->size);
+	vm->addr = (void *)addr;
+	vm->next = *p;
+	*p = vm;
+	kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
 }
 
 static void vmap_init_free_space(void)
@@ -2743,6 +2754,13 @@ void *vmap(struct page **pages, unsigned int count,
 
 	might_sleep();
 
+	/*
+	 * Your top guard is someone else's bottom guard. Not having a top
+	 * guard compromises someone else's mappings too.
+	 */
+	if (WARN_ON_ONCE(flags & VM_NO_GUARD))
+		flags &= ~VM_NO_GUARD;
+
 	if (count > totalram_pages())
 		return NULL;
 
@@ -2816,6 +2834,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		unsigned int order, unsigned int nr_pages, struct page **pages)
 {
 	unsigned int nr_allocated = 0;
+	struct page *page;
+	int i;
 
 	/*
 	 * For order-0 pages we make use of bulk allocator, if
@@ -2835,8 +2855,20 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			 */
 			nr_pages_request = min(100U, nr_pages - nr_allocated);
 
-			nr = alloc_pages_bulk_array_node(gfp, nid,
-				nr_pages_request, pages + nr_allocated);
+			/* memory allocation should consider mempolicy, we can't
+			 * wrongly use nearest node when nid == NUMA_NO_NODE,
+			 * otherwise memory may be allocated in only one node,
+			 * but mempolicy wants to alloc memory by interleaving.
+			 */
+			if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
+				nr = alloc_pages_bulk_array_mempolicy(gfp,
+							nr_pages_request,
+							pages + nr_allocated);
+
+			else
+				nr = alloc_pages_bulk_array_node(gfp, nid,
+							nr_pages_request,
+							pages + nr_allocated);
 
 			nr_allocated += nr;
 			cond_resched();
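Among the hunks above, vm_area_register_early() changes the most: instead of a private cumulative offset (which assumed all early registrations happen back to back from VMALLOC_START), it now derives the address from the vmlist contents with a first-fit scan, so overlaps with already-registered areas become impossible. Below is a minimal userspace model of that scan; the types and helper names (struct area, place) are made up for the demo and the kernel's vm_struct bookkeeping is elided.

	#include <stdio.h>

	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	struct area {
		unsigned long addr, size;
		struct area *next;
	};

	/* Walk the address-sorted list, take the first gap that can hold
	 * "vm", and splice it in so the list stays sorted. */
	static unsigned long place(struct area **list, struct area *vm,
				   unsigned long align, unsigned long start)
	{
		unsigned long addr = ALIGN_UP(start, align);
		struct area *cur, **p;

		for (p = list; (cur = *p) != NULL; p = &cur->next) {
			if (cur->addr - addr >= vm->size)
				break;	/* gap before "cur" is big enough */
			addr = ALIGN_UP(cur->addr + cur->size, align);
		}

		vm->addr = addr;
		vm->next = *p;
		*p = vm;
		return addr;
	}

	int main(void)
	{
		struct area *list = NULL;
		struct area a = { .size = 0x2000 }, b = { .size = 0x2000 };

		printf("a at %#lx\n", place(&list, &a, 0x1000, 0x100000));
		printf("b at %#lx\n", place(&list, &b, 0x1000, 0x100000));
		return 0;
	}

The second placement lands at 0x102000, right after the first, without any shared running offset.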
@@ -2856,11 +2888,15 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 		gfp |= __GFP_COMP;
 
 	/* High-order pages or fallback path if "bulk" fails. */
+
 	while (nr_allocated < nr_pages) {
-		struct page *page;
-		int i;
+		if (fatal_signal_pending(current))
+			break;
 
-		page = alloc_pages_node(nid, gfp, order);
+		if (nid == NUMA_NO_NODE)
+			page = alloc_pages(gfp, order);
+		else
+			page = alloc_pages_node(nid, gfp, order);
 		if (unlikely(!page))
 			break;
 
@@ -2884,6 +2920,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 int node)
 {
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+	const gfp_t orig_gfp_mask = gfp_mask;
 	unsigned long addr = (unsigned long)area->addr;
 	unsigned long size = get_vm_area_size(area);
 	unsigned long array_size;
@@ -2904,7 +2941,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	}
 
 	if (!area->pages) {
-		warn_alloc(gfp_mask, NULL,
+		warn_alloc(orig_gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to allocated page array size %lu",
 			nr_small_pages * PAGE_SIZE, array_size);
 		free_vm_area(area);
@@ -2924,7 +2961,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	 * allocation request, free them via __vfree() if any.
 	 */
 	if (area->nr_pages != nr_small_pages) {
-		warn_alloc(gfp_mask, NULL,
+		warn_alloc(orig_gfp_mask, NULL,
 			"vmalloc error: size %lu, page order %u, failed to allocate pages",
 			area->nr_pages * PAGE_SIZE, page_order);
 		goto fail;
@@ -2932,7 +2969,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	if (vmap_pages_range(addr, addr + size, prot, area->pages,
 			page_shift) < 0) {
-		warn_alloc(gfp_mask, NULL,
+		warn_alloc(orig_gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to map pages",
 			area->nr_pages * PAGE_SIZE);
 		goto fail;
@@ -2958,8 +2995,16 @@ fail:
  * @caller:		  caller's return address
  *
  * Allocate enough pages to cover @size from the page level
- * allocator with @gfp_mask flags.  Map them into contiguous
- * kernel virtual space, using a pagetable protection of @prot.
+ * allocator with @gfp_mask flags. Please note that the full set of gfp
+ * flags are not supported. GFP_KERNEL would be a preferred allocation mode
+ * but GFP_NOFS and GFP_NOIO are supported as well. Zone modifiers are not
+ * supported. From the reclaim modifiers __GFP_DIRECT_RECLAIM is required (aka
+ * GFP_NOWAIT is not supported) and only __GFP_NOFAIL is supported (aka
+ * __GFP_NORETRY and __GFP_RETRY_MAYFAIL are not supported).
+ * __GFP_NOWARN can be used to suppress error messages about failures.
+ *
+ * Map them into contiguous kernel virtual space, using a pagetable
+ * protection of @prot.
 *
 * Return: the address of the area or %NULL on failure
 */
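The warn_alloc() hunks switch to orig_gfp_mask because __vmalloc_area_node() modifies gfp_mask for its internal page allocations (adding __GFP_NOWARN among other adjustments), and passing the modified mask to warn_alloc() would suppress the very diagnostic being emitted. The pattern is simply "snapshot the caller's flags before mutating them"; the sketch below models it with invented flag values, not the real gfp bits.

	#include <stdio.h>

	#define F_NOWARN	0x1u	/* invented flag values for the demo */
	#define F_NOFAIL	0x2u

	static void warn_failure(unsigned int flags)
	{
		if (flags & F_NOWARN)	/* honour the *caller's* choice only */
			return;
		printf("alloc failed, caller flags: %#x\n", flags);
	}

	static int do_alloc(unsigned int flags)
	{
		const unsigned int orig_flags = flags;	/* snapshot first */

		flags |= F_NOWARN;	/* internal sub-allocations stay quiet */

		/* ... pretend the allocation using "flags" failed ... */
		warn_failure(orig_flags);	/* report the original mask */
		return -1;
	}

	int main(void)
	{
		do_alloc(F_NOFAIL);
		return 0;
	}

Had do_alloc() reported the mutated "flags", the message would have been silently dropped, which is exactly the bug the orig_gfp_mask change fixes.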
@@ -3853,6 +3898,7 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 {
 	if (IS_ENABLED(CONFIG_NUMA)) {
 		unsigned int nr, *counters = m->private;
+		unsigned int step = 1U << vm_area_page_order(v);
 
 		if (!counters)
 			return;
@@ -3864,9 +3910,8 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 
 		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
 
-		for (nr = 0; nr < v->nr_pages; nr++)
-			counters[page_to_nid(v->pages[nr])]++;
-
+		for (nr = 0; nr < v->nr_pages; nr += step)
+			counters[page_to_nid(v->pages[nr])] += step;
 		for_each_node_state(nr, N_HIGH_MEMORY)
 			if (counters[nr])
 				seq_printf(m, " N%u=%u", nr, counters[nr]);
@@ -3902,7 +3947,7 @@ static int s_show(struct seq_file *m, void *p)
 			(void *)va->va_start, (void *)va->va_end,
 			va->va_end - va->va_start);
 
-		return 0;
+		goto final;
 	}
 
 	v = va->vm;
@@ -3943,6 +3988,7 @@ static int s_show(struct seq_file *m, void *p)
 	/*
 	 * As a final step, dump "unpurged" areas.
 	 */
+final:
 	if (list_is_last(&va->list, &vmap_area_list))
 		show_purge_info(m);
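The show_numa_info() hunk exploits the fact that all 1 << order base pages of a high-order block live on the same NUMA node, so one page_to_nid() lookup per block replaces one per page. A small standalone model of the stepping, with page_to_nid() stubbed by a static table (NR_PAGES, page_node and the node layout are invented for the demo):

	#include <stdio.h>

	#define NR_PAGES	8
	#define NR_NODES	2

	/* Stand-in for page_to_nid(): node of each base page. */
	static const int page_node[NR_PAGES] = { 0, 0, 0, 0, 1, 1, 1, 1 };

	int main(void)
	{
		unsigned int order = 2;		/* order-2: 4-page blocks */
		unsigned int step = 1u << order;
		unsigned int counters[NR_NODES] = { 0 };

		/* One lookup per block instead of one per page. */
		for (unsigned int nr = 0; nr < NR_PAGES; nr += step)
			counters[page_node[nr]] += step;

		for (int n = 0; n < NR_NODES; n++)
			printf("N%d=%u\n", n, counters[n]);
		return 0;
	}

This prints N0=4 and N1=4, the same totals the per-page loop would produce, with a quarter of the lookups at order 2.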