diff options
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 185 |
1 files changed, 104 insertions, 81 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 125427cbdb87..6b783baf12a1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -42,6 +42,7 @@ #include <linux/sched/mm.h> #include <asm/tlbflush.h> #include <asm/shmparam.h> +#include <linux/page_owner.h> #define CREATE_TRACE_POINTS #include <trace/events/vmalloc.h> @@ -96,6 +97,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, { pte_t *pte; u64 pfn; + struct page *page; unsigned long size = PAGE_SIZE; pfn = phys_addr >> PAGE_SHIFT; @@ -103,7 +105,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (!pte) return -ENOMEM; do { - BUG_ON(!pte_none(ptep_get(pte))); + if (!pte_none(ptep_get(pte))) { + if (pfn_valid(pfn)) { + page = pfn_to_page(pfn); + dump_page(page, "remapping already mapped page"); + } + BUG(); + } #ifdef CONFIG_HUGETLB_PAGE size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift); @@ -714,7 +722,7 @@ int is_vmalloc_or_module_addr(const void *x) * and fall back on vmalloc() if that fails. Others * just put it in the vmalloc space. */ -#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) +#if defined(CONFIG_EXECMEM) && defined(MODULES_VADDR) unsigned long addr = (unsigned long)kasan_reset_tag(x); if (addr >= MODULES_VADDR && addr < MODULES_END) return 1; @@ -1808,7 +1816,7 @@ static void free_vmap_area(struct vmap_area *va) static inline void preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node) { - struct vmap_area *va = NULL; + struct vmap_area *va = NULL, *tmp; /* * Preload this CPU with one extra vmap_area object. It is used @@ -1824,7 +1832,8 @@ preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node) spin_lock(lock); - if (va && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, va)) + tmp = NULL; + if (va && !__this_cpu_try_cmpxchg(ne_fit_preload_node, &tmp, va)) kmem_cache_free(vmap_area_cachep, va); } @@ -1926,15 +1935,25 @@ node_alloc(unsigned long size, unsigned long align, return va; } +static inline void setup_vmalloc_vm(struct vm_struct *vm, + struct vmap_area *va, unsigned long flags, const void *caller) +{ + vm->flags = flags; + vm->addr = (void *)va->va_start; + vm->size = va->va_end - va->va_start; + vm->caller = caller; + va->vm = vm; +} + /* * Allocate a region of KVA of the specified size and alignment, within the - * vstart and vend. + * vstart and vend. If vm is passed in, the two will also be bound. */ static struct vmap_area *alloc_vmap_area(unsigned long size, unsigned long align, unsigned long vstart, unsigned long vend, int node, gfp_t gfp_mask, - unsigned long va_flags) + unsigned long va_flags, struct vm_struct *vm) { struct vmap_node *vn; struct vmap_area *va; @@ -1997,6 +2016,12 @@ retry: va->vm = NULL; va->flags = (va_flags | vn_id); + if (vm) { + vm->addr = (void *)va->va_start; + vm->size = va->va_end - va->va_start; + va->vm = vm; + } + vn = addr_to_node(va->va_start); spin_lock(&vn->busy.lock); @@ -2031,8 +2056,8 @@ overflow: } if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) - pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n", - size); + pr_warn("vmalloc_node_range for size %lu failed: Address range restricted to %#lx - %#lx\n", + size, vstart, vend); kmem_cache_free(vmap_area_cachep, va); return ERR_PTR(-EBUSY); @@ -2474,6 +2499,7 @@ struct vmap_block { struct list_head free_list; struct rcu_head rcu_head; struct list_head purge; + unsigned int cpu; }; /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ @@ -2518,7 +2544,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); static struct xarray * addr_to_vb_xa(unsigned long addr) { - int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus(); + int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids; + + /* + * Please note, nr_cpu_ids points on a highest set + * possible bit, i.e. we never invoke cpumask_next() + * if an index points on it which is nr_cpu_ids - 1. + */ + if (!cpu_possible(index)) + index = cpumask_next(index, cpu_possible_mask); return &per_cpu(vmap_block_queue, index).vmap_blocks; } @@ -2574,7 +2608,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, VMALLOC_START, VMALLOC_END, node, gfp_mask, - VMAP_RAM|VMAP_BLOCK); + VMAP_RAM|VMAP_BLOCK, NULL); if (IS_ERR(va)) { kfree(vb); return ERR_CAST(va); @@ -2601,8 +2635,15 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) free_vmap_area(va); return ERR_PTR(err); } - - vbq = raw_cpu_ptr(&vmap_block_queue); + /* + * list_add_tail_rcu could happened in another core + * rather than vb->cpu due to task migration, which + * is safe as list_add_tail_rcu will ensure the list's + * integrity together with list_for_each_rcu from read + * side. + */ + vb->cpu = raw_smp_processor_id(); + vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); spin_unlock(&vbq->lock); @@ -2630,9 +2671,10 @@ static void free_vmap_block(struct vmap_block *vb) } static bool purge_fragmented_block(struct vmap_block *vb, - struct vmap_block_queue *vbq, struct list_head *purge_list, - bool force_purge) + struct list_head *purge_list, bool force_purge) { + struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu); + if (vb->free + vb->dirty != VMAP_BBMAP_BITS || vb->dirty == VMAP_BBMAP_BITS) return false; @@ -2680,7 +2722,7 @@ static void purge_fragmented_blocks(int cpu) continue; spin_lock(&vb->lock); - purge_fragmented_block(vb, vbq, &purge, true); + purge_fragmented_block(vb, &purge, true); spin_unlock(&vb->lock); } rcu_read_unlock(); @@ -2817,7 +2859,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) * not purgeable, check whether there is dirty * space to be flushed. */ - if (!purge_fragmented_block(vb, vbq, &purge_list, false) && + if (!purge_fragmented_block(vb, &purge_list, false) && vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e; @@ -2931,7 +2973,8 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node) struct vmap_area *va; va = alloc_vmap_area(size, PAGE_SIZE, VMALLOC_START, VMALLOC_END, - node, GFP_KERNEL, VMAP_RAM); + node, GFP_KERNEL, VMAP_RAM, + NULL); if (IS_ERR(va)) return NULL; @@ -3034,26 +3077,6 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align) kasan_populate_early_vm_area_shadow(vm->addr, vm->size); } -static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, - struct vmap_area *va, unsigned long flags, const void *caller) -{ - vm->flags = flags; - vm->addr = (void *)va->va_start; - vm->size = va->va_end - va->va_start; - vm->caller = caller; - va->vm = vm; -} - -static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, - unsigned long flags, const void *caller) -{ - struct vmap_node *vn = addr_to_node(va->va_start); - - spin_lock(&vn->busy.lock); - setup_vmalloc_vm_locked(vm, va, flags, caller); - spin_unlock(&vn->busy.lock); -} - static void clear_vm_uninitialized_flag(struct vm_struct *vm) { /* @@ -3090,14 +3113,15 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, if (!(flags & VM_NO_GUARD)) size += PAGE_SIZE; - va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0); + area->flags = flags; + area->caller = caller; + + va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area); if (IS_ERR(va)) { kfree(area); return NULL; } - setup_vmalloc_vm(area, va, flags, caller); - /* * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a * best-effort approach, as they can be mapped outside of vmalloc code. @@ -3492,7 +3516,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, { unsigned int nr_allocated = 0; gfp_t alloc_gfp = gfp; - bool nofail = false; + bool nofail = gfp & __GFP_NOFAIL; struct page *page; int i; @@ -3523,12 +3547,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * but mempolicy wants to alloc memory by interleaving. */ if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE) - nr = alloc_pages_bulk_array_mempolicy(bulk_gfp, + nr = alloc_pages_bulk_array_mempolicy_noprof(bulk_gfp, nr_pages_request, pages + nr_allocated); else - nr = alloc_pages_bulk_array_node(bulk_gfp, nid, + nr = alloc_pages_bulk_array_node_noprof(bulk_gfp, nid, nr_pages_request, pages + nr_allocated); @@ -3549,18 +3573,17 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * and compaction etc. */ alloc_gfp &= ~__GFP_NOFAIL; - nofail = true; } /* High-order pages or fallback path if "bulk" fails. */ while (nr_allocated < nr_pages) { - if (fatal_signal_pending(current)) + if (!nofail && fatal_signal_pending(current)) break; if (nid == NUMA_NO_NODE) - page = alloc_pages(alloc_gfp, order); + page = alloc_pages_noprof(alloc_gfp, order); else - page = alloc_pages_node(nid, alloc_gfp, order); + page = alloc_pages_node_noprof(nid, alloc_gfp, order); if (unlikely(!page)) { if (!nofail) break; @@ -3617,10 +3640,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { - area->pages = __vmalloc_node(array_size, 1, nested_gfp, node, + area->pages = __vmalloc_node_noprof(array_size, 1, nested_gfp, node, area->caller); } else { - area->pages = kmalloc_node(array_size, nested_gfp, node); + area->pages = kmalloc_node_noprof(array_size, nested_gfp, node); } if (!area->pages) { @@ -3730,7 +3753,7 @@ fail: * * Return: the address of the area or %NULL on failure */ -void *__vmalloc_node_range(unsigned long size, unsigned long align, +void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) @@ -3877,10 +3900,10 @@ fail: * * Return: pointer to the allocated memory or %NULL on error */ -void *__vmalloc_node(unsigned long size, unsigned long align, +void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) { - return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, 0, node, caller); } /* @@ -3889,15 +3912,15 @@ void *__vmalloc_node(unsigned long size, unsigned long align, * than that. */ #ifdef CONFIG_TEST_VMALLOC_MODULE -EXPORT_SYMBOL_GPL(__vmalloc_node); +EXPORT_SYMBOL_GPL(__vmalloc_node_noprof); #endif -void *__vmalloc(unsigned long size, gfp_t gfp_mask) +void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) { - return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, gfp_mask, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(__vmalloc); +EXPORT_SYMBOL(__vmalloc_noprof); /** * vmalloc - allocate virtually contiguous memory @@ -3911,12 +3934,12 @@ EXPORT_SYMBOL(__vmalloc); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc(unsigned long size) +void *vmalloc_noprof(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc); +EXPORT_SYMBOL(vmalloc_noprof); /** * vmalloc_huge - allocate virtually contiguous memory, allow huge pages @@ -3930,13 +3953,13 @@ EXPORT_SYMBOL(vmalloc); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) +void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) { - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END, gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL_GPL(vmalloc_huge); +EXPORT_SYMBOL_GPL(vmalloc_huge_noprof); /** * vzalloc - allocate virtually contiguous memory with zero fill @@ -3951,12 +3974,12 @@ EXPORT_SYMBOL_GPL(vmalloc_huge); * * Return: pointer to the allocated memory or %NULL on error */ -void *vzalloc(unsigned long size) +void *vzalloc_noprof(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vzalloc); +EXPORT_SYMBOL(vzalloc_noprof); /** * vmalloc_user - allocate zeroed virtually contiguous memory for userspace @@ -3967,14 +3990,14 @@ EXPORT_SYMBOL(vzalloc); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_user(unsigned long size) +void *vmalloc_user_noprof(unsigned long size) { - return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_user); +EXPORT_SYMBOL(vmalloc_user_noprof); /** * vmalloc_node - allocate memory on a specific node @@ -3989,12 +4012,12 @@ EXPORT_SYMBOL(vmalloc_user); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_node(unsigned long size, int node) +void *vmalloc_node_noprof(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL, node, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_node); +EXPORT_SYMBOL(vmalloc_node_noprof); /** * vzalloc_node - allocate memory on a specific node with zero fill @@ -4007,12 +4030,12 @@ EXPORT_SYMBOL(vmalloc_node); * * Return: pointer to the allocated memory or %NULL on error */ -void *vzalloc_node(unsigned long size, int node) +void *vzalloc_node_noprof(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node, + return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, node, __builtin_return_address(0)); } -EXPORT_SYMBOL(vzalloc_node); +EXPORT_SYMBOL(vzalloc_node_noprof); #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) #define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) @@ -4035,12 +4058,12 @@ EXPORT_SYMBOL(vzalloc_node); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_32(unsigned long size) +void *vmalloc_32_noprof(unsigned long size) { - return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE, + return __vmalloc_node_noprof(size, 1, GFP_VMALLOC32, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_32); +EXPORT_SYMBOL(vmalloc_32_noprof); /** * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory @@ -4051,14 +4074,14 @@ EXPORT_SYMBOL(vmalloc_32); * * Return: pointer to the allocated memory or %NULL on error */ -void *vmalloc_32_user(unsigned long size) +void *vmalloc_32_user_noprof(unsigned long size) { - return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END, + return __vmalloc_node_range_noprof(size, SHMLBA, VMALLOC_START, VMALLOC_END, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE, __builtin_return_address(0)); } -EXPORT_SYMBOL(vmalloc_32_user); +EXPORT_SYMBOL(vmalloc_32_user_noprof); /* * Atomically zero bytes in the iterator. @@ -4672,7 +4695,7 @@ retry: spin_lock(&vn->busy.lock); insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head); - setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, + setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC, pcpu_get_vm_areas); spin_unlock(&vn->busy.lock); } |