diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2019-08-12 14:36:27 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2019-08-12 14:36:27 +0200 |
commit | cbd32a1c56e36fedaa93a727699188bd3e6e6f67 (patch) | |
tree | 199e302eb5a66725a9d1774e47367a87098ba397 /mm/vmalloc.c | |
parent | 48c7d73b2362ce61503551ad70052617b3e8857d (diff) | |
parent | b61fbc887af7a13a1c90c84c1feaeb4c9780e1e2 (diff) |
Merge tag 'efi-urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi into efi/urgent
Pull a single EFI fix for v5.3 from Ard:
- Fix mixed mode breakage in EFI config table handling for TPM.
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 133 |
1 files changed, 86 insertions, 47 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7350a124524b..4fa8d84599b0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -365,6 +365,13 @@ static LIST_HEAD(free_vmap_area_list); */ static struct rb_root free_vmap_area_root = RB_ROOT; +/* + * Preload a CPU with one object for "no edge" split case. The + * aim is to get rid of allocations from the atomic context, thus + * to use more permissive allocation masks. + */ +static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node); + static __always_inline unsigned long va_size(struct vmap_area *va) { @@ -399,6 +406,13 @@ static void purge_vmap_area_lazy(void); static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); static unsigned long lazy_max_pages(void); +static atomic_long_t nr_vmalloc_pages; + +unsigned long vmalloc_nr_pages(void) +{ + return atomic_long_read(&nr_vmalloc_pages); +} + static struct vmap_area *__find_vmap_area(unsigned long addr) { struct rb_node *n = vmap_area_root.rb_node; @@ -527,20 +541,17 @@ link_va(struct vmap_area *va, struct rb_root *root, static __always_inline void unlink_va(struct vmap_area *va, struct rb_root *root) { - /* - * During merging a VA node can be empty, therefore - * not linked with the tree nor list. Just check it. - */ - if (!RB_EMPTY_NODE(&va->rb_node)) { - if (root == &free_vmap_area_root) - rb_erase_augmented(&va->rb_node, - root, &free_vmap_area_rb_augment_cb); - else - rb_erase(&va->rb_node, root); + if (WARN_ON(RB_EMPTY_NODE(&va->rb_node))) + return; - list_del(&va->list); - RB_CLEAR_NODE(&va->rb_node); - } + if (root == &free_vmap_area_root) + rb_erase_augmented(&va->rb_node, + root, &free_vmap_area_rb_augment_cb); + else + rb_erase(&va->rb_node, root); + + list_del(&va->list); + RB_CLEAR_NODE(&va->rb_node); } #if DEBUG_AUGMENT_PROPAGATE_CHECK @@ -712,9 +723,6 @@ merge_or_add_vmap_area(struct vmap_area *va, /* Check and update the tree if needed. */ augment_tree_propagate_from(sibling); - /* Remove this VA, it has been merged. */ - unlink_va(va, root); - /* Free vmap_area object. */ kmem_cache_free(vmap_area_cachep, va); @@ -739,12 +747,11 @@ merge_or_add_vmap_area(struct vmap_area *va, /* Check and update the tree if needed. */ augment_tree_propagate_from(sibling); - /* Remove this VA, it has been merged. */ - unlink_va(va, root); + if (merged) + unlink_va(va, root); /* Free vmap_area object. */ kmem_cache_free(vmap_area_cachep, va); - return; } } @@ -913,7 +920,7 @@ adjust_va_to_fit_type(struct vmap_area *va, unsigned long nva_start_addr, unsigned long size, enum fit_type type) { - struct vmap_area *lva; + struct vmap_area *lva = NULL; if (type == FL_FIT_TYPE) { /* @@ -951,9 +958,24 @@ adjust_va_to_fit_type(struct vmap_area *va, * L V NVA V R * |---|-------|---| */ - lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT); - if (unlikely(!lva)) - return -1; + lva = __this_cpu_xchg(ne_fit_preload_node, NULL); + if (unlikely(!lva)) { + /* + * For percpu allocator we do not do any pre-allocation + * and leave it as it is. The reason is it most likely + * never ends up with NE_FIT_TYPE splitting. In case of + * percpu allocations offsets and sizes are aligned to + * fixed align request, i.e. RE_FIT_TYPE and FL_FIT_TYPE + * are its main fitting cases. + * + * There are a few exceptions though, as an example it is + * a first allocation (early boot up) when we have "one" + * big free space that has to be split. + */ + lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT); + if (!lva) + return -1; + } /* * Build the remainder. @@ -972,7 +994,7 @@ adjust_va_to_fit_type(struct vmap_area *va, if (type != FL_FIT_TYPE) { augment_tree_propagate_from(va); - if (type == NE_FIT_TYPE) + if (lva) /* type == NE_FIT_TYPE */ insert_vmap_area_augment(lva, &va->rb_node, &free_vmap_area_root, &free_vmap_area_list); } @@ -986,7 +1008,7 @@ adjust_va_to_fit_type(struct vmap_area *va, */ static __always_inline unsigned long __alloc_vmap_area(unsigned long size, unsigned long align, - unsigned long vstart, unsigned long vend, int node) + unsigned long vstart, unsigned long vend) { unsigned long nva_start_addr; struct vmap_area *va; @@ -1032,7 +1054,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, unsigned long vstart, unsigned long vend, int node, gfp_t gfp_mask) { - struct vmap_area *va; + struct vmap_area *va, *pva; unsigned long addr; int purged = 0; @@ -1057,13 +1079,38 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); retry: + /* + * Preload this CPU with one extra vmap_area object to ensure + * that we have it available when fit type of free area is + * NE_FIT_TYPE. + * + * The preload is done in non-atomic context, thus it allows us + * to use more permissive allocation masks to be more stable under + * low memory condition and high memory pressure. + * + * Even if it fails we do not really care about that. Just proceed + * as it is. "overflow" path will refill the cache we allocate from. + */ + preempt_disable(); + if (!__this_cpu_read(ne_fit_preload_node)) { + preempt_enable(); + pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node); + preempt_disable(); + + if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) { + if (pva) + kmem_cache_free(vmap_area_cachep, pva); + } + } + spin_lock(&vmap_area_lock); + preempt_enable(); /* * If an allocation fails, the "vend" address is * returned. Therefore trigger the overflow path. */ - addr = __alloc_vmap_area(size, align, vstart, vend, node); + addr = __alloc_vmap_area(size, align, vstart, vend); if (unlikely(addr == vend)) goto overflow; @@ -1119,8 +1166,6 @@ EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); static void __free_vmap_area(struct vmap_area *va) { - BUG_ON(RB_EMPTY_NODE(&va->rb_node)); - /* * Remove from the busy tree/list. */ @@ -2123,22 +2168,11 @@ static inline void set_area_direct_map(const struct vm_struct *area, /* Handle removing and resetting vm mappings related to the vm_struct. */ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) { - unsigned long addr = (unsigned long)area->addr; unsigned long start = ULONG_MAX, end = 0; int flush_reset = area->flags & VM_FLUSH_RESET_PERMS; + int flush_dmap = 0; int i; - /* - * The below block can be removed when all architectures that have - * direct map permissions also have set_direct_map_() implementations. - * This is concerned with resetting the direct map any an vm alias with - * execute permissions, without leaving a RW+X window. - */ - if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) { - set_memory_nx(addr, area->nr_pages); - set_memory_rw(addr, area->nr_pages); - } - remove_vm_area(area->addr); /* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */ @@ -2160,9 +2194,11 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) * the vm_unmap_aliases() flush includes the direct map. */ for (i = 0; i < area->nr_pages; i++) { - if (page_address(area->pages[i])) { + unsigned long addr = (unsigned long)page_address(area->pages[i]); + if (addr) { start = min(addr, start); - end = max(addr, end); + end = max(addr + PAGE_SIZE, end); + flush_dmap = 1; } } @@ -2172,7 +2208,7 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages) * reset the direct map permissions to the default. */ set_area_direct_map(area, set_direct_map_invalid_noflush); - _vm_unmap_aliases(start, end, 1); + _vm_unmap_aliases(start, end, flush_dmap); set_area_direct_map(area, set_direct_map_default_noflush); } @@ -2208,6 +2244,7 @@ static void __vunmap(const void *addr, int deallocate_pages) BUG_ON(!page); __free_pages(page, 0); } + atomic_long_sub(area->nr_pages, &nr_vmalloc_pages); kvfree(area->pages); } @@ -2385,12 +2422,14 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vunmap() */ area->nr_pages = i; + atomic_long_add(area->nr_pages, &nr_vmalloc_pages); goto fail; } area->pages[i] = page; if (gfpflags_allow_blocking(gfp_mask|highmem_mask)) cond_resched(); } + atomic_long_add(area->nr_pages, &nr_vmalloc_pages); if (map_vm_area(area, prot, pages)) goto fail; @@ -2783,7 +2822,7 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count) * Note: In usual ops, vread() is never necessary because the caller * should know vmalloc() area is valid and can use memcpy(). * This is for routines which have to access vmalloc area without - * any informaion, as /dev/kmem. + * any information, as /dev/kmem. * * Return: number of bytes for which addr and buf should be increased * (same number as @count) or %0 if [addr...addr+count) doesn't @@ -2862,7 +2901,7 @@ finished: * Note: In usual ops, vwrite() is never necessary because the caller * should know vmalloc() area is valid and can use memcpy(). * This is for routines which have to access vmalloc area without - * any informaion, as /dev/kmem. + * any information, as /dev/kmem. * * Return: number of bytes for which addr and buf should be * increased (same number as @count) or %0 if [addr...addr+count) @@ -3005,7 +3044,7 @@ void __weak vmalloc_sync_all(void) } -static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) +static int f(pte_t *pte, unsigned long addr, void *data) { pte_t ***p = data; |