diff options
Diffstat (limited to 'mm/slub.c')
| -rw-r--r-- | mm/slub.c | 170 |
1 files changed, 115 insertions, 55 deletions
diff --git a/mm/slub.c b/mm/slub.c index 1bb2a93cf7b6..4954999183d5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -557,6 +557,26 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); } +/* + * See comment in calculate_sizes(). + */ +static inline bool freeptr_outside_object(struct kmem_cache *s) +{ + return s->offset >= s->inuse; +} + +/* + * Return offset of the end of info block which is inuse + free pointer if + * not overlapping with object. + */ +static inline unsigned int get_info_end(struct kmem_cache *s) +{ + if (freeptr_outside_object(s)) + return s->inuse + sizeof(void *); + else + return s->inuse; +} + /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ for (__p = fixup_red_left(__s, __addr); \ @@ -604,11 +624,21 @@ static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo)); s->cpu_partial_slabs = nr_slabs; } + +static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) +{ + return s->cpu_partial_slabs; +} #else static inline void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) { } + +static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) +{ + return 0; +} #endif /* CONFIG_SLUB_CPU_PARTIAL */ /* @@ -845,26 +875,6 @@ static void print_section(char *level, char *text, u8 *addr, metadata_access_disable(); } -/* - * See comment in calculate_sizes(). - */ -static inline bool freeptr_outside_object(struct kmem_cache *s) -{ - return s->offset >= s->inuse; -} - -/* - * Return offset of the end of info block which is inuse + free pointer if - * not overlapping with object. 
- */ -static inline unsigned int get_info_end(struct kmem_cache *s) -{ - if (freeptr_outside_object(s)) - return s->inuse + sizeof(void *); - else - return s->inuse; -} - static struct track *get_track(struct kmem_cache *s, void *object, enum track_item alloc) { @@ -2092,15 +2102,20 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init) * * The initialization memset's clear the object and the metadata, * but don't touch the SLAB redzone. + * + * The object's freepointer is also avoided if stored outside the + * object. */ if (unlikely(init)) { int rsize; + unsigned int inuse; + inuse = get_info_end(s); if (!kasan_has_integrated_init()) memset(kasan_reset_tag(x), 0, s->object_size); rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0; - memset((char *)kasan_reset_tag(x) + s->inuse, 0, - s->size - s->inuse - rsize); + memset((char *)kasan_reset_tag(x) + inuse, 0, + s->size - inuse - rsize); } /* KASAN might put x into memory quarantine, delaying its reuse. */ return !kasan_slab_free(s, x, init); @@ -2604,19 +2619,18 @@ static struct slab *get_partial_node(struct kmem_cache *s, if (!partial) { partial = slab; stat(s, ALLOC_FROM_PARTIAL); + + if ((slub_get_cpu_partial(s) == 0)) { + break; + } } else { put_cpu_partial(s, slab, 0); stat(s, CPU_PARTIAL_NODE); - partial_slabs++; - } -#ifdef CONFIG_SLUB_CPU_PARTIAL - if (!kmem_cache_has_cpu_partial(s) - || partial_slabs > s->cpu_partial_slabs / 2) - break; -#else - break; -#endif + if (++partial_slabs > slub_get_cpu_partial(s) / 2) { + break; + } + } } spin_unlock_irqrestore(&n->list_lock, flags); return partial; @@ -2699,7 +2713,7 @@ static struct slab *get_partial(struct kmem_cache *s, int node, searchnode = numa_mem_id(); slab = get_partial_node(s, get_node(s, searchnode), pc); - if (slab || node != NUMA_NO_NODE) + if (slab || (node != NUMA_NO_NODE && (pc->flags & __GFP_THISNODE))) return slab; return get_any_partial(s, pc); @@ -2797,7 +2811,7 @@ static void deactivate_slab(struct kmem_cache *s, struct 
slab *slab, struct slab new; struct slab old; - if (slab->freelist) { + if (READ_ONCE(slab->freelist)) { stat(s, DEACTIVATE_REMOTE_FREES); tail = DEACTIVATE_TO_TAIL; } @@ -3229,6 +3243,43 @@ static unsigned long count_partial(struct kmem_cache_node *n, #endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */ #ifdef CONFIG_SLUB_DEBUG +#define MAX_PARTIAL_TO_SCAN 10000 + +static unsigned long count_partial_free_approx(struct kmem_cache_node *n) +{ + unsigned long flags; + unsigned long x = 0; + struct slab *slab; + + spin_lock_irqsave(&n->list_lock, flags); + if (n->nr_partial <= MAX_PARTIAL_TO_SCAN) { + list_for_each_entry(slab, &n->partial, slab_list) + x += slab->objects - slab->inuse; + } else { + /* + * For a long list, approximate the total count of objects in + * it to meet the limit on the number of slabs to scan. + * Scan from both the list's head and tail for better accuracy. + */ + unsigned long scanned = 0; + + list_for_each_entry(slab, &n->partial, slab_list) { + x += slab->objects - slab->inuse; + if (++scanned == MAX_PARTIAL_TO_SCAN / 2) + break; + } + list_for_each_entry_reverse(slab, &n->partial, slab_list) { + x += slab->objects - slab->inuse; + if (++scanned == MAX_PARTIAL_TO_SCAN) + break; + } + x = mult_frac(x, n->nr_partial, scanned); + x = min(x, node_nr_objs(n)); + } + spin_unlock_irqrestore(&n->list_lock, flags); + return x; +} + static noinline void slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { @@ -3255,7 +3306,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) unsigned long nr_objs; unsigned long nr_free; - nr_free = count_partial(n, count_free); + nr_free = count_partial_free_approx(n); nr_slabs = node_nr_slabs(n); nr_objs = node_nr_objs(n); @@ -3375,6 +3426,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, struct slab *slab; unsigned long flags; struct partial_context pc; + bool try_thisnode = true; stat(s, ALLOC_SLOWPATH); @@ -3501,6 +3553,21 @@ new_slab: new_objects: 
pc.flags = gfpflags; + /* + * When a preferred node is indicated but no __GFP_THISNODE + * + * 1) try to get a partial slab from target node only by having + * __GFP_THISNODE in pc.flags for get_partial() + * 2) if 1) failed, try to allocate a new slab from target node with + * GFP_NOWAIT | __GFP_THISNODE opportunistically + * 3) if 2) failed, retry with original gfpflags which will allow + * get_partial() try partial lists of other nodes before potentially + * allocating new page from other nodes + */ + if (unlikely(node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE) + && try_thisnode)) + pc.flags = GFP_NOWAIT | __GFP_THISNODE; + pc.orig_size = orig_size; slab = get_partial(s, node, &pc); if (slab) { @@ -3522,10 +3589,15 @@ new_objects: } slub_put_cpu_ptr(s->cpu_slab); - slab = new_slab(s, gfpflags, node); + slab = new_slab(s, pc.flags, node); c = slub_get_cpu_ptr(s->cpu_slab); if (unlikely(!slab)) { + if (node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE) + && try_thisnode) { + try_thisnode = false; + goto new_objects; + } slab_out_of_memory(s, gfpflags, node); return NULL; } @@ -3722,7 +3794,8 @@ static void *__slab_alloc_node(struct kmem_cache *s, static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, void *obj) { - if (unlikely(slab_want_init_on_free(s)) && obj) + if (unlikely(slab_want_init_on_free(s)) && obj && + !freeptr_outside_object(s)) memset((void *)((char *)kasan_reset_tag(obj) + s->offset), 0, sizeof(void *)); } @@ -4226,7 +4299,7 @@ redo: c = raw_cpu_ptr(s->cpu_slab); tid = READ_ONCE(c->tid); - /* Same with comment on barrier() in slab_alloc_node() */ + /* Same with comment on barrier() in __slab_alloc_node() */ barrier(); if (unlikely(slab != c->slab)) { @@ -4847,7 +4920,6 @@ static void early_kmem_cache_node_alloc(int node) BUG_ON(!n); #ifdef CONFIG_SLUB_DEBUG init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); - init_tracking(kmem_cache_node, n); #endif n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); slab->freelist
= get_freepointer(kmem_cache_node, n); @@ -5060,9 +5132,7 @@ static int calculate_sizes(struct kmem_cache *s) if ((int)order < 0) return 0; - s->allocflags = 0; - if (order) - s->allocflags |= __GFP_COMP; + s->allocflags = __GFP_COMP; if (s->flags & SLAB_CACHE_DMA) s->allocflags |= GFP_DMA; @@ -6036,7 +6106,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, else if (flags & SO_OBJECTS) WARN_ON_ONCE(1); else - x = slab->slabs; + x = data_race(slab->slabs); total += x; nodes[node] += x; } @@ -6241,7 +6311,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); if (slab) - slabs += slab->slabs; + slabs += data_race(slab->slabs); } #endif @@ -6255,7 +6325,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); if (slab) { - slabs = READ_ONCE(slab->slabs); + slabs = data_race(slab->slabs); objects = (slabs * oo_objects(s->oo)) / 2; len += sysfs_emit_at(buf, len, " C%d=%d(%d)", cpu, objects, slabs); @@ -7089,7 +7159,7 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo) for_each_kmem_cache_node(s, node, n) { nr_slabs += node_nr_slabs(n); nr_objs += node_nr_objs(n); - nr_free += count_partial(n, count_free); + nr_free += count_partial_free_approx(n); } sinfo->active_objs = nr_objs - nr_free; @@ -7099,14 +7169,4 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo) sinfo->objects_per_slab = oo_objects(s->oo); sinfo->cache_order = oo_order(s->oo); } - -void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s) -{ -} - -ssize_t slabinfo_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - return -EIO; -} #endif /* CONFIG_SLUB_DEBUG */ |