-rw-r--r--   Documentation/mm/slub.rst   |   2
-rw-r--r--   include/linux/percpu.h      |   2
-rw-r--r--   include/linux/slab.h        |  51
-rw-r--r--   mm/migrate.c                |  15
-rw-r--r--   mm/slab.c                   | 106
-rw-r--r--   mm/slab.h                   |  58
-rw-r--r--   mm/slub.c                   |  69
-rw-r--r--   tools/vm/slabinfo.c         |   6
8 files changed, 169 insertions, 140 deletions
diff --git a/Documentation/mm/slub.rst b/Documentation/mm/slub.rst index 4e1578186b4f..7f652216dabe 100644 --- a/Documentation/mm/slub.rst +++ b/Documentation/mm/slub.rst @@ -116,6 +116,8 @@ options from the ``slub_debug`` parameter translate to the following files:: T trace A failslab +failslab file is writable, so writing 1 or 0 will enable or disable +the option at runtime. Write returns -EINVAL if cache is an alias. Careful with tracing: It may spew out lots of information and never stop if used on the wrong slab. diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad1351c..3dbb6fb70658 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,7 +42,7 @@ * larger than PERCPU_DYNAMIC_EARLY_SIZE. */ #define PERCPU_DYNAMIC_EARLY_SLOTS 128 -#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10) +#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy diff --git a/include/linux/slab.h b/include/linux/slab.h index 45efc6c553b8..cd3efac72f3d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -441,7 +441,18 @@ static_assert(PAGE_SHIFT <= 20); #endif /* !CONFIG_SLOB */ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; + +/** + * kmem_cache_alloc - Allocate an object + * @cachep: The cache to allocate from. + * @flags: See kmalloc(). + * + * Allocate an object from this cache. + * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags. + * + * Return: pointer to the new object or %NULL in case of error + */ +void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); @@ -483,9 +494,9 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align __alloc_size(1); /** - * kmalloc - allocate memory + * kmalloc - allocate kernel memory * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate. + * @flags: describe the allocation context * * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. @@ -512,12 +523,12 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * %GFP_ATOMIC * Allocation will not sleep. May use emergency pools. * - * %GFP_HIGHUSER - * Allocate memory from high memory on behalf of user. - * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * + * %__GFP_ZERO + * Zero the allocated memory before returning. Also see kzalloc(). + * * %__GFP_HIGH * This allocation has high priority and may use emergency pools. * @@ -536,42 +547,42 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * Try really hard to succeed the allocation but fail * eventually. 
*/ +#ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) { - if (__builtin_constant_p(size)) { -#ifndef CONFIG_SLOB + if (__builtin_constant_p(size) && size) { unsigned int index; -#endif + if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); -#ifndef CONFIG_SLOB - index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; + index = kmalloc_index(size); return kmalloc_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, size); -#endif } return __kmalloc(size, flags); } +#else +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large(size, flags); + + return __kmalloc(size, flags); +} +#endif #ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { - if (__builtin_constant_p(size)) { + if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large_node(size, flags, node); index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; - return kmalloc_node_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, node, size); diff --git a/mm/migrate.c b/mm/migrate.c index 1379e1912772..959c99cff814 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -74,13 +74,22 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) if (unlikely(!get_page_unless_zero(page))) goto out; + if (unlikely(PageSlab(page))) + goto out_putpage; + /* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */ + smp_rmb(); /* - * Check PageMovable before holding a PG_lock because page's owner - * assumes anybody doesn't touch PG_lock of newly allocated page - * so unconditionally grabbing the lock ruins page's owner side. + * Check movable flag before taking the page lock because + * we use non-atomic bitops on newly allocated page flags so + * unconditionally grabbing the lock ruins page's owner side. */ if (unlikely(!__PageMovable(page))) goto out_putpage; + /* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */ + smp_rmb(); + if (unlikely(PageSlab(page))) + goto out_putpage; + /* * As movable pages are not isolated from LRU lists, concurrent * compaction threads can race against page migration functions diff --git a/mm/slab.c b/mm/slab.c index 26f41d47f5f1..7a269db050ee 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -234,7 +234,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) parent->shared = NULL; parent->alien = NULL; parent->colour_next = 0; - spin_lock_init(&parent->list_lock); + raw_spin_lock_init(&parent->list_lock); parent->free_objects = 0; parent->free_touched = 0; } @@ -559,9 +559,9 @@ static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep, slab_node = slab_nid(slab); n = get_node(cachep, slab_node); - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); free_block(cachep, &objp, 1, slab_node, &list); - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); slabs_destroy(cachep, &list); } @@ -684,7 +684,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, struct kmem_cache_node *n = get_node(cachep, node); if (ac->avail) { - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); /* * Stuff objects into the remote nodes shared array first. 
* That way we could avoid the overhead of putting the objects @@ -695,7 +695,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, free_block(cachep, ac->entry, ac->avail, node, list); ac->avail = 0; - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); } } @@ -768,9 +768,9 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp, slabs_destroy(cachep, &list); } else { n = get_node(cachep, slab_node); - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); free_block(cachep, &objp, 1, slab_node, &list); - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); slabs_destroy(cachep, &list); } return 1; @@ -811,10 +811,10 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) */ n = get_node(cachep, node); if (n) { - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); return 0; } @@ -893,7 +893,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, goto fail; n = get_node(cachep, node); - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); if (n->shared && force_change) { free_block(cachep, n->shared->entry, n->shared->avail, node, &list); @@ -911,7 +911,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, new_alien = NULL; } - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); slabs_destroy(cachep, &list); /* @@ -950,7 +950,7 @@ static void cpuup_canceled(long cpu) if (!n) continue; - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); /* Free limit for this kmem_cache_node */ n->free_limit -= cachep->batchcount; @@ -961,7 +961,7 @@ static void cpuup_canceled(long cpu) nc->avail = 0; if (!cpumask_empty(mask)) { - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); goto free_slab; } @@ -975,7 +975,7 @@ static void cpuup_canceled(long cpu) alien = n->alien; n->alien = NULL; - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); kfree(shared); if (alien) { @@ -1159,7 +1159,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node * /* * Do not assume that spinlocks can be initialized via memcpy: */ - spin_lock_init(&ptr->list_lock); + raw_spin_lock_init(&ptr->list_lock); MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->node[nodeid] = ptr; @@ -1330,11 +1330,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) for_each_kmem_cache_node(cachep, node, n) { unsigned long total_slabs, free_slabs, free_objs; - spin_lock_irqsave(&n->list_lock, flags); + raw_spin_lock_irqsave(&n->list_lock, flags); total_slabs = n->total_slabs; free_slabs = n->free_slabs; free_objs = n->free_objects; - spin_unlock_irqrestore(&n->list_lock, flags); + raw_spin_unlock_irqrestore(&n->list_lock, flags); pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", node, total_slabs - free_slabs, total_slabs, @@ -1370,6 +1370,8 @@ static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, account_slab(slab, cachep->gfporder, cachep, flags); __folio_set_slab(folio); + /* Make the flag visible before any changes to folio->mapping */ + smp_wmb(); /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0))) slab_set_pfmemalloc(slab); @@ -1387,9 +1389,11 @@ static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab) BUG_ON(!folio_test_slab(folio)); 
__slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); page_mapcount_reset(folio_page(folio, 0)); folio->mapping = NULL; + /* Make the mapping reset visible before clearing the flag */ + smp_wmb(); + __folio_clear_slab(folio); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += 1 << order; @@ -2096,7 +2100,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); + assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); #endif } @@ -2104,7 +2108,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&get_node(cachep, node)->list_lock); + assert_raw_spin_locked(&get_node(cachep, node)->list_lock); #endif } @@ -2144,9 +2148,9 @@ static void do_drain(void *arg) check_irq_off(); ac = cpu_cache_get(cachep); n = get_node(cachep, node); - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); free_block(cachep, ac->entry, ac->avail, node, &list); - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); ac->avail = 0; slabs_destroy(cachep, &list); } @@ -2164,9 +2168,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep) drain_alien_cache(cachep, n->alien); for_each_kmem_cache_node(cachep, node, n) { - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); drain_array_locked(cachep, n->shared, node, true, &list); - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); slabs_destroy(cachep, &list); } @@ -2188,10 +2192,10 @@ static int drain_freelist(struct kmem_cache *cache, nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); p = n->slabs_free.prev; if (p == &n->slabs_free) { - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); goto out; } @@ -2204,7 +2208,7 @@ static int drain_freelist(struct kmem_cache *cache, * to the cache. 
*/ n->free_objects -= cache->num; - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); slab_destroy(cache, slab); nr_freed++; } @@ -2629,7 +2633,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab) INIT_LIST_HEAD(&slab->slab_list); n = get_node(cachep, slab_nid(slab)); - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); n->total_slabs++; if (!slab->active) { list_add_tail(&slab->slab_list, &n->slabs_free); @@ -2639,7 +2643,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab) STATS_INC_GROWN(cachep); n->free_objects += cachep->num - slab->active; - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); } @@ -2805,7 +2809,7 @@ static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) { struct slab *slab; - assert_spin_locked(&n->list_lock); + assert_raw_spin_locked(&n->list_lock); slab = list_first_entry_or_null(&n->slabs_partial, struct slab, slab_list); if (!slab) { @@ -2832,10 +2836,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, if (!gfp_pfmemalloc_allowed(flags)) return NULL; - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); slab = get_first_slab(n, true); if (!slab) { - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); return NULL; } @@ -2844,7 +2848,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, fixup_slab_list(cachep, n, slab, &list); - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); return obj; @@ -2903,7 +2907,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) if (!n->free_objects && (!shared || !shared->avail)) goto direct_grow; - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); shared = READ_ONCE(n->shared); /* See if we can refill from the shared array */ @@ -2927,7 +2931,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) must_grow: n->free_objects -= ac->avail; alloc_done: - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); direct_grow: @@ -3147,7 +3151,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, BUG_ON(!n); check_irq_off(); - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); slab = get_first_slab(n, false); if (!slab) goto must_grow; @@ -3165,12 +3169,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, fixup_slab_list(cachep, n, slab, &list); - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); fixup_objfreelist_debug(cachep, &list); return obj; must_grow: - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); if (slab) { /* This slab isn't counted yet so don't update free_objects */ @@ -3326,7 +3330,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) check_irq_off(); n = get_node(cachep, node); - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); if (n->shared) { struct array_cache *shared_array = n->shared; int max = shared_array->limit - shared_array->avail; @@ -3355,7 +3359,7 @@ free_done: STATS_SET_FREEABLE(cachep, i); } #endif - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); slabs_destroy(cachep, &list); @@ -3447,16 +3451,6 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, 
struct list_lru *lru, return ret; } -/** - * kmem_cache_alloc - Allocate an object - * @cachep: The cache to allocate from. - * @flags: See kmalloc(). - * - * Allocate an object from this cache. The flags are only relevant - * if the cache has no available objects. - * - * Return: pointer to the new object or %NULL in case of error - */ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { return __kmem_cache_alloc_lru(cachep, NULL, flags); @@ -3722,9 +3716,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, node = cpu_to_mem(cpu); n = get_node(cachep, node); - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); free_block(cachep, ac->entry, ac->avail, node, &list); - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); slabs_destroy(cachep, &list); } free_percpu(prev); @@ -3816,9 +3810,9 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, return; } - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); drain_array_locked(cachep, ac, node, false, &list); - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); slabs_destroy(cachep, &list); } @@ -3902,7 +3896,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) for_each_kmem_cache_node(cachep, node, n) { check_irq_on(); - spin_lock_irq(&n->list_lock); + raw_spin_lock_irq(&n->list_lock); total_slabs += n->total_slabs; free_slabs += n->free_slabs; @@ -3911,7 +3905,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) if (n->shared) shared_avail += n->shared->avail; - spin_unlock_irq(&n->list_lock); + raw_spin_unlock_irq(&n->list_lock); } num_objs = total_slabs * cachep->num; active_slabs = total_slabs - free_slabs; diff --git a/mm/slab.h b/mm/slab.h index d5a0b69b81ab..060c589a827a 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -11,37 +11,43 @@ struct slab { #if defined(CONFIG_SLAB) + struct kmem_cache *slab_cache; union { - struct list_head slab_list; + struct { + struct list_head slab_list; + void *freelist; /* array of free object indexes */ + void *s_mem; /* first object */ + }; struct rcu_head rcu_head; }; - struct kmem_cache *slab_cache; - void *freelist; /* array of free object indexes */ - void *s_mem; /* first object */ unsigned int active; #elif defined(CONFIG_SLUB) - union { - struct list_head slab_list; - struct rcu_head rcu_head; -#ifdef CONFIG_SLUB_CPU_PARTIAL - struct { - struct slab *next; - int slabs; /* Nr of slabs left */ - }; -#endif - }; struct kmem_cache *slab_cache; - /* Double-word boundary */ - void *freelist; /* first free object */ union { - unsigned long counters; struct { - unsigned inuse:16; - unsigned objects:15; - unsigned frozen:1; + union { + struct list_head slab_list; +#ifdef CONFIG_SLUB_CPU_PARTIAL + struct { + struct slab *next; + int slabs; /* Nr of slabs left */ + }; +#endif + }; + /* Double-word boundary */ + void *freelist; /* first free object */ + union { + unsigned long counters; + struct { + unsigned inuse:16; + unsigned objects:15; + unsigned frozen:1; + }; + }; }; + struct rcu_head rcu_head; }; unsigned int __unused; @@ -66,9 +72,10 @@ struct slab { #define SLAB_MATCH(pg, sl) \ static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl)) SLAB_MATCH(flags, __page_flags); -SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */ #ifndef CONFIG_SLOB -SLAB_MATCH(rcu_head, rcu_head); +SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */ +#else +SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */ #endif 
SLAB_MATCH(_refcount, __page_refcount); #ifdef CONFIG_MEMCG @@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data); #endif #undef SLAB_MATCH static_assert(sizeof(struct slab) <= sizeof(struct page)); +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB) +static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *))); +#endif /** * folio_slab - Converts from folio to slab. @@ -766,9 +776,8 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, * The slab lists for all objects. */ struct kmem_cache_node { - spinlock_t list_lock; - #ifdef CONFIG_SLAB + raw_spinlock_t list_lock; struct list_head slabs_partial; /* partial list first, better asm code */ struct list_head slabs_full; struct list_head slabs_free; @@ -784,6 +793,7 @@ struct kmem_cache_node { #endif #ifdef CONFIG_SLUB + spinlock_t list_lock; unsigned long nr_partial; struct list_head partial; #ifdef CONFIG_SLUB_DEBUG diff --git a/mm/slub.c b/mm/slub.c index 5f3e34923065..a24b71041b26 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1839,6 +1839,8 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node, slab = folio_slab(folio); __folio_set_slab(folio); + /* Make the flag visible before any changes to folio->mapping */ + smp_wmb(); if (page_is_pfmemalloc(folio_page(folio, 0))) slab_set_pfmemalloc(slab); @@ -2038,17 +2040,11 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) int order = folio_order(folio); int pages = 1 << order; - if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { - void *p; - - slab_pad_check(s, slab); - for_each_object(p, s, slab_address(slab), slab->objects) - check_object(s, slab, p, SLUB_RED_INACTIVE); - } - __slab_clear_pfmemalloc(slab); - __folio_clear_slab(folio); folio->mapping = NULL; + /* Make the mapping reset visible before clearing the flag */ + smp_wmb(); + __folio_clear_slab(folio); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; unaccount_slab(slab, order, s); @@ -2064,9 +2060,17 @@ static void rcu_free_slab(struct rcu_head *h) static void free_slab(struct kmem_cache *s, struct slab *slab) { - if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { + if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { + void *p; + + slab_pad_check(s, slab); + for_each_object(p, s, slab_address(slab), slab->objects) + check_object(s, slab, p, SLUB_RED_INACTIVE); + } + + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&slab->rcu_head, rcu_free_slab); - } else + else __free_slab(s, slab); } @@ -2450,7 +2454,7 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) static void deactivate_slab(struct kmem_cache *s, struct slab *slab, void *freelist) { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST }; + enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST }; struct kmem_cache_node *n = get_node(s, slab_nid(slab)); int free_delta = 0; enum slab_modes mode = M_NONE; @@ -2526,14 +2530,6 @@ redo: * acquire_slab() will see a slab that is frozen */ spin_lock_irqsave(&n->list_lock, flags); - } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) { - mode = M_FULL; - /* - * This also ensures that the scanning of full - * slabs from diagnostic functions will not see - * any frozen slabs. 
- */ - spin_lock_irqsave(&n->list_lock, flags); } else { mode = M_FULL_NOLIST; } @@ -2543,7 +2539,7 @@ redo: old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")) { - if (mode == M_PARTIAL || mode == M_FULL) + if (mode == M_PARTIAL) spin_unlock_irqrestore(&n->list_lock, flags); goto redo; } @@ -2557,10 +2553,6 @@ redo: stat(s, DEACTIVATE_EMPTY); discard_slab(s, slab); stat(s, FREE_SLAB); - } else if (mode == M_FULL) { - add_full(s, n, slab); - spin_unlock_irqrestore(&n->list_lock, flags); - stat(s, DEACTIVATE_FULL); } else if (mode == M_FULL_NOLIST) { stat(s, DEACTIVATE_FULL); } @@ -4060,7 +4052,8 @@ init_kmem_cache_node(struct kmem_cache_node *n) static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < - KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu)); + NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH * + sizeof(struct kmem_cache_cpu)); /* * Must align to double word boundary for the double cmpxchg @@ -5630,7 +5623,21 @@ static ssize_t failslab_show(struct kmem_cache *s, char *buf) { return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); } -SLAB_ATTR_RO(failslab); + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + if (s->refcount > 1) + return -EINVAL; + + if (buf[0] == '1') + WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB); + else + WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB); + + return length; +} +SLAB_ATTR(failslab); #endif static ssize_t shrink_show(struct kmem_cache *s, char *buf) @@ -5964,11 +5971,6 @@ static int sysfs_slab_add(struct kmem_cache *s) struct kset *kset = cache_kset(s); int unmergeable = slab_unmergeable(s); - if (!kset) { - kobject_init(&s->kobj, &slab_ktype); - return 0; - } - if (!unmergeable && disable_higher_order_debug && (slub_debug & DEBUG_METADATA_FLAGS)) unmergeable = 1; @@ -6098,8 +6100,7 @@ static int __init slab_sysfs_init(void) mutex_unlock(&slab_mutex); return 0; } - -__initcall(slab_sysfs_init); +late_initcall(slab_sysfs_init); #endif /* CONFIG_SYSFS */ #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 0fffaeedee76..cfaeaea71042 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -157,9 +157,11 @@ static unsigned long read_obj(const char *name) { FILE *f = fopen(name, "r"); - if (!f) + if (!f) { buffer[0] = 0; - else { + if (errno == EACCES) + fatal("%s, Try using superuser\n", strerror(errno)); + } else { if (!fgets(buffer, sizeof(buffer), f)) buffer[0] = 0; fclose(f); |
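Note on the relocated kmem_cache_alloc() kernel-doc: the comment this patch moves into include/linux/slab.h documents the basic object-cache API. A minimal usage sketch follows; struct foo, foo_cache and the helper names are invented for illustration and are not part of the patch:

#include <linux/slab.h>
#include <linux/errno.h>

struct foo {
        int id;
};

static struct kmem_cache *foo_cache;

static int foo_cache_init(void)
{
        /* One dedicated cache for struct foo objects. */
        foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
                                      SLAB_HWCACHE_ALIGN, NULL);
        return foo_cache ? 0 : -ENOMEM;
}

static struct foo *foo_alloc(gfp_t gfp)
{
        /*
         * Returns a pointer to a new object or NULL on failure, as the
         * relocated kernel-doc states.  kmem_cache_zalloc() is the
         * __GFP_ZERO shortcut it refers to.
         */
        return kmem_cache_alloc(foo_cache, gfp);
}

static void foo_free(struct foo *f)
{
        kmem_cache_free(foo_cache, f);
}

In process context foo_alloc(GFP_KERNEL) would be the usual call; GFP_ATOMIC fits contexts that must not sleep, matching the flag descriptions in the kmalloc() kernel-doc.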
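Note on the kmalloc() kernel-doc update: it adds %__GFP_ZERO to the modifier flags and points at kzalloc(). A small, hypothetical example of the zeroed-allocation form (buf_create() is a made-up name, not from the patch):

#include <linux/slab.h>

static void *buf_create(size_t len)
{
        /* Zeroed allocation; equivalent to kzalloc(len, GFP_KERNEL). */
        void *buf = kmalloc(len, GFP_KERNEL | __GFP_ZERO);

        /*
         * With the reworked inline kmalloc(), a compile-time constant
         * len up to KMALLOC_MAX_CACHE_SIZE is served from a fixed
         * kmalloc cache selected by kmalloc_index(); larger constant
         * sizes go through kmalloc_large(), and non-constant sizes
         * fall back to __kmalloc().
         */
        return buf;
}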
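Note on the added memory barriers: the smp_wmb()/smp_rmb() pairs between the slab page flag updates and folio->mapping form a publish/retire ordering pattern that isolate_movable_page() relies on. The sketch below restates that pattern with invented names (struct obj, publish_slab(), retire_slab(), movable_check()); it is illustrative only and deliberately ignores the real folio and page-flag machinery:

#include <linux/types.h>
#include <asm/barrier.h>        /* smp_wmb()/smp_rmb() */

struct obj {
        unsigned long is_slab;  /* stands in for the slab page flag */
        void *mapping;          /* stands in for folio->mapping */
};

/* Allocation side, cf. alloc_slab_page()/kmem_getpages(). */
static void publish_slab(struct obj *o)
{
        o->is_slab = 1;
        smp_wmb();              /* flag visible before mapping is reused */
        o->mapping = (void *)0x1;       /* slab metadata overlays mapping */
}

/* Freeing side, cf. __free_slab()/kmem_freepages(). */
static void retire_slab(struct obj *o)
{
        o->mapping = NULL;
        smp_wmb();              /* mapping reset visible before flag clear */
        o->is_slab = 0;
}

/* Reader side, cf. isolate_movable_page(). */
static bool movable_check(struct obj *o)
{
        if (o->is_slab)
                return false;
        smp_rmb();              /* saw the flag clear => also see mapping == NULL */
        if (!o->mapping)
                return false;
        smp_rmb();              /* saw a slab-written mapping => re-check sees the flag */
        return !o->is_slab;     /* second check closes the allocation-side race */
}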