From ad59baa3169591e0b4cf1a217c9139f2145f4c7f Mon Sep 17 00:00:00 2001
From: Vlastimil Babka
Date: Wed, 3 Jul 2024 09:25:21 +0200
Subject: slab, rust: extend kmalloc() alignment guarantees to remove Rust padding

Slab allocators have been guaranteeing natural alignment for power-of-two
sizes since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment
for kmalloc(power-of-two)"), while any other sizes are guaranteed to be
aligned only to ARCH_KMALLOC_MINALIGN bytes (although in practice they are
aligned to more than that in non-debug scenarios).

Rust's allocator API specifies size and alignment per allocation, which
have to satisfy the following rules, per Alice Ryhl [1]:

1. The alignment is a power of two.
2. The size is non-zero.
3. When you round up the size to the next multiple of the alignment, then
   it must not overflow the signed type isize / ssize_t.

In order to map this to kmalloc()'s guarantees, some requested allocation
sizes have to be padded to the next power-of-two size [2]. For example, an
allocation of size 96 and alignment of 32 will be padded to an allocation
of size 128, because the existing kmalloc-96 bucket doesn't guarantee
alignment above ARCH_KMALLOC_MINALIGN. Without slab debugging active, the
layout of the kmalloc-96 slabs however naturally aligns the objects to 32
bytes, so extending the size to 128 bytes is wasteful.

To improve the situation we can extend the kmalloc() alignment guarantees
in a way that

1) doesn't change the current slab layout (and thus does not increase
   internal fragmentation) when slab debugging is not active
2) reduces waste in the Rust allocator use case
3) is a superset of the current guarantee for power-of-two sizes.

The extended guarantee is that alignment is at least the largest
power-of-two divisor of the requested size. For power-of-two sizes the
largest divisor is the size itself, but let's keep this case documented
separately for clarity.

For current kmalloc size buckets, it means kmalloc-96 will guarantee
alignment of 32 bytes and kmalloc-192 will guarantee 64 bytes. This covers
rules 1 and 2 of Rust's API above as long as the size is a multiple of the
alignment. The Rust layer should now only need to round up the size to the
next multiple if it isn't, while enforcing rule 3.

Implementation-wise, this changes the alignment calculation in
create_boot_cache(). While at it, also do the calculation only for caches
with the SLAB_KMALLOC flag, because the function is also used to create
the initial kmem_cache and kmem_cache_node caches, where no alignment
guarantee is necessary.

In the Rust allocator's krealloc_aligned(), remove the code that padded
sizes to the next power of two (suggested by Alice Ryhl) as it's no longer
necessary with the new guarantees.
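As a quick illustration of the extended guarantee (a standalone userspace
sketch, not kernel code; the sizes are only examples), the alignment that
kmalloc() now promises for a given size is its largest power-of-two
divisor, computed the same way the patch does in create_boot_cache():

/* Standalone illustration (not kernel code) of the new guarantee. */
#include <stdio.h>
#include <strings.h>	/* ffs() */

static unsigned int kmalloc_guaranteed_align(unsigned int size)
{
	return 1U << (ffs(size) - 1);	/* largest power-of-two divisor */
}

int main(void)
{
	printf("96  -> %u\n", kmalloc_guaranteed_align(96));	/* 32 */
	printf("192 -> %u\n", kmalloc_guaranteed_align(192));	/* 64 */
	printf("128 -> %u\n", kmalloc_guaranteed_align(128));	/* 128: power of two, the size itself */

	/* A request of size 96 with alignment 32 now fits kmalloc-96
	 * directly, with no padding to 128 bytes. */
	printf("size 96, align 32 ok: %d\n", kmalloc_guaranteed_align(96) >= 32);
	return 0;
}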
Reported-by: Alice Ryhl
Reported-by: Boqun Feng
Link: https://lore.kernel.org/all/CAH5fLggjrbdUuT-H-5vbQfMazjRDpp2%2Bk3%3DYhPyS17ezEqxwcw@mail.gmail.com/ [1]
Link: https://lore.kernel.org/all/CAH5fLghsZRemYUwVvhk77o6y1foqnCeDzW4WZv6ScEWna2+_jw@mail.gmail.com/ [2]
Reviewed-by: Boqun Feng
Acked-by: Roman Gushchin
Reviewed-by: Alice Ryhl
Signed-off-by: Vlastimil Babka
---
 mm/slab_common.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'mm/slab_common.c')

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1560a1546bb1..7272ef7bc55f 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -617,11 +617,12 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
 	s->size = s->object_size = size;
 
 	/*
-	 * For power of two sizes, guarantee natural alignment for kmalloc
-	 * caches, regardless of SL*B debugging options.
+	 * kmalloc caches guarantee alignment of at least the largest
+	 * power-of-two divisor of the size. For power-of-two sizes,
+	 * it is the size itself.
 	 */
-	if (is_power_of_2(size))
-		align = max(align, size);
+	if (flags & SLAB_KMALLOC)
+		align = max(align, 1U << (ffs(size) - 1));
 	s->align = calculate_alignment(flags, align, size);
 
 #ifdef CONFIG_HARDENED_USERCOPY
--
cgit

From 72e0fe2241ce113cbba339ca8c2450b167774530 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Mon, 1 Jul 2024 12:12:58 -0700
Subject: mm/slab: Introduce kmem_buckets typedef

Encapsulate the concept of a single set of kmem_caches that are used for
the kmalloc size buckets. Redefine kmalloc_caches as an array of these
buckets (for the different global cache buckets).

Signed-off-by: Kees Cook
Signed-off-by: Vlastimil Babka
---
 include/linux/slab.h | 5 +++--
 mm/slab_common.c     | 3 +--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'mm/slab_common.c')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 640cea6e6323..922bf15794f7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -426,8 +426,9 @@ enum kmalloc_cache_type {
 	NR_KMALLOC_TYPES
 };
 
-extern struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
+
+extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
 
 /*
  * Define gfp bits that should not be set for KMALLOC_NORMAL.
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 7272ef7bc55f..ff60f91e4edc 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -654,8 +654,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
 	return s;
 }
 
-struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
+kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init =
 { /* initialization for https://llvm.org/pr42570 */ };
 EXPORT_SYMBOL(kmalloc_caches);
--
cgit

From 67f2df3b82d091ed095d0e47e1f3a9d3e18e4e41 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Mon, 1 Jul 2024 12:12:59 -0700
Subject: mm/slab: Plumb kmem_buckets into __do_kmalloc_node()

Introduce CONFIG_SLAB_BUCKETS which provides the infrastructure to
support separated kmalloc buckets (in the following kmem_buckets_create()
patches and future codetag-based separation). Since this will provide a
mitigation for a very common case of exploits, it is recommended to
enable this feature for general purpose distros. By default, the new
Kconfig will be enabled if CONFIG_SLAB_FREELIST_HARDENED is enabled (and
it is added to the hardening.config Kconfig fragment).
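Before the plumbing described next, it may help to spell out what the
kmem_buckets typedef introduced above actually is: a named array type, so
a whole set of buckets can be passed around by a single pointer while the
existing kmalloc_caches[type][index] lookups compile unchanged. A minimal
userspace sketch (the array bounds below are illustrative, not the
kernel's real values):

#include <stdio.h>

#define KMALLOC_SHIFT_HIGH 13	/* illustrative only */
#define NR_KMALLOC_TYPES   4	/* illustrative only */

struct kmem_cache { const char *name; };

/* One bucket set: an array of cache pointers indexed by size class. */
typedef struct kmem_cache *kmem_buckets[KMALLOC_SHIFT_HIGH + 1];

/* The global table becomes an array of bucket sets. */
static kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];

int main(void)
{
	static struct kmem_cache dummy = { "kmalloc-96" };
	kmem_buckets *b;

	kmalloc_caches[0][7] = &dummy;	/* [type][size index], same as before */

	b = &kmalloc_caches[0];		/* one set can now be passed by pointer */
	printf("%s\n", (*b)[7]->name);	/* prints kmalloc-96 */
	return 0;
}

This is what lets the plumbing below add a kmem_buckets * argument that
falls back to the global table when it is NULL.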
To be able to choose which buckets to allocate from, make the buckets available to the internal kmalloc interfaces by adding them as the second argument, rather than depending on the buckets being chosen from the fixed set of global buckets. Where the bucket is not available, pass NULL, which means "use the default system kmalloc bucket set" (the prior existing behavior), as implemented in kmalloc_slab(). To avoid adding the extra argument when !CONFIG_SLAB_BUCKETS, only the top-level macros and static inlines use the buckets argument (where they are stripped out and compiled out respectively). The actual extern functions can then be built without the argument, and the internals fall back to the global kmalloc buckets unconditionally. Co-developed-by: Vlastimil Babka Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 27 ++++++++++++++++++++++----- kernel/configs/hardening.config | 1 + mm/Kconfig | 17 +++++++++++++++++ mm/slab.h | 6 ++++-- mm/slab_common.c | 2 +- mm/slub.c | 20 ++++++++++---------- scripts/kernel-doc | 1 + 7 files changed, 56 insertions(+), 18 deletions(-) (limited to 'mm/slab_common.c') diff --git a/include/linux/slab.h b/include/linux/slab.h index 922bf15794f7..a9200d453087 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -570,6 +570,21 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) +/* + * These macros allow declaring a kmem_buckets * parameter alongside size, which + * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call + * sites don't have to pass NULL. + */ +#ifdef CONFIG_SLAB_BUCKETS +#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) +#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) +#define PASS_BUCKET_PARAM(_b) (_b) +#else +#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) +#define PASS_BUCKET_PARAMS(_size, _b) (_size) +#define PASS_BUCKET_PARAM(_b) NULL +#endif + /* * The following functions are not to be used directly and are intended only * for internal use from kmalloc() and kmalloc_node() @@ -579,7 +594,7 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) @@ -680,7 +695,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } - return __kmalloc_node_noprof(size, flags, node); + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); } #define kmalloc_node(...) 
alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) @@ -731,8 +746,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi */ #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) -void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, - unsigned long caller) __alloc_size(1); +void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, + unsigned long caller) __alloc_size(1); +#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ + __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) #define kmalloc_node_track_caller(...) \ alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) @@ -758,7 +775,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_ return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc_node_noprof(bytes, flags, node); - return __kmalloc_node_noprof(bytes, flags, node); + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); } #define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config index 8a7ce7a6b3ab..3fabb8f55ef6 100644 --- a/kernel/configs/hardening.config +++ b/kernel/configs/hardening.config @@ -20,6 +20,7 @@ CONFIG_RANDOMIZE_MEMORY=y # Randomize allocator freelists, harden metadata. CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y +CONFIG_SLAB_BUCKETS=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y CONFIG_RANDOM_KMALLOC_CACHES=y diff --git a/mm/Kconfig b/mm/Kconfig index b4cb45255a54..e0dfb268717c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -273,6 +273,23 @@ config SLAB_FREELIST_HARDENED sacrifices to harden the kernel slab allocator against common freelist exploit methods. +config SLAB_BUCKETS + bool "Support allocation from separate kmalloc buckets" + depends on !SLUB_TINY + default SLAB_FREELIST_HARDENED + help + Kernel heap attacks frequently depend on being able to create + specifically-sized allocations with user-controlled contents + that will be allocated into the same kmalloc bucket as a + target object. To avoid sharing these allocation buckets, + provide an explicitly separated set of buckets to be used for + user-controlled allocations. This may very slightly increase + memory fragmentation, though in practice it's only a handful + of extra pages since the bulk of user-controlled allocations + are relatively long-lived. + + If unsure, say Y. + config SLUB_STATS default n bool "Enable performance statistics" diff --git a/mm/slab.h b/mm/slab.h index b16e63191578..d5e8034af9d5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -403,16 +403,18 @@ static inline unsigned int size_index_elem(unsigned int bytes) * KMALLOC_MAX_CACHE_SIZE and the caller must check that. 
*/ static inline struct kmem_cache * -kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) +kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) { unsigned int index; + if (!b) + b = &kmalloc_caches[kmalloc_type(flags, caller)]; if (size <= 192) index = kmalloc_size_index[size_index_elem(size)]; else index = fls(size - 1); - return kmalloc_caches[kmalloc_type(flags, caller)][index]; + return (*b)[index]; } gfp_t kmalloc_fix_flags(gfp_t flags); diff --git a/mm/slab_common.c b/mm/slab_common.c index ff60f91e4edc..bcc1e13d7f86 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -703,7 +703,7 @@ size_t kmalloc_size_roundup(size_t size) * The flags don't matter since size_index is common to all. * Neither does the caller for just getting ->object_size. */ - return kmalloc_slab(size, GFP_KERNEL, 0)->object_size; + return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size; } /* Above the smaller buckets, size is a multiple of page size. */ diff --git a/mm/slub.c b/mm/slub.c index 3d19a0ee411f..80f0a51242d1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4117,7 +4117,7 @@ void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(__kmalloc_large_node_noprof); static __always_inline -void *__do_kmalloc_node(size_t size, gfp_t flags, int node, +void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node, unsigned long caller) { struct kmem_cache *s; @@ -4133,32 +4133,32 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, if (unlikely(!size)) return ZERO_SIZE_PTR; - s = kmalloc_slab(size, flags, caller); + s = kmalloc_slab(size, b, flags, caller); ret = slab_alloc_node(s, NULL, flags, node, caller, size); ret = kasan_kmalloc(s, ret, size, flags); trace_kmalloc(caller, ret, size, s->size, flags, node); return ret; } - -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) { - return __do_kmalloc_node(size, flags, node, _RET_IP_); + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_node_noprof); void *__kmalloc_noprof(size_t size, gfp_t flags) { - return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); + return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_noprof); -void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, - int node, unsigned long caller) +void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, + int node, unsigned long caller) { - return __do_kmalloc_node(size, flags, node, caller); + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller); + } -EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); +EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) { diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 95a59ac78f82..2791f8195203 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1729,6 +1729,7 @@ sub dump_function($$) { $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; + $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; my $define = $prototype =~ s/^#\s*define\s+//; #ak added $prototype =~ s/__attribute_const__ +//; $prototype =~ s/__attribute__\s*\(\( -- cgit From 
b32801d1255be1da62ea8134df3ed9f3331fba12 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jul 2024 12:13:01 -0700 Subject: mm/slab: Introduce kmem_buckets_create() and family Dedicated caches are available for fixed size allocations via kmem_cache_alloc(), but for dynamically sized allocations there is only the global kmalloc API's set of buckets available. This means it isn't possible to separate specific sets of dynamically sized allocations into a separate collection of caches. This leads to a use-after-free exploitation weakness in the Linux kernel since many heap memory spraying/grooming attacks depend on using userspace-controllable dynamically sized allocations to collide with fixed size allocations that end up in same cache. While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense against these kinds of "type confusion" attacks, including for fixed same-size heap objects, we can create a complementary deterministic defense for dynamically sized allocations that are directly user controlled. Addressing these cases is limited in scope, so isolating these kinds of interfaces will not become an unbounded game of whack-a-mole. For example, many pass through memdup_user(), making isolation there very effective. In order to isolate user-controllable dynamically-sized allocations from the common system kmalloc allocations, introduce kmem_buckets_create(), which behaves like kmem_cache_create(). Introduce kmem_buckets_alloc(), which behaves like kmem_cache_alloc(). Introduce kmem_buckets_alloc_track_caller() for where caller tracking is needed. Introduce kmem_buckets_valloc() for cases where vmalloc fallback is needed. Note that these caches are specifically flagged with SLAB_NO_MERGE, since merging would defeat the entire purpose of the mitigation. This can also be used in the future to extend allocation profiling's use of code tagging to implement per-caller allocation cache isolation[1] even for dynamic allocations. Memory allocation pinning[2] is still needed to plug the Use-After-Free cross-allocator weakness (where attackers can arrange to free an entire slab page and have it reallocated to a different cache), but that is an existing and separate issue which is complementary to this improvement. Development continues for that feature via the SLAB_VIRTUAL[3] series (which could also provide guard pages -- another complementary improvement). Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1] Link: https://googleprojectzero.blogspot.com/2021/10/how-simple-linux-kernel-memory.html [2] Link: https://lore.kernel.org/lkml/20230915105933.495735-1-matteorizzo@google.com/ [3] Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 12 +++++++ mm/slab_common.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) (limited to 'mm/slab_common.c') diff --git a/include/linux/slab.h b/include/linux/slab.h index 837005314f96..d99afce36098 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -549,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, void kmem_cache_free(struct kmem_cache *s, void *objp); +kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, + unsigned int useroffset, unsigned int usersize, + void (*ctor)(void *)); + /* * Bulk allocation and freeing operations. 
These are accelerated in an * allocator specific way to avoid taking locks repeatedly or building @@ -682,6 +686,12 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f } #define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) +#define kmem_buckets_alloc(_b, _size, _flags) \ + alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) + +#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \ + alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_)) + static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && size) { @@ -809,6 +819,8 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) +#define kmem_buckets_valloc(_b, _size, _flags) \ + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) diff --git a/mm/slab_common.c b/mm/slab_common.c index bcc1e13d7f86..70943a4c1c4b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -392,6 +392,98 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, } EXPORT_SYMBOL(kmem_cache_create); +static struct kmem_cache *kmem_buckets_cache __ro_after_init; + +/** + * kmem_buckets_create - Create a set of caches that handle dynamic sized + * allocations via kmem_buckets_alloc() + * @name: A prefix string which is used in /proc/slabinfo to identify this + * cache. The individual caches with have their sizes as the suffix. + * @flags: SLAB flags (see kmem_cache_create() for details). + * @useroffset: Starting offset within an allocation that may be copied + * to/from userspace. + * @usersize: How many bytes, starting at @useroffset, may be copied + * to/from userspace. + * @ctor: A constructor for the objects, run when new allocations are made. + * + * Cannot be called within an interrupt, but can be interrupted. + * + * Return: a pointer to the cache on success, NULL on failure. When + * CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and + * subsequent calls to kmem_buckets_alloc() will fall back to kmalloc(). + * (i.e. callers only need to check for NULL on failure.) + */ +kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, + unsigned int useroffset, + unsigned int usersize, + void (*ctor)(void *)) +{ + kmem_buckets *b; + int idx; + + /* + * When the separate buckets API is not built in, just return + * a non-NULL value for the kmem_buckets pointer, which will be + * unused when performing allocations. 
+ */ + if (!IS_ENABLED(CONFIG_SLAB_BUCKETS)) + return ZERO_SIZE_PTR; + + if (WARN_ON(!kmem_buckets_cache)) + return NULL; + + b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO); + if (WARN_ON(!b)) + return NULL; + + flags |= SLAB_NO_MERGE; + + for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) { + char *short_size, *cache_name; + unsigned int cache_useroffset, cache_usersize; + unsigned int size; + + if (!kmalloc_caches[KMALLOC_NORMAL][idx]) + continue; + + size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size; + if (!size) + continue; + + short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-'); + if (WARN_ON(!short_size)) + goto fail; + + cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1); + if (WARN_ON(!cache_name)) + goto fail; + + if (useroffset >= size) { + cache_useroffset = 0; + cache_usersize = 0; + } else { + cache_useroffset = useroffset; + cache_usersize = min(size - cache_useroffset, usersize); + } + (*b)[idx] = kmem_cache_create_usercopy(cache_name, size, + 0, flags, cache_useroffset, + cache_usersize, ctor); + kfree(cache_name); + if (WARN_ON(!(*b)[idx])) + goto fail; + } + + return b; + +fail: + for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) + kmem_cache_destroy((*b)[idx]); + kfree(b); + + return NULL; +} +EXPORT_SYMBOL(kmem_buckets_create); + #ifdef SLAB_SUPPORTS_SYSFS /* * For a given kmem_cache, kmem_cache_destroy() should only be called @@ -932,6 +1024,11 @@ void __init create_kmalloc_caches(void) /* Kmalloc array is now usable */ slab_state = UP; + + if (IS_ENABLED(CONFIG_SLAB_BUCKETS)) + kmem_buckets_cache = kmem_cache_create("kmalloc_buckets", + sizeof(kmem_buckets), + 0, SLAB_NO_MERGE, NULL); } /** -- cgit
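To see how this new family of APIs is meant to be consumed, here is a
hypothetical caller sketch (not part of this series; the subsystem name,
flags and sizes are invented for illustration) that isolates a
user-controlled, dynamically sized allocation into its own bucket set and
frees it with plain kfree(), exactly as with kmalloc():

/* Hypothetical example only: isolate "foo" message buffers. */
static kmem_buckets *foo_msg_buckets __ro_after_init;

static int __init foo_buckets_init(void)
{
	/*
	 * With CONFIG_SLAB_BUCKETS=n this returns ZERO_SIZE_PTR and the
	 * allocations below silently fall back to the global kmalloc
	 * buckets, so only NULL needs to be treated as failure.
	 */
	foo_msg_buckets = kmem_buckets_create("foo_msg", 0, 0, UINT_MAX, NULL);
	return foo_msg_buckets ? 0 : -ENOMEM;
}

static void *foo_copy_msg(const void __user *src, size_t len)
{
	void *buf = kmem_buckets_alloc(foo_msg_buckets, len, GFP_KERNEL);

	if (!buf)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(buf, src, len)) {
		kfree(buf);		/* same free path as kmalloc() */
		return ERR_PTR(-EFAULT);
	}
	return buf;
}

Where a vmalloc() fallback is needed for large sizes, kmem_buckets_valloc()
takes the same (buckets, size, flags) arguments, and
kmem_buckets_alloc_track_caller() exists for wrappers that want allocations
attributed to their caller.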