-rw-r--r--  include/linux/mm_types.h |   7
-rw-r--r--  include/linux/slab.h     |   9
-rw-r--r--  include/linux/slab_def.h |   6
-rw-r--r--  mm/slab.c                | 171
-rw-r--r--  mm/slab.h                |  33
-rw-r--r--  mm/slab_common.c         |  73
-rw-r--r--  mm/slob.c                |  46
-rw-r--r--  mm/slub.c                | 224
8 files changed, 236 insertions(+), 333 deletions(-)
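One detail in the diff below is easy to miss: SLAB's statically declared boot cache no longer needs a separate static kmem_cache_nodelists[] array. Instead, struct kmem_cache reserves NR_CPUS + MAX_NUMNODES slots in its trailing array[], and setup_nodelists_pointer() aims nodelists at the slots that follow the last possible cpu, so the object size passed to create_boot_cache() only has to cover nr_cpu_ids cpu slots plus one node pointer per possible node. The standalone sketch below illustrates that layout; it is not the kernel code itself, and every constant, type and value in it is a simplified stand-in chosen only so the example compiles and runs in userspace.

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for kernel symbols -- values are illustrative only. */
#define NR_CPUS      8   /* config-time maximum cpus            */
#define MAX_NUMNODES 4   /* config-time maximum NUMA nodes      */
#define nr_cpu_ids   4   /* cpus actually possible on this boot */
#define nr_node_ids  2   /* nodes actually possible             */

struct kmem_list3  { int dummy; };
struct array_cache { int dummy; };

/* Only the fields involved in the layout trick; array[] stays the last member. */
struct kmem_cache {
	struct kmem_list3 **nodelists;
	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
};

/* Node-list pointers live in the slots after the last possible cpu slot. */
static void setup_nodelists_pointer(struct kmem_cache *cachep)
{
	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
}

int main(void)
{
	static struct kmem_cache boot_cache;
	/* The size handed to create_boot_cache() for the boot kmem_cache. */
	size_t boot_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
			   nr_node_ids * sizeof(struct kmem_list3 *);

	setup_nodelists_pointer(&boot_cache);

	printf("boot kmem_cache object size: %zu bytes\n", boot_size);
	printf("nodelists area starts at offset %zu\n",
	       (size_t)((char *)boot_cache.nodelists - (char *)&boot_cache));
	return 0;
}

The full NR_CPUS + MAX_NUMNODES array is only instantiated for the statically defined boot cache; every kmem_cache allocated afterwards comes from the "kmem_cache" cache, whose object size is the offsetof() expression above and therefore covers just the cpus and nodes possible on the running system.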
| diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 31f8a3af7d94..2fef4e720e79 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -128,10 +128,7 @@ struct page {  		};  		struct list_head list;	/* slobs list of pages */ -		struct {		/* slab fields */ -			struct kmem_cache *slab_cache; -			struct slab *slab_page; -		}; +		struct slab *slab_page; /* slab fields */  	};  	/* Remainder is not double word aligned */ @@ -146,7 +143,7 @@ struct page {  #if USE_SPLIT_PTLOCKS  		spinlock_t ptl;  #endif -		struct kmem_cache *slab;	/* SLUB: Pointer to slab */ +		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */  		struct page *first_page;	/* Compound tail pages */  	}; diff --git a/include/linux/slab.h b/include/linux/slab.h index 83d1a1454b7e..743a10415122 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,  void kmem_cache_destroy(struct kmem_cache *);  int kmem_cache_shrink(struct kmem_cache *);  void kmem_cache_free(struct kmem_cache *, void *); -unsigned int kmem_cache_size(struct kmem_cache *);  /*   * Please use this macro to create slab caches. Simply specify the @@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)  	return kmalloc_node(size, flags | __GFP_ZERO, node);  } +/* + * Determine the size of a slab object + */ +static inline unsigned int kmem_cache_size(struct kmem_cache *s) +{ +	return s->object_size; +} +  void __init kmem_cache_init_late(void);  #endif	/* _LINUX_SLAB_H */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index cc290f0bdb34..45c0356fdc8c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -89,9 +89,13 @@ struct kmem_cache {  	 * (see kmem_cache_init())  	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache  	 * is statically defined, so we reserve the max number of cpus. +	 * +	 * We also need to guarantee that the list is able to accomodate a +	 * pointer for each node since "nodelists" uses the remainder of +	 * available pointers.  	 */  	struct kmem_list3 **nodelists; -	struct array_cache *array[NR_CPUS]; +	struct array_cache *array[NR_CPUS + MAX_NUMNODES];  	/*  	 * Do not add fields after array[]  	 */ diff --git a/mm/slab.c b/mm/slab.c index 6d5c83c6ddd5..2c3a2e0394db 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -162,23 +162,6 @@   */  static bool pfmemalloc_active __read_mostly; -/* Legal flag mask for kmem_cache_create(). 
*/ -#if DEBUG -# define CREATE_MASK	(SLAB_RED_ZONE | \ -			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \ -			 SLAB_CACHE_DMA | \ -			 SLAB_STORE_USER | \ -			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ -			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ -			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) -#else -# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \ -			 SLAB_CACHE_DMA | \ -			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ -			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ -			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) -#endif -  /*   * kmem_bufctl_t:   * @@ -564,15 +547,11 @@ static struct cache_names __initdata cache_names[] = {  #undef CACHE  }; -static struct arraycache_init initarray_cache __initdata = -    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };  static struct arraycache_init initarray_generic =      { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };  /* internal cache of cache description objs */ -static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];  static struct kmem_cache kmem_cache_boot = { -	.nodelists = kmem_cache_nodelists,  	.batchcount = 1,  	.limit = BOOT_CPUCACHE_ENTRIES,  	.shared = 1, @@ -1577,28 +1556,33 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)  }  /* + * The memory after the last cpu cache pointer is used for the + * the nodelists pointer. + */ +static void setup_nodelists_pointer(struct kmem_cache *cachep) +{ +	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; +} + +/*   * Initialisation.  Called after the page allocator have been initialised and   * before smp_init().   */  void __init kmem_cache_init(void)  { -	size_t left_over;  	struct cache_sizes *sizes;  	struct cache_names *names;  	int i; -	int order; -	int node;  	kmem_cache = &kmem_cache_boot; +	setup_nodelists_pointer(kmem_cache);  	if (num_possible_nodes() == 1)  		use_alien_caches = 0; -	for (i = 0; i < NUM_INIT_LISTS; i++) { +	for (i = 0; i < NUM_INIT_LISTS; i++)  		kmem_list3_init(&initkmem_list3[i]); -		if (i < MAX_NUMNODES) -			kmem_cache->nodelists[i] = NULL; -	} +  	set_up_list3s(kmem_cache, CACHE_CACHE);  	/* @@ -1629,37 +1613,16 @@ void __init kmem_cache_init(void)  	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.  	 
*/ -	node = numa_mem_id(); -  	/* 1) create the kmem_cache */ -	INIT_LIST_HEAD(&slab_caches); -	list_add(&kmem_cache->list, &slab_caches); -	kmem_cache->colour_off = cache_line_size(); -	kmem_cache->array[smp_processor_id()] = &initarray_cache.cache; -	kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];  	/*  	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids  	 */ -	kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + -				  nr_node_ids * sizeof(struct kmem_list3 *); -	kmem_cache->object_size = kmem_cache->size; -	kmem_cache->size = ALIGN(kmem_cache->object_size, -					cache_line_size()); -	kmem_cache->reciprocal_buffer_size = -		reciprocal_value(kmem_cache->size); - -	for (order = 0; order < MAX_ORDER; order++) { -		cache_estimate(order, kmem_cache->size, -			cache_line_size(), 0, &left_over, &kmem_cache->num); -		if (kmem_cache->num) -			break; -	} -	BUG_ON(!kmem_cache->num); -	kmem_cache->gfporder = order; -	kmem_cache->colour = left_over / kmem_cache->colour_off; -	kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) + -				      sizeof(struct slab), cache_line_size()); +	create_boot_cache(kmem_cache, "kmem_cache", +		offsetof(struct kmem_cache, array[nr_cpu_ids]) + +				  nr_node_ids * sizeof(struct kmem_list3 *), +				  SLAB_HWCACHE_ALIGN); +	list_add(&kmem_cache->list, &slab_caches);  	/* 2+3) create the kmalloc caches */  	sizes = malloc_sizes; @@ -1671,23 +1634,13 @@ void __init kmem_cache_init(void)  	 * bug.  	 */ -	sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); -	sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name; -	sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size; -	sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size; -	sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN; -	__kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC); -	list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches); - -	if (INDEX_AC != INDEX_L3) { -		sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); -		sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name; -		sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size; -		sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size; -		sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN; -		__kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC); -		list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches); -	} +	sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name, +					sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS); + +	if (INDEX_AC != INDEX_L3) +		sizes[INDEX_L3].cs_cachep = +			create_kmalloc_cache(names[INDEX_L3].name, +				sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS);  	slab_early_init = 0; @@ -1699,24 +1652,14 @@ void __init kmem_cache_init(void)  		 * Note for systems short on memory removing the alignment will  		 * allow tighter packing of the smaller caches.  		 
*/ -		if (!sizes->cs_cachep) { -			sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); -			sizes->cs_cachep->name = names->name; -			sizes->cs_cachep->size = sizes->cs_size; -			sizes->cs_cachep->object_size = sizes->cs_size; -			sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN; -			__kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC); -			list_add(&sizes->cs_cachep->list, &slab_caches); -		} +		if (!sizes->cs_cachep) +			sizes->cs_cachep = create_kmalloc_cache(names->name, +					sizes->cs_size, ARCH_KMALLOC_FLAGS); +  #ifdef CONFIG_ZONE_DMA -		sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); -		sizes->cs_dmacachep->name = names->name_dma; -		sizes->cs_dmacachep->size = sizes->cs_size; -		sizes->cs_dmacachep->object_size = sizes->cs_size; -		sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN; -		__kmem_cache_create(sizes->cs_dmacachep, -			       ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC); -		list_add(&sizes->cs_dmacachep->list, &slab_caches); +		sizes->cs_dmacachep = create_kmalloc_cache( +			names->name_dma, sizes->cs_size, +			SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);  #endif  		sizes++;  		names++; @@ -1727,7 +1670,6 @@ void __init kmem_cache_init(void)  		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); -		BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);  		memcpy(ptr, cpu_cache_get(kmem_cache),  		       sizeof(struct arraycache_init));  		/* @@ -2282,7 +2224,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)  	if (slab_state == DOWN) {  		/* -		 * Note: the first kmem_cache_create must create the cache +		 * Note: Creation of first cache (kmem_cache). +		 * The setup_list3s is taken care +		 * of by the caller of __kmem_cache_create +		 */ +		cachep->array[smp_processor_id()] = &initarray_generic.cache; +		slab_state = PARTIAL; +	} else if (slab_state == PARTIAL) { +		/* +		 * Note: the second kmem_cache_create must create the cache  		 * that's used by kmalloc(24), otherwise the creation of  		 * further caches will BUG().  		 */ @@ -2290,7 +2240,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)  		/*  		 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is -		 * the first cache, then we need to set up all its list3s, +		 * the second cache, then we need to set up all its list3s,  		 * otherwise the creation of further caches will BUG().  		 */  		set_up_list3s(cachep, SIZE_AC); @@ -2299,6 +2249,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)  		else  			slab_state = PARTIAL_ARRAYCACHE;  	} else { +		/* Remaining boot caches */  		cachep->array[smp_processor_id()] =  			kmalloc(sizeof(struct arraycache_init), gfp); @@ -2331,11 +2282,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)  /**   * __kmem_cache_create - Create a cache. - * @name: A string which is used in /proc/slabinfo to identify this cache. - * @size: The size of objects to be created in this cache. - * @align: The required alignment for the objects. + * @cachep: cache management descriptor   * @flags: SLAB flags - * @ctor: A constructor for the objects.   *   * Returns a ptr to the cache on success, NULL on failure.   * Cannot be called within a int, but can be interrupted. 
@@ -2378,11 +2326,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)  	if (flags & SLAB_DESTROY_BY_RCU)  		BUG_ON(flags & SLAB_POISON);  #endif -	/* -	 * Always checks flags, a caller might be expecting debug support which -	 * isn't available. -	 */ -	BUG_ON(flags & ~CREATE_MASK);  	/*  	 * Check that size is in terms of words.  This is needed to avoid @@ -2394,22 +2337,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)  		size &= ~(BYTES_PER_WORD - 1);  	} -	/* calculate the final buffer alignment: */ - -	/* 1) arch recommendation: can be overridden for debug */ -	if (flags & SLAB_HWCACHE_ALIGN) { -		/* -		 * Default alignment: as specified by the arch code.  Except if -		 * an object is really small, then squeeze multiple objects into -		 * one cacheline. -		 */ -		ralign = cache_line_size(); -		while (size <= ralign / 2) -			ralign /= 2; -	} else { -		ralign = BYTES_PER_WORD; -	} -  	/*  	 * Redzoning and user store require word alignment or possibly larger.  	 * Note this will be overridden by architecture or caller mandated @@ -2426,10 +2353,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)  		size &= ~(REDZONE_ALIGN - 1);  	} -	/* 2) arch mandated alignment */ -	if (ralign < ARCH_SLAB_MINALIGN) { -		ralign = ARCH_SLAB_MINALIGN; -	}  	/* 3) caller mandated alignment */  	if (ralign < cachep->align) {  		ralign = cachep->align; @@ -2447,7 +2370,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)  	else  		gfp = GFP_NOWAIT; -	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; +	setup_nodelists_pointer(cachep);  #if DEBUG  	/* @@ -3969,12 +3892,6 @@ void kfree(const void *objp)  }  EXPORT_SYMBOL(kfree); -unsigned int kmem_cache_size(struct kmem_cache *cachep) -{ -	return cachep->object_size; -} -EXPORT_SYMBOL(kmem_cache_size); -  /*   * This initializes kmem_list3 or resizes various caches for all nodes.   
*/ diff --git a/mm/slab.h b/mm/slab.h index 5a43c2f13621..1cb9c9ee0e6f 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -32,9 +32,17 @@ extern struct list_head slab_caches;  /* The slab cache that manages slab cache information */  extern struct kmem_cache *kmem_cache; +unsigned long calculate_alignment(unsigned long flags, +		unsigned long align, unsigned long size); +  /* Functions provided by the slab allocators */  extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags); +extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, +			unsigned long flags); +extern void create_boot_cache(struct kmem_cache *, const char *name, +			size_t size, unsigned long flags); +  #ifdef CONFIG_SLUB  struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,  	size_t align, unsigned long flags, void (*ctor)(void *)); @@ -45,6 +53,31 @@ static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t siz  #endif +/* Legal flag mask for kmem_cache_create(), for various configurations */ +#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ +			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS ) + +#if defined(CONFIG_DEBUG_SLAB) +#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) +#elif defined(CONFIG_SLUB_DEBUG) +#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ +			  SLAB_TRACE | SLAB_DEBUG_FREE) +#else +#define SLAB_DEBUG_FLAGS (0) +#endif + +#if defined(CONFIG_SLAB) +#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \ +			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | SLAB_NOTRACK) +#elif defined(CONFIG_SLUB) +#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ +			  SLAB_TEMPORARY | SLAB_NOTRACK) +#else +#define SLAB_CACHE_FLAGS (0) +#endif + +#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) +  int __kmem_cache_shutdown(struct kmem_cache *);  struct seq_file; diff --git a/mm/slab_common.c b/mm/slab_common.c index 5fb753da6cf0..a8e76d79ee65 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -73,6 +73,34 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)  #endif  /* + * Figure out what the alignment of the objects will be given a set of + * flags, a user specified alignment and the size of the objects. + */ +unsigned long calculate_alignment(unsigned long flags, +		unsigned long align, unsigned long size) +{ +	/* +	 * If the user wants hardware cache aligned objects then follow that +	 * suggestion if the object is sufficiently large. +	 * +	 * The hardware cache alignment cannot override the specified +	 * alignment though. If that is greater then use it. +	 */ +	if (flags & SLAB_HWCACHE_ALIGN) { +		unsigned long ralign = cache_line_size(); +		while (size <= ralign / 2) +			ralign /= 2; +		align = max(align, ralign); +	} + +	if (align < ARCH_SLAB_MINALIGN) +		align = ARCH_SLAB_MINALIGN; + +	return ALIGN(align, sizeof(void *)); +} + + +/*   * kmem_cache_create - Create a cache.   * @name: A string which is used in /proc/slabinfo to identify this cache.   * @size: The size of objects to be created in this cache. @@ -109,6 +137,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align  	if (!kmem_cache_sanity_check(name, size) == 0)  		goto out_locked; +	/* +	 * Some allocators will constraint the set of valid flags to a subset +	 * of all flags. 
We expect them to define CACHE_CREATE_MASK in this +	 * case, and we'll just provide them with a sanitized version of the +	 * passed flags. +	 */ +	flags &= CACHE_CREATE_MASK;  	s = __kmem_cache_alias(name, size, align, flags, ctor);  	if (s) @@ -117,7 +152,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align  	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);  	if (s) {  		s->object_size = s->size = size; -		s->align = align; +		s->align = calculate_alignment(flags, align, size);  		s->ctor = ctor;  		s->name = kstrdup(name, GFP_KERNEL);  		if (!s->name) { @@ -195,6 +230,42 @@ int slab_is_available(void)  	return slab_state >= UP;  } +#ifndef CONFIG_SLOB +/* Create a cache during boot when no slab services are available yet */ +void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, +		unsigned long flags) +{ +	int err; + +	s->name = name; +	s->size = s->object_size = size; +	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); +	err = __kmem_cache_create(s, flags); + +	if (err) +		panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n", +					name, size, err); + +	s->refcount = -1;	/* Exempt from merging for now */ +} + +struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, +				unsigned long flags) +{ +	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + +	if (!s) +		panic("Out of memory when creating slab %s\n", name); + +	create_boot_cache(s, name, size, flags); +	list_add(&s->list, &slab_caches); +	s->refcount = 1; +	return s; +} + +#endif /* !CONFIG_SLOB */ + +  #ifdef CONFIG_SLABINFO  static void print_slabinfo_header(struct seq_file *m)  { diff --git a/mm/slob.c b/mm/slob.c index 1e921c5e9576..795bab7d391d 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -28,9 +28,8 @@   * from kmalloc are prepended with a 4-byte header with the kmalloc size.   * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls   * alloc_pages() directly, allocating compound pages so the page order - * does not have to be separately tracked, and also stores the exact - * allocation size in page->private so that it can be used to accurately - * provide ksize(). These objects are detected in kfree() because slob_page() + * does not have to be separately tracked. + * These objects are detected in kfree() because PageSlab()   * is false for them.   
*   * SLAB is emulated on top of SLOB by simply calling constructors and @@ -124,7 +123,6 @@ static inline void clear_slob_page_free(struct page *sp)  #define SLOB_UNIT sizeof(slob_t)  #define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT) -#define SLOB_ALIGN L1_CACHE_BYTES  /*   * struct slob_rcu is inserted at the tail of allocated slob blocks, which @@ -455,11 +453,6 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)  		if (likely(order))  			gfp |= __GFP_COMP;  		ret = slob_new_pages(gfp, order, node); -		if (ret) { -			struct page *page; -			page = virt_to_page(ret); -			page->private = size; -		}  		trace_kmalloc_node(caller, ret,  				   size, PAGE_SIZE << order, gfp, node); @@ -506,7 +499,7 @@ void kfree(const void *block)  		unsigned int *m = (unsigned int *)(block - align);  		slob_free(m, *m + align);  	} else -		put_page(sp); +		__free_pages(sp, compound_order(sp));  }  EXPORT_SYMBOL(kfree); @@ -514,37 +507,30 @@ EXPORT_SYMBOL(kfree);  size_t ksize(const void *block)  {  	struct page *sp; +	int align; +	unsigned int *m;  	BUG_ON(!block);  	if (unlikely(block == ZERO_SIZE_PTR))  		return 0;  	sp = virt_to_page(block); -	if (PageSlab(sp)) { -		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); -		unsigned int *m = (unsigned int *)(block - align); -		return SLOB_UNITS(*m) * SLOB_UNIT; -	} else -		return sp->private; +	if (unlikely(!PageSlab(sp))) +		return PAGE_SIZE << compound_order(sp); + +	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); +	m = (unsigned int *)(block - align); +	return SLOB_UNITS(*m) * SLOB_UNIT;  }  EXPORT_SYMBOL(ksize);  int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)  { -	size_t align = c->size; -  	if (flags & SLAB_DESTROY_BY_RCU) {  		/* leave room for rcu footer at the end of object */  		c->size += sizeof(struct slob_rcu);  	}  	c->flags = flags; -	/* ignore alignment unless it's forced */ -	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0; -	if (c->align < ARCH_SLAB_MINALIGN) -		c->align = ARCH_SLAB_MINALIGN; -	if (c->align < align) -		c->align = align; -  	return 0;  } @@ -558,12 +544,12 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)  	if (c->size < PAGE_SIZE) {  		b = slob_alloc(c->size, flags, c->align, node); -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, +		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,  					    SLOB_UNITS(c->size) * SLOB_UNIT,  					    flags, node);  	} else {  		b = slob_new_pages(flags, get_order(c->size), node); -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, +		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,  					    PAGE_SIZE << get_order(c->size),  					    flags, node);  	} @@ -608,12 +594,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)  }  EXPORT_SYMBOL(kmem_cache_free); -unsigned int kmem_cache_size(struct kmem_cache *c) -{ -	return c->size; -} -EXPORT_SYMBOL(kmem_cache_size); -  int __kmem_cache_shutdown(struct kmem_cache *c)  {  	/* No way to check for remaining objects */ diff --git a/mm/slub.c b/mm/slub.c index 472e739278b4..9640edd2cc78 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -112,9 +112,6 @@   * 			the fast path and disables lockless freelists.   
*/ -#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ -		SLAB_TRACE | SLAB_DEBUG_FREE) -  static inline int kmem_cache_debug(struct kmem_cache *s)  {  #ifdef CONFIG_SLUB_DEBUG @@ -179,8 +176,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s)  #define __OBJECT_POISON		0x80000000UL /* Poison object */  #define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */ -static int kmem_size = sizeof(struct kmem_cache); -  #ifdef CONFIG_SMP  static struct notifier_block slab_notifier;  #endif @@ -1092,11 +1087,11 @@ static noinline struct kmem_cache_node *free_debug_processing(  	if (!check_object(s, page, object, SLUB_RED_ACTIVE))  		goto out; -	if (unlikely(s != page->slab)) { +	if (unlikely(s != page->slab_cache)) {  		if (!PageSlab(page)) {  			slab_err(s, page, "Attempt to free object(0x%p) "  				"outside of slab", object); -		} else if (!page->slab) { +		} else if (!page->slab_cache) {  			printk(KERN_ERR  				"SLUB <none>: no slab for object 0x%p.\n",  						object); @@ -1357,7 +1352,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)  		goto out;  	inc_slabs_node(s, page_to_nid(page), page->objects); -	page->slab = s; +	page->slab_cache = s;  	__SetPageSlab(page);  	if (page->pfmemalloc)  		SetPageSlabPfmemalloc(page); @@ -1424,7 +1419,7 @@ static void rcu_free_slab(struct rcu_head *h)  	else  		page = container_of((struct list_head *)h, struct page, lru); -	__free_slab(page->slab, page); +	__free_slab(page->slab_cache, page);  }  static void free_slab(struct kmem_cache *s, struct page *page) @@ -1872,12 +1867,14 @@ redo:  /*   * Unfreeze all the cpu partial slabs.   * - * This function must be called with interrupt disabled. + * This function must be called with interrupts disabled + * for the cpu using c (or some other guarantee must be there + * to guarantee no concurrent accesses).   */ -static void unfreeze_partials(struct kmem_cache *s) +static void unfreeze_partials(struct kmem_cache *s, +		struct kmem_cache_cpu *c)  {  	struct kmem_cache_node *n = NULL, *n2 = NULL; -	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);  	struct page *page, *discard_page = NULL;  	while ((page = c->partial)) { @@ -1963,7 +1960,7 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)  				 * set to the per node partial list.  				 
*/  				local_irq_save(flags); -				unfreeze_partials(s); +				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));  				local_irq_restore(flags);  				oldpage = NULL;  				pobjects = 0; @@ -2006,7 +2003,7 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)  		if (c->page)  			flush_slab(s, c); -		unfreeze_partials(s); +		unfreeze_partials(s, c);  	}  } @@ -2459,7 +2456,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,  	void *prior;  	void **object = (void *)x;  	int was_frozen; -	int inuse;  	struct page new;  	unsigned long counters;  	struct kmem_cache_node *n = NULL; @@ -2472,13 +2468,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,  		return;  	do { +		if (unlikely(n)) { +			spin_unlock_irqrestore(&n->list_lock, flags); +			n = NULL; +		}  		prior = page->freelist;  		counters = page->counters;  		set_freepointer(s, object, prior);  		new.counters = counters;  		was_frozen = new.frozen;  		new.inuse--; -		if ((!new.inuse || !prior) && !was_frozen && !n) { +		if ((!new.inuse || !prior) && !was_frozen) {  			if (!kmem_cache_debug(s) && !prior) @@ -2503,7 +2503,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,  			}  		} -		inuse = new.inuse;  	} while (!cmpxchg_double_slab(s, page,  		prior, counters, @@ -2529,25 +2528,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,                  return;          } +	if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) +		goto slab_empty; +  	/* -	 * was_frozen may have been set after we acquired the list_lock in -	 * an earlier loop. So we need to check it here again. +	 * Objects left in the slab. If it was not on the partial list before +	 * then add it.  	 */ -	if (was_frozen) -		stat(s, FREE_FROZEN); -	else { -		if (unlikely(!inuse && n->nr_partial > s->min_partial)) -                        goto slab_empty; - -		/* -		 * Objects left in the slab. If it was not on the partial list before -		 * then add it. -		 */ -		if (unlikely(!prior)) { -			remove_full(s, page); -			add_partial(n, page, DEACTIVATE_TO_TAIL); -			stat(s, FREE_ADD_PARTIAL); -		} +	if (kmem_cache_debug(s) && unlikely(!prior)) { +		remove_full(s, page); +		add_partial(n, page, DEACTIVATE_TO_TAIL); +		stat(s, FREE_ADD_PARTIAL);  	}  	spin_unlock_irqrestore(&n->list_lock, flags);  	return; @@ -2623,9 +2614,9 @@ void kmem_cache_free(struct kmem_cache *s, void *x)  	page = virt_to_head_page(x); -	if (kmem_cache_debug(s) && page->slab != s) { +	if (kmem_cache_debug(s) && page->slab_cache != s) {  		pr_err("kmem_cache_free: Wrong slab cache. %s but object" -			" is from  %s\n", page->slab->name, s->name); +			" is from  %s\n", page->slab_cache->name, s->name);  		WARN_ON_ONCE(1);  		return;  	} @@ -2769,32 +2760,6 @@ static inline int calculate_order(int size, int reserved)  	return -ENOSYS;  } -/* - * Figure out what the alignment of the objects will be. - */ -static unsigned long calculate_alignment(unsigned long flags, -		unsigned long align, unsigned long size) -{ -	/* -	 * If the user wants hardware cache aligned objects then follow that -	 * suggestion if the object is sufficiently large. -	 * -	 * The hardware cache alignment cannot override the specified -	 * alignment though. If that is greater then use it. 
-	 */ -	if (flags & SLAB_HWCACHE_ALIGN) { -		unsigned long ralign = cache_line_size(); -		while (size <= ralign / 2) -			ralign /= 2; -		align = max(align, ralign); -	} - -	if (align < ARCH_SLAB_MINALIGN) -		align = ARCH_SLAB_MINALIGN; - -	return ALIGN(align, sizeof(void *)); -} -  static void  init_kmem_cache_node(struct kmem_cache_node *n)  { @@ -2928,7 +2893,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)  {  	unsigned long flags = s->flags;  	unsigned long size = s->object_size; -	unsigned long align = s->align;  	int order;  	/* @@ -3000,19 +2964,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)  #endif  	/* -	 * Determine the alignment based on various parameters that the -	 * user specified and the dynamic determination of cache line size -	 * on bootup. -	 */ -	align = calculate_alignment(flags, align, s->object_size); -	s->align = align; - -	/*  	 * SLUB stores one object immediately after another beginning from  	 * offset 0. In order to align the objects we have to simply size  	 * each object to conform to the alignment.  	 */ -	size = ALIGN(size, align); +	size = ALIGN(size, s->align);  	s->size = size;  	if (forced_order >= 0)  		order = forced_order; @@ -3041,7 +2997,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)  		s->max = s->oo;  	return !!oo_objects(s->oo); -  }  static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) @@ -3127,15 +3082,6 @@ error:  	return -EINVAL;  } -/* - * Determine the size of a slab object - */ -unsigned int kmem_cache_size(struct kmem_cache *s) -{ -	return s->object_size; -} -EXPORT_SYMBOL(kmem_cache_size); -  static void list_slab_objects(struct kmem_cache *s, struct page *page,  							const char *text)  { @@ -3261,32 +3207,6 @@ static int __init setup_slub_nomerge(char *str)  __setup("slub_nomerge", setup_slub_nomerge); -static struct kmem_cache *__init create_kmalloc_cache(const char *name, -						int size, unsigned int flags) -{ -	struct kmem_cache *s; - -	s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); - -	s->name = name; -	s->size = s->object_size = size; -	s->align = ARCH_KMALLOC_MINALIGN; - -	/* -	 * This function is called with IRQs disabled during early-boot on -	 * single CPU so there's no need to take slab_mutex here. -	 */ -	if (kmem_cache_open(s, flags)) -		goto panic; - -	list_add(&s->list, &slab_caches); -	return s; - -panic: -	panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); -	return NULL; -} -  /*   * Conversion table for small slabs sizes / 8 to the index in the   * kmalloc array. 
This is necessary for slabs < 192 since we have non power @@ -3424,7 +3344,7 @@ size_t ksize(const void *object)  		return PAGE_SIZE << compound_order(page);  	} -	return slab_ksize(page->slab); +	return slab_ksize(page->slab_cache);  }  EXPORT_SYMBOL(ksize); @@ -3449,8 +3369,8 @@ bool verify_mem_not_deleted(const void *x)  	}  	slab_lock(page); -	if (on_freelist(page->slab, page, object)) { -		object_err(page->slab, page, object, "Object is on free-list"); +	if (on_freelist(page->slab_cache, page, object)) { +		object_err(page->slab_cache, page, object, "Object is on free-list");  		rv = false;  	} else {  		rv = true; @@ -3481,7 +3401,7 @@ void kfree(const void *x)  		__free_pages(page, compound_order(page));  		return;  	} -	slab_free(page->slab, page, object, _RET_IP_); +	slab_free(page->slab_cache, page, object, _RET_IP_);  }  EXPORT_SYMBOL(kfree); @@ -3676,15 +3596,16 @@ static int slab_memory_callback(struct notifier_block *self,  /*   * Used for early kmem_cache structures that were allocated using - * the page allocator + * the page allocator. Allocate them properly then fix up the pointers + * that may be pointing to the wrong kmem_cache structure.   */ -static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) +static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)  {  	int node; +	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); -	list_add(&s->list, &slab_caches); -	s->refcount = -1; +	memcpy(s, static_cache, kmem_cache->object_size);  	for_each_node_state(node, N_NORMAL_MEMORY) {  		struct kmem_cache_node *n = get_node(s, node); @@ -3692,78 +3613,52 @@ static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)  		if (n) {  			list_for_each_entry(p, &n->partial, lru) -				p->slab = s; +				p->slab_cache = s;  #ifdef CONFIG_SLUB_DEBUG  			list_for_each_entry(p, &n->full, lru) -				p->slab = s; +				p->slab_cache = s;  #endif  		}  	} +	list_add(&s->list, &slab_caches); +	return s;  }  void __init kmem_cache_init(void)  { +	static __initdata struct kmem_cache boot_kmem_cache, +		boot_kmem_cache_node;  	int i; -	int caches = 0; -	struct kmem_cache *temp_kmem_cache; -	int order; -	struct kmem_cache *temp_kmem_cache_node; -	unsigned long kmalloc_size; +	int caches = 2;  	if (debug_guardpage_minorder())  		slub_max_order = 0; -	kmem_size = offsetof(struct kmem_cache, node) + -			nr_node_ids * sizeof(struct kmem_cache_node *); - -	/* Allocate two kmem_caches from the page allocator */ -	kmalloc_size = ALIGN(kmem_size, cache_line_size()); -	order = get_order(2 * kmalloc_size); -	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order); - -	/* -	 * Must first have the slab cache available for the allocations of the -	 * struct kmem_cache_node's. There is special bootstrap code in -	 * kmem_cache_open for slab_state == DOWN. 
-	 */ -	kmem_cache_node = (void *)kmem_cache + kmalloc_size; +	kmem_cache_node = &boot_kmem_cache_node; +	kmem_cache = &boot_kmem_cache; -	kmem_cache_node->name = "kmem_cache_node"; -	kmem_cache_node->size = kmem_cache_node->object_size = -		sizeof(struct kmem_cache_node); -	kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC); +	create_boot_cache(kmem_cache_node, "kmem_cache_node", +		sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);  	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);  	/* Able to allocate the per node structures */  	slab_state = PARTIAL; -	temp_kmem_cache = kmem_cache; -	kmem_cache->name = "kmem_cache"; -	kmem_cache->size = kmem_cache->object_size = kmem_size; -	kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC); +	create_boot_cache(kmem_cache, "kmem_cache", +			offsetof(struct kmem_cache, node) + +				nr_node_ids * sizeof(struct kmem_cache_node *), +		       SLAB_HWCACHE_ALIGN); -	kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); -	memcpy(kmem_cache, temp_kmem_cache, kmem_size); +	kmem_cache = bootstrap(&boot_kmem_cache);  	/*  	 * Allocate kmem_cache_node properly from the kmem_cache slab.  	 * kmem_cache_node is separately allocated so no need to  	 * update any list pointers.  	 */ -	temp_kmem_cache_node = kmem_cache_node; - -	kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); -	memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); - -	kmem_cache_bootstrap_fixup(kmem_cache_node); - -	caches++; -	kmem_cache_bootstrap_fixup(kmem_cache); -	caches++; -	/* Free temporary boot structure */ -	free_pages((unsigned long)temp_kmem_cache, order); +	kmem_cache_node = bootstrap(&boot_kmem_cache_node);  	/* Now we can use the kmem_cache to allocate kmalloc slabs */ @@ -3964,6 +3859,10 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)  	if (err)  		return err; +	/* Mutex is not taken during early boot */ +	if (slab_state <= UP) +		return 0; +  	mutex_unlock(&slab_mutex);  	err = sysfs_slab_add(s);  	mutex_lock(&slab_mutex); @@ -5265,13 +5164,8 @@ static int sysfs_slab_add(struct kmem_cache *s)  {  	int err;  	const char *name; -	int unmergeable; - -	if (slab_state < FULL) -		/* Defer until later */ -		return 0; +	int unmergeable = slab_unmergeable(s); -	unmergeable = slab_unmergeable(s);  	if (unmergeable) {  		/*  		 * Slabcache can never be merged so we can use the name proper. |
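The other consolidation worth calling out after reading the diff is the alignment helper. SLAB, SLUB and SLOB each carried their own object-alignment logic; the patch keeps a single calculate_alignment() in mm/slab_common.c and lets kmem_cache_create() and create_boot_cache() apply it for every allocator. The sketch below reproduces that helper's logic as a self-contained userspace program so the policy can be tried directly; the flag value, minimum alignment and cache-line size are illustrative stand-ins rather than the kernel's definitions.

#include <stdio.h>

/* Stand-ins for kernel symbols -- values are illustrative, not authoritative. */
#define SLAB_HWCACHE_ALIGN  0x00002000UL
#define ARCH_SLAB_MINALIGN  8UL
#define ALIGN(x, a)  (((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define max(a, b)    ((a) > (b) ? (a) : (b))
static unsigned long cache_line_size(void) { return 64; }

/*
 * Same policy as the calculate_alignment() the patch moves to
 * mm/slab_common.c: honour SLAB_HWCACHE_ALIGN only for objects large
 * enough to benefit, never drop below the architecture minimum, and
 * round the result up to a pointer-size multiple.
 */
static unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();

		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}

int main(void)
{
	/* Small object: the 64-byte line is relaxed down to 32 bytes. */
	printf("%lu\n", calculate_alignment(SLAB_HWCACHE_ALIGN, 0, 24));
	/* Large object: gets the full cache line. */
	printf("%lu\n", calculate_alignment(SLAB_HWCACHE_ALIGN, 0, 200));
	/* An explicit caller alignment is never reduced. */
	printf("%lu\n", calculate_alignment(0, 128, 24));
	return 0;
}

With the stand-in values above this prints 32, 64 and 128, which is the behaviour kmem_cache_create() now gives uniformly: hardware-cache alignment is a hint that scales down for small objects, while a caller-mandated alignment always wins when it is larger.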