From 534acc057b5a08ec33fa57cdd2f5a09ef124e7f2 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 29 Jul 2009 15:04:18 -0700 Subject: lib: flexible array implementation Once a structure goes over PAGE_SIZE*2, we see occasional allocation failures. Some people have chosen to switch over to things like vmalloc() that will let them keep array-like access to such a large structures. But, vmalloc() has plenty of downsides. Here's an alternative. I think it's what Andrew was suggesting here: http://lkml.org/lkml/2009/7/2/518 I call it a flexible array. It does all of its work in PAGE_SIZE bits, so never does an order>0 allocation. The base level has PAGE_SIZE-2*sizeof(int) bytes of storage for pointers to the second level. So, with a 32-bit arch, you get about 4MB (4183112 bytes) of total storage when the objects pack nicely into a page. It is half that on 64-bit because the pointers are twice the size. There's a table detailing this in the code. There are kerneldocs for the functions, but here's an overview: flex_array_alloc() - dynamically allocate a base structure flex_array_free() - free the array and all of the second-level pages flex_array_free_parts() - free the second-level pages, but not the base (for static bases) flex_array_put() - copy into the array at the given index flex_array_get() - copy out of the array at the given index flex_array_prealloc() - preallocate the second-level pages between the given indexes to guarantee no allocs will occur at put() time. We could also potentially just pass the "element_size" into each of the API functions instead of storing it internally. That would get us one more base pointer on 32-bit. I've been testing this by running it in userspace. The header and patch that I've been using are here, as well as the little script I'm using to generate the size table which goes in the kerneldocs. http://sr71.net/~dave/linux/flexarray/ [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Dave Hansen Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/flex_array.h (limited to 'include/linux/flex_array.h') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h new file mode 100644 index 000000000000..23c1ec79a31b --- /dev/null +++ b/include/linux/flex_array.h @@ -0,0 +1,47 @@ +#ifndef _FLEX_ARRAY_H +#define _FLEX_ARRAY_H + +#include +#include + +#define FLEX_ARRAY_PART_SIZE PAGE_SIZE +#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE + +struct flex_array_part; + +/* + * This is meant to replace cases where an array-like + * structure has gotten too big to fit into kmalloc() + * and the developer is getting tempted to use + * vmalloc(). + */ + +struct flex_array { + union { + struct { + int element_size; + int total_nr_elements; + struct flex_array_part *parts[0]; + }; + /* + * This little trick makes sure that + * sizeof(flex_array) == PAGE_SIZE + */ + char padding[FLEX_ARRAY_BASE_SIZE]; + }; +}; + +#define FLEX_ARRAY_INIT(size, total) { { {\ + .element_size = (size), \ + .total_nr_elements = (total), \ +} } } + +struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags); +int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags); +void flex_array_free(struct flex_array *fa); +void flex_array_free_parts(struct flex_array *fa); +int flex_array_put(struct flex_array *fa, int element_nr, void *src, + gfp_t flags); +void *flex_array_get(struct flex_array *fa, int element_nr); + +#endif /* _FLEX_ARRAY_H */ -- cgit From 8e7ee27095aee87b5db1b0061e2ceea5878a1bbd Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 26 Aug 2009 14:29:21 -0700 Subject: flex_array: declare parts member to have incomplete type The `parts' member of struct flex_array should evaluate to an incomplete type so that sizeof() cannot be used and C99 does not require the zero-length specification. Signed-off-by: David Rientjes Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/flex_array.h') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 23c1ec79a31b..603160db7c98 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -21,7 +21,7 @@ struct flex_array { struct { int element_size; int total_nr_elements; - struct flex_array_part *parts[0]; + struct flex_array_part *parts[]; }; /* * This little trick makes sure that -- cgit From b62e408c05228f40e69bb38a48db8961cac6cd23 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 26 Aug 2009 14:29:22 -0700 Subject: flex_array: convert element_nr formals to unsigned It's problematic to allow signed element_nr's or total's to be passed as part of the flex array API. flex_array_alloc() allows total_nr_elements to be set to a negative quantity, which is obviously erroneous. flex_array_get() and flex_array_put() allows negative array indices in dereferencing an array part, which could address memory mapped before struct flex_array. The fix is to convert all existing element_nr formals to be qualified as unsigned. Existing checks to compare it to total_nr_elements or the max array size based on element_size need not be changed. Signed-off-by: David Rientjes Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 10 ++++++---- lib/flex_array.c | 24 +++++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include/linux/flex_array.h') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 603160db7c98..45ff18491514 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -36,12 +36,14 @@ struct flex_array { .total_nr_elements = (total), \ } } } -struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags); -int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags); +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags); +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); -int flex_array_put(struct flex_array *fa, int element_nr, void *src, +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); -void *flex_array_get(struct flex_array *fa, int element_nr); +void *flex_array_get(struct flex_array *fa, unsigned int element_nr); #endif /* _FLEX_ARRAY_H */ diff --git a/lib/flex_array.c b/lib/flex_array.c index cf4e372cae90..7baed2fc3bc8 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -99,7 +99,8 @@ static inline int elements_fit_in_base(struct flex_array *fa) * capacity in the base structure. Also note that no effort is made * to efficiently pack objects across page boundaries. */ -struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags) { struct flex_array *ret; int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); @@ -115,7 +116,8 @@ struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) return ret; } -static int fa_element_to_part_nr(struct flex_array *fa, int element_nr) +static int fa_element_to_part_nr(struct flex_array *fa, + unsigned int element_nr) { return element_nr / __elements_per_part(fa->element_size); } @@ -143,14 +145,12 @@ void flex_array_free(struct flex_array *fa) kfree(fa); } -static int fa_index_inside_part(struct flex_array *fa, int element_nr) +static unsigned int index_inside_part(struct flex_array *fa, + unsigned int element_nr) { - return element_nr % __elements_per_part(fa->element_size); -} + unsigned int part_offset; -static int index_inside_part(struct flex_array *fa, int element_nr) -{ - int part_offset = fa_index_inside_part(fa, element_nr); + part_offset = element_nr % __elements_per_part(fa->element_size); return part_offset * fa->element_size; } @@ -185,7 +185,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) * * Locking must be provided by the caller. */ -int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags) +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, + gfp_t flags) { int part_nr = fa_element_to_part_nr(fa, element_nr); struct flex_array_part *part; @@ -217,7 +218,8 @@ int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags * * Locking must be provided by the caller. */ -int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags) { int start_part; int end_part; @@ -248,7 +250,7 @@ int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) * * Locking must be provided by the caller. */ -void *flex_array_get(struct flex_array *fa, int element_nr) +void *flex_array_get(struct flex_array *fa, unsigned int element_nr) { int part_nr = fa_element_to_part_nr(fa, element_nr); struct flex_array_part *part; -- cgit From e6de3988aa52debb25a427d085061f3bf1181d54 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 21 Sep 2009 17:04:30 -0700 Subject: flex_array: add flex_array_clear function Add a new function to the flex_array API: int flex_array_clear(struct flex_array *fa, unsigned int element_nr) This function will zero the element at element_nr in the flex_array. Although this is equivalent to using flex_array_put() and passing a pointer to zero'd memory, flex_array_clear() does not require such a pointer to memory that would most likely need to be allocated on the caller's stack which could be significantly large depending on element_size. Signed-off-by: David Rientjes Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 1 + lib/flex_array.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux/flex_array.h') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 45ff18491514..3887b21f883f 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -44,6 +44,7 @@ void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); +int flex_array_clear(struct flex_array *fa, unsigned int element_nr); void *flex_array_get(struct flex_array *fa, unsigned int element_nr); #endif /* _FLEX_ARRAY_H */ diff --git a/lib/flex_array.c b/lib/flex_array.c index 7baed2fc3bc8..b68f99be4080 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -206,6 +206,32 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, return 0; } +/** + * flex_array_clear - clear element in array at @element_nr + * @element_nr: index of the position to clear. + * + * Locking must be provided by the caller. + */ +int flex_array_clear(struct flex_array *fa, unsigned int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = fa->parts[part_nr]; + if (!part) + return -EINVAL; + } + dst = &part->elements[index_inside_part(fa, element_nr)]; + memset(dst, 0, fa->element_size); + return 0; +} + /** * flex_array_prealloc - guarantee that array space exists * @start: index of first array element for which space is allocated -- cgit From 4af5a2f770cc8575840ccb1514ec76ecb592985c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 21 Sep 2009 17:04:31 -0700 Subject: flex_array: add flex_array_shrink function Add a new function to the flex_array API: int flex_array_shrink(struct flex_array *fa) This function will free all unused second-level pages. Since elements are now poisoned if they are not allocated with __GFP_ZERO, it's possible to identify parts that consist solely of unused elements. flex_array_shrink() returns the number of pages freed. Signed-off-by: David Rientjes Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 1 + lib/flex_array.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux/flex_array.h') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 3887b21f883f..f12401e485fe 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -46,5 +46,6 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); int flex_array_clear(struct flex_array *fa, unsigned int element_nr); void *flex_array_get(struct flex_array *fa, unsigned int element_nr); +int flex_array_shrink(struct flex_array *fa); #endif /* _FLEX_ARRAY_H */ diff --git a/lib/flex_array.c b/lib/flex_array.c index e22d0e9776aa..1b03bb553410 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -291,3 +291,43 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) } return &part->elements[index_inside_part(fa, element_nr)]; } + +static int part_is_free(struct flex_array_part *part) +{ + int i; + + for (i = 0; i < sizeof(struct flex_array_part); i++) + if (part->elements[i] != FLEX_ARRAY_FREE) + return 0; + return 1; +} + +/** + * flex_array_shrink - free unused second-level pages + * + * Frees all second-level pages that consist solely of unused + * elements. Returns the number of pages freed. + * + * Locking must be provided by the caller. + */ +int flex_array_shrink(struct flex_array *fa) +{ + struct flex_array_part *part; + int max_part = nr_base_part_ptrs(); + int part_nr; + int ret = 0; + + if (elements_fit_in_base(fa)) + return ret; + for (part_nr = 0; part_nr < max_part; part_nr++) { + part = fa->parts[part_nr]; + if (!part) + continue; + if (part_is_free(part)) { + fa->parts[part_nr] = NULL; + kfree(part); + ret++; + } + } + return ret; +} -- cgit From 45b588d6e5cc172704bac0c998ce54873b149b22 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 21 Sep 2009 17:04:33 -0700 Subject: flex_array: introduce DEFINE_FLEX_ARRAY FLEX_ARRAY_INIT(element_size, total_nr_elements) cannot determine if either parameter is valid, so flex arrays which are statically allocated with this interface can easily become corrupted or reference beyond its allocated memory. This removes FLEX_ARRAY_INIT() as a struct flex_array initializer since no initializer may perform the required checking. Instead, the array is now defined with a new interface: DEFINE_FLEX_ARRAY(name, element_size, total_nr_elements) This may be prefixed with `static' for file scope. This interface includes compile-time checking of the parameters to ensure they are valid. Since the validity of both element_size and total_nr_elements depend on FLEX_ARRAY_BASE_SIZE and FLEX_ARRAY_PART_SIZE, the kernel build will fail if either of these predefined values changes such that the array parameters are no longer valid. Since BUILD_BUG_ON() requires compile time constants, several of the static inline functions that were once local to lib/flex_array.c had to be moved to include/linux/flex_array.h. Signed-off-by: David Rientjes Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 30 ++++++++++++++++++++++++++---- lib/flex_array.c | 36 ++++++++++-------------------------- 2 files changed, 36 insertions(+), 30 deletions(-) (limited to 'include/linux/flex_array.h') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index f12401e485fe..1d747f72298b 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -31,10 +31,32 @@ struct flex_array { }; }; -#define FLEX_ARRAY_INIT(size, total) { { {\ - .element_size = (size), \ - .total_nr_elements = (total), \ -} } } +/* Number of bytes left in base struct flex_array, excluding metadata */ +#define FLEX_ARRAY_BASE_BYTES_LEFT \ + (FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts)) + +/* Number of pointers in base to struct flex_array_part pages */ +#define FLEX_ARRAY_NR_BASE_PTRS \ + (FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *)) + +/* Number of elements of size that fit in struct flex_array_part */ +#define FLEX_ARRAY_ELEMENTS_PER_PART(size) \ + (FLEX_ARRAY_PART_SIZE / size) + +/* + * Defines a statically allocated flex array and ensures its parameters are + * valid. + */ +#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total) \ + struct flex_array __arrayname = { { { \ + .element_size = (__element_size), \ + .total_nr_elements = (__total), \ + } } }; \ + static inline void __arrayname##_invalid_parameter(void) \ + { \ + BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS * \ + FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ + } struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); diff --git a/lib/flex_array.c b/lib/flex_array.c index 1b03bb553410..b62ce6cffd0a 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -28,23 +28,6 @@ struct flex_array_part { char elements[FLEX_ARRAY_PART_SIZE]; }; -static inline int __elements_per_part(int element_size) -{ - return FLEX_ARRAY_PART_SIZE / element_size; -} - -static inline int bytes_left_in_base(void) -{ - int element_offset = offsetof(struct flex_array, parts); - int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset; - return bytes_left; -} - -static inline int nr_base_part_ptrs(void) -{ - return bytes_left_in_base() / sizeof(struct flex_array_part *); -} - /* * If a user requests an allocation which is small * enough, we may simply use the space in the @@ -54,7 +37,7 @@ static inline int nr_base_part_ptrs(void) static inline int elements_fit_in_base(struct flex_array *fa) { int data_size = fa->element_size * fa->total_nr_elements; - if (data_size <= bytes_left_in_base()) + if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) return 1; return 0; } @@ -103,7 +86,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags) { struct flex_array *ret; - int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); + int max_size = FLEX_ARRAY_NR_BASE_PTRS * + FLEX_ARRAY_ELEMENTS_PER_PART(element_size); /* max_size will end up 0 if element_size > PAGE_SIZE */ if (total > max_size) @@ -114,14 +98,15 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, ret->element_size = element_size; ret->total_nr_elements = total; if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) - memset(ret->parts[0], FLEX_ARRAY_FREE, bytes_left_in_base()); + memset(ret->parts[0], FLEX_ARRAY_FREE, + FLEX_ARRAY_BASE_BYTES_LEFT); return ret; } static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { - return element_nr / __elements_per_part(fa->element_size); + return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); } /** @@ -133,11 +118,10 @@ static int fa_element_to_part_nr(struct flex_array *fa, void flex_array_free_parts(struct flex_array *fa) { int part_nr; - int max_part = nr_base_part_ptrs(); if (elements_fit_in_base(fa)) return; - for (part_nr = 0; part_nr < max_part; part_nr++) + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) kfree(fa->parts[part_nr]); } @@ -152,7 +136,8 @@ static unsigned int index_inside_part(struct flex_array *fa, { unsigned int part_offset; - part_offset = element_nr % __elements_per_part(fa->element_size); + part_offset = element_nr % + FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); return part_offset * fa->element_size; } @@ -313,13 +298,12 @@ static int part_is_free(struct flex_array_part *part) int flex_array_shrink(struct flex_array *fa) { struct flex_array_part *part; - int max_part = nr_base_part_ptrs(); int part_nr; int ret = 0; if (elements_fit_in_base(fa)) return ret; - for (part_nr = 0; part_nr < max_part; part_nr++) { + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { part = fa->parts[part_nr]; if (!part) continue; -- cgit