From 4a67e3a79e3bdc47dfd0c85a1888067d95a0282c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Nov 2018 15:25:13 -0800 Subject: tools/kernel.h: Replace synchronize_sched() with synchronize_rcu() Now that synchronize_rcu() waits for preempt-disable regions of code as well as RCU read-side critical sections, synchronize_sched() can be replaced by synchronize_rcu(). This commit therefore makes this change, even though it is but a comment. Signed-off-by: Paul E. McKenney Cc: Matthew Wilcox Cc: --- tools/include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include/linux') diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 6935ef94e77a..857d9e22826e 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -116,6 +116,6 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...); #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define current_gfp_context(k) 0 -#define synchronize_sched() +#define synchronize_rcu() #endif -- cgit From 92151b0a230ce2792e4b2b1f43ca3ea80a83292e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Nov 2018 09:56:55 -0300 Subject: tools include: Adopt ERR_CAST() from the kernel err.h header Add ERR_CAST(), so that tools can use it, just like the kernel. This addresses coccinelle checks that are being performed to tools/ in addition to kernel sources, so let's add this to cover that and to get tools code closer to kernel coding standards. 
This originally was introduced in the kernel headers in this cset: d1bc8e954452 ("Add an ERR_CAST() function to complement ERR_PTR and co.") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: David Howells Cc: Jiri Olsa Cc: Julia Lawall Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: Wen Yang Cc: zhong.weidong@zte.com.cn Link: https://lkml.kernel.org/n/tip-tlt97p066zyhzqhl5jt86og7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/err.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools/include/linux') diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h index 094649667bae..2f5a12b88a86 100644 --- a/tools/include/linux/err.h +++ b/tools/include/linux/err.h @@ -59,4 +59,17 @@ static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) else return 0; } + +/** + * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type + * @ptr: The pointer to cast. + * + * Explicitly cast an error-valued pointer to another pointer type in such a + * way as to make it clear that's what's going on. + */ +static inline void * __must_check ERR_CAST(__force const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} #endif /* _LINUX_ERR_H */ -- cgit From 3aef2cad5d51ee66d2a614dd2f70cb34c74caf77 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 Dec 2018 11:18:13 -0800 Subject: tools: Update rbtree implementation There have been a number of changes in the kernel's rbtree implementation, including loose lockless searching guarantees and rb_root_cached, which later patches will use as an optimization. 
Signed-off-by: Davidlohr Bueso Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20181206191819.30182-2-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/rbtree.h | 52 ++++++++-- tools/include/linux/rbtree_augmented.h | 60 ++++++++--- tools/lib/rbtree.c | 178 +++++++++++++++++++++++++-------- 3 files changed, 229 insertions(+), 61 deletions(-) (limited to 'tools/include/linux') diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h index 112582253dd0..8e9ed4786269 100644 --- a/tools/include/linux/rbtree.h +++ b/tools/include/linux/rbtree.h @@ -43,13 +43,28 @@ struct rb_root { struct rb_node *rb_node; }; +/* + * Leftmost-cached rbtrees. + * + * We do not cache the rightmost node based on footprint + * size vs number of potential users that could benefit + * from O(1) rb_last(). Just not worth it, users that want + * this feature can always implement the logic explicitly. + * Furthermore, users that want to cache both pointers may + * find it a bit asymmetric, but that's ok. 
+ */ +struct rb_root_cached { + struct rb_root rb_root; + struct rb_node *rb_leftmost; +}; #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } #define rb_entry(ptr, type, member) container_of(ptr, type, member) -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) +#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ #define RB_EMPTY_NODE(node) \ @@ -68,6 +83,12 @@ extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); +extern void rb_insert_color_cached(struct rb_node *, + struct rb_root_cached *, bool); +extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); +/* Same as rb_first(), but O(1) */ +#define rb_first_cached(root) (root)->rb_leftmost + /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); @@ -75,6 +96,8 @@ extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); +extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) @@ -90,12 +113,29 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, ____ptr ? 
rb_entry(____ptr, type, member) : NULL; \ }) - -/* - * Handy for checking that we are not deleting an entry that is - * already in a list, found in block/{blk-throttle,cfq-iosched}.c, - * probably should be moved to lib/rbtree.c... +/** + * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of + * given type allowing the backing memory of @pos to be invalidated + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. + * + * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as + * list_for_each_entry_safe() and allows the iteration to continue independent + * of changes to @pos by the body of the loop. + * + * Note, however, that it cannot handle other modifications that re-order the + * rbtree it is iterating over. This includes calling rb_erase() on @pos, as + * rb_erase() may rebalance the tree, causing us to miss some nodes. 
*/ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ + pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ + typeof(*pos), field); 1; }); \ + pos = n) + static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) { rb_erase(n, root); diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 43be941db695..d008e1404580 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -44,7 +44,9 @@ struct rb_augment_callbacks { void (*rotate)(struct rb_node *old, struct rb_node *new); }; -extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, +extern void __rb_insert_augmented(struct rb_node *node, + struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. 
@@ -60,7 +62,16 @@ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - __rb_insert_augmented(node, root, augment->rotate); + __rb_insert_augmented(node, root, false, NULL, augment->rotate); +} + +static inline void +rb_insert_augmented_cached(struct rb_node *node, + struct rb_root_cached *root, bool newleft, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, &root->rb_root, + newleft, &root->rb_leftmost, augment->rotate); } #define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ @@ -93,7 +104,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ old->rbaugmented = rbcompute(old); \ } \ rbstatic const struct rb_augment_callbacks rbname = { \ - rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ + .propagate = rbname ## _propagate, \ + .copy = rbname ## _copy, \ + .rotate = rbname ## _rotate \ }; @@ -126,11 +139,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, { if (parent) { if (parent->rb_left == old) - parent->rb_left = new; + WRITE_ONCE(parent->rb_left, new); else - parent->rb_right = new; + WRITE_ONCE(parent->rb_right, new); } else - root->rb_node = new; + WRITE_ONCE(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, @@ -138,12 +151,17 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, + struct rb_node **leftmost, const struct rb_augment_callbacks *augment) { - struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *child = node->rb_right; + struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; + if (leftmost && node == *leftmost) + *leftmost = rb_next(node); + if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) 
@@ -170,6 +188,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, tmp = parent; } else { struct rb_node *successor = child, *child2; + tmp = child->rb_left; if (!tmp) { /* @@ -183,6 +202,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, */ parent = successor; child2 = successor->rb_right; + augment->copy(node, successor); } else { /* @@ -204,19 +224,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, successor = tmp; tmp = tmp->rb_left; } while (tmp); - parent->rb_left = child2 = successor->rb_right; - successor->rb_right = child; + child2 = successor->rb_right; + WRITE_ONCE(parent->rb_left, child2); + WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); + augment->copy(node, successor); augment->propagate(parent, successor); } - successor->rb_left = tmp = node->rb_left; + tmp = node->rb_left; + WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); + if (child2) { successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); @@ -237,9 +261,21 @@ static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + struct rb_node *rebalance = __rb_erase_augmented(node, root, + NULL, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } -#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ +static __always_inline void +rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, + augment); + if (rebalance) + __rb_erase_color(rebalance, &root->rb_root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git 
a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 17c2b596f043..904adb70a4f0 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -22,6 +22,7 @@ */ #include <linux/rbtree_augmented.h> +#include <linux/export.h> /* * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree @@ -43,6 +44,30 @@ * parentheses and have some accompanying text comment. */ +/* + * Notes on lockless lookups: + * + * All stores to the tree structure (rb_left and rb_right) must be done using + * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the + * tree structure as seen in program order. + * + * These two requirements will allow lockless iteration of the tree -- not + * correct iteration mind you, tree rotations are not atomic so a lookup might + * miss entire subtrees. + * + * But they do guarantee that any such traversal will only see valid elements + * and that it will indeed complete -- does not get stuck in a loop. + * + * It also guarantees that if the lookup returns an element it is the 'correct' + * one. But not returning an element does _NOT_ mean it's not present. + * + * NOTE: + * + * Stores to __rb_parent_color are not important for simple lookups so those + * are left undone as of now. Nor did I check for loops involving parent + * pointers. + */ + static inline void rb_set_black(struct rb_node *rb) { rb->__rb_parent_color |= RB_BLACK; @@ -70,22 +95,35 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, static __always_inline void __rb_insert(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + if (newleft) + *leftmost = node; + while (true) { /* - * Loop invariant: node is red - * - * If there is a black parent, we are done. - * Otherwise, take some corrective action as we don't - * want a red root or two consecutive red nodes. + * Loop invariant: node is red. 
*/ - if (!parent) { + if (unlikely(!parent)) { + /* + * The inserted node is root. Either this is the + * first node, or we recursed at Case 1 below and + * are no longer violating 4). + */ rb_set_parent_color(node, NULL, RB_BLACK); break; - } else if (rb_is_black(parent)) + } + + /* + * If there is a black parent, we are done. + * Otherwise, take some corrective action as, + * per 4), we don't want a red root or two + * consecutive red nodes. + */ + if(rb_is_black(parent)) break; gparent = rb_red_parent(parent); @@ -94,7 +132,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root, if (parent != tmp) { /* parent == gparent->rb_left */ if (tmp && rb_is_red(tmp)) { /* - * Case 1 - color flips + * Case 1 - node's uncle is red (color flips). * * G g * / \ / \ @@ -117,7 +155,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_right; if (node == tmp) { /* - * Case 2 - left rotate at parent + * Case 2 - node's uncle is black and node is + * the parent's right child (left rotate at parent). * * G G * / \ / \ @@ -128,8 +167,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * This still leaves us in violation of 4), the * continuation into Case 3 will fix that. */ - parent->rb_right = tmp = node->rb_left; - node->rb_left = parent; + tmp = node->rb_left; + WRITE_ONCE(parent->rb_right, tmp); + WRITE_ONCE(node->rb_left, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -140,7 +180,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* - * Case 3 - right rotate at gparent + * Case 3 - node's uncle is black and node is + * the parent's left child (right rotate at gparent). 
* * G P * / \ / \ @@ -148,8 +189,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * / \ * n U */ - gparent->rb_left = tmp; /* == parent->rb_right */ - parent->rb_right = gparent; + WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */ + WRITE_ONCE(parent->rb_right, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -170,8 +211,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_left; if (node == tmp) { /* Case 2 - right rotate at parent */ - parent->rb_left = tmp = node->rb_right; - node->rb_right = parent; + tmp = node->rb_right; + WRITE_ONCE(parent->rb_left, tmp); + WRITE_ONCE(node->rb_right, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -182,8 +224,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* Case 3 - left rotate at gparent */ - gparent->rb_right = tmp; /* == parent->rb_left */ - parent->rb_left = gparent; + WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */ + WRITE_ONCE(parent->rb_left, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -223,8 +265,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * Sl Sr N Sl */ - parent->rb_right = tmp1 = sibling->rb_left; - sibling->rb_left = parent; + tmp1 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp1); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -268,15 +311,31 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * * (p) (p) * / \ / \ - * N S --> N Sl + * N S --> N sl * / \ \ - * sl Sr s + * sl Sr S * \ * Sr + * + * Note: p might be red, and then both + * p and sl are red after rotation(which + * breaks property 4). 
This is fixed in + * Case 4 (in __rb_rotate_set_parents() + * which set sl the color of p + * and set p RB_BLACK) + * + * (p) (sl) + * / \ / \ + * N sl --> P S + * \ / \ + * S N Sr + * \ + * Sr */ - sibling->rb_left = tmp1 = tmp2->rb_right; - tmp2->rb_right = sibling; - parent->rb_right = tmp2; + tmp1 = tmp2->rb_right; + WRITE_ONCE(sibling->rb_left, tmp1); + WRITE_ONCE(tmp2->rb_right, sibling); + WRITE_ONCE(parent->rb_right, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -296,8 +355,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * (sl) sr N (sl) */ - parent->rb_right = tmp2 = sibling->rb_left; - sibling->rb_left = parent; + tmp2 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp2); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -309,8 +369,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, sibling = parent->rb_left; if (rb_is_red(sibling)) { /* Case 1 - right rotate at parent */ - parent->rb_left = tmp1 = sibling->rb_right; - sibling->rb_right = parent; + tmp1 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp1); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -334,10 +395,11 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, } break; } - /* Case 3 - right rotate at sibling */ - sibling->rb_right = tmp1 = tmp2->rb_left; - tmp2->rb_left = sibling; - parent->rb_left = tmp2; + /* Case 3 - left rotate at sibling */ + tmp1 = tmp2->rb_left; + WRITE_ONCE(sibling->rb_right, tmp1); + WRITE_ONCE(tmp2->rb_left, sibling); + WRITE_ONCE(parent->rb_left, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -345,9 +407,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, tmp1 = sibling; sibling = tmp2; } - /* Case 4 - left rotate at parent + color flips */ - parent->rb_left = tmp2 
= sibling->rb_right; - sibling->rb_right = parent; + /* Case 4 - right rotate at parent + color flips */ + tmp2 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp2); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -378,22 +441,41 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} static const struct rb_augment_callbacks dummy_callbacks = { - dummy_propagate, dummy_copy, dummy_rotate + .propagate = dummy_propagate, + .copy = dummy_copy, + .rotate = dummy_rotate }; void rb_insert_color(struct rb_node *node, struct rb_root *root) { - __rb_insert(node, root, dummy_rotate); + __rb_insert(node, root, false, NULL, dummy_rotate); } void rb_erase(struct rb_node *node, struct rb_root *root) { struct rb_node *rebalance; - rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + rebalance = __rb_erase_augmented(node, root, + NULL, &dummy_callbacks); if (rebalance) ____rb_erase_color(rebalance, root, dummy_rotate); } +void rb_insert_color_cached(struct rb_node *node, + struct rb_root_cached *root, bool leftmost) +{ + __rb_insert(node, &root->rb_root, leftmost, + &root->rb_leftmost, dummy_rotate); +} + +void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate); +} + /* * Augmented rbtree manipulation functions. 
* @@ -402,9 +484,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) */ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { - __rb_insert(node, root, augment_rotate); + __rb_insert(node, root, newleft, leftmost, augment_rotate); } /* @@ -498,15 +581,24 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, { struct rb_node *parent = rb_parent(victim); + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; + /* Set the surrounding nodes to point to the replacement */ - __rb_change_child(victim, new, parent, root); if (victim->rb_left) rb_set_parent(victim->rb_left, new); if (victim->rb_right) rb_set_parent(victim->rb_right, new); + __rb_change_child(victim, new, parent, root); +} - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; +void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root) +{ + rb_replace_node(victim, new, &root->rb_root); + + if (root->rb_leftmost == victim) + root->rb_leftmost = new; } static struct rb_node *rb_left_deepest_node(const struct rb_node *node) -- cgit From a7b76c8857692b0fce063b94ed83da11c396d341 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:05 -0500 Subject: bpf: JIT blinds support JMP32 This patch adds JIT blinds support for JMP32. Like BPF_JMP_REG/IMM, JMP32 version are needed for building raw bpf insn. They are added to both include/linux/filter.h and tools/include/linux/filter.h. 
Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 20 ++++++++++++++++++++ kernel/bpf/core.c | 21 +++++++++++++++++++++ tools/include/linux/filter.h | 20 ++++++++++++++++++++ 3 files changed, 61 insertions(+) (limited to 'tools/include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index be9af6b4a9e4..e4b473f85b46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,26 @@ struct sock_reuseport; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bba11c2565ee..a7bcb23bee84 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -949,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. 
*/ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, + off); + break; + case BPF_LD | BPF_IMM | BPF_DW: *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index af55acf73e75..cce0b02c0e28 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -199,6 +199,16 @@ .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -209,6 +219,16 @@ .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ -- cgit From 7c9eefe82ca1efec5890678c33e66d5d520c06f4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 5 Mar 2019 15:43:01 -0800 Subject: tools/: replace open encodings for NUMA_NO_NODE This replaces all open encodings in tools with NUMA_NO_NODE. Also linux/numa.h is now needed for the perf build. 
[sfr@canb.auug.org.au: fix for replace open encodings for NUMA_NO_NODE] Link: http://lkml.kernel.org/r/20190108131141.730e9c4f@canb.auug.org.au Link: http://lkml.kernel.org/r/1545127933-10711-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Stephen Rothwell Signed-off-by: Anshuman Khandual Signed-off-by: Stephen Rothwell Cc: David Hildenbrand Cc: Doug Ledford [drivers/infiniband] Cc: Hans Verkuil Cc: Jeff Kirsher [ixgbe] Cc: Jens Axboe [mtip32xx] Cc: Joseph Qi Cc: Michael Ellerman [powerpc] Cc: Vinod Koul [dmaengine.c] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/linux/numa.h | 16 ++++++++++++++++ tools/perf/bench/numa.c | 7 ++++--- 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/numa.h (limited to 'tools/include/linux') diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h new file mode 100644 index 000000000000..110b0e5d0fb0 --- /dev/null +++ b/tools/include/linux/numa.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NUMA_H +#define _LINUX_NUMA_H + + +#ifdef CONFIG_NODES_SHIFT +#define NODES_SHIFT CONFIG_NODES_SHIFT +#else +#define NODES_SHIFT 0 +#endif + +#define MAX_NUMNODES (1 << NODES_SHIFT) + +#define NUMA_NO_NODE (-1) + +#endif /* _LINUX_NUMA_H */ diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 44195514b19e..98ad783efc69 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -298,7 +299,7 @@ static cpu_set_t bind_to_node(int target_node) CPU_ZERO(&mask); - if (target_node == -1) { + if (target_node == NUMA_NO_NODE) { for (cpu = 0; cpu < g->p.nr_cpus; cpu++) CPU_SET(cpu, &mask); } else { @@ -339,7 +340,7 @@ static void bind_to_memnode(int node) unsigned long nodemask; int ret; - if (node == -1) + if (node == NUMA_NO_NODE) return; BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); @@ -1363,7 +1364,7 @@ static void 
init_thread_data(void) int cpu; /* Allow all nodes by default: */ - td->bind_node = -1; + td->bind_node = NUMA_NO_NODE; /* Allow all CPUs by default: */ CPU_ZERO(&td->bind_cpumask); -- cgit From 586187d7de71b4da7956ba588ae42253b9ff6482 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Mar 2019 23:31:26 -0700 Subject: Drop flex_arrays All existing users have been converted to generic radix trees Link: http://lkml.kernel.org/r/20181217131929.11727-8-kent.overstreet@gmail.com Signed-off-by: Kent Overstreet Acked-by: Dave Hansen Cc: Alexey Dobriyan Cc: Al Viro Cc: Eric Paris Cc: Marcelo Ricardo Leitner Cc: Matthew Wilcox Cc: Neil Horman Cc: Paul Moore Cc: Pravin B Shelar Cc: Shaohua Li Cc: Stephen Smalley Cc: Vlad Yasevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/flexible-arrays.rst | 130 ---------- Documentation/flexible-arrays.txt | 123 --------- include/linux/flex_array.h | 149 ----------- include/linux/poison.h | 3 - lib/Makefile | 2 +- lib/flex_array.c | 398 ----------------------------- tools/include/linux/poison.h | 3 - 7 files changed, 1 insertion(+), 807 deletions(-) delete mode 100644 Documentation/core-api/flexible-arrays.rst delete mode 100644 Documentation/flexible-arrays.txt delete mode 100644 include/linux/flex_array.h delete mode 100644 lib/flex_array.c (limited to 'tools/include/linux') diff --git a/Documentation/core-api/flexible-arrays.rst b/Documentation/core-api/flexible-arrays.rst deleted file mode 100644 index b6b85a1b518e..000000000000 --- a/Documentation/core-api/flexible-arrays.rst +++ /dev/null @@ -1,130 +0,0 @@ - -=================================== -Using flexible arrays in the kernel -=================================== - -Large contiguous memory allocations can be unreliable in the Linux kernel. -Kernel programmers will sometimes respond to this problem by allocating -pages with :c:func:`vmalloc()`. This solution not ideal, though. 
On 32-bit -systems, memory from vmalloc() must be mapped into a relatively small address -space; it's easy to run out. On SMP systems, the page table changes required -by vmalloc() allocations can require expensive cross-processor interrupts on -all CPUs. And, on all systems, use of space in the vmalloc() range increases -pressure on the translation lookaside buffer (TLB), reducing the performance -of the system. - -In many cases, the need for memory from vmalloc() can be eliminated by piecing -together an array from smaller parts; the flexible array library exists to make -this task easier. - -A flexible array holds an arbitrary (within limits) number of fixed-sized -objects, accessed via an integer index. Sparse arrays are handled -reasonably well. Only single-page allocations are made, so memory -allocation failures should be relatively rare. The down sides are that the -arrays cannot be indexed directly, individual object size cannot exceed the -system page size, and putting data into a flexible array requires a copy -operation. It's also worth noting that flexible arrays do no internal -locking at all; if concurrent access to an array is possible, then the -caller must arrange for appropriate mutual exclusion. - -The creation of a flexible array is done with :c:func:`flex_array_alloc()`:: - - #include - - struct flex_array *flex_array_alloc(int element_size, - unsigned int total, - gfp_t flags); - -The individual object size is provided by ``element_size``, while total is the -maximum number of objects which can be stored in the array. The flags -argument is passed directly to the internal memory allocation calls. With -the current code, using flags to ask for high memory is likely to lead to -notably unpleasant side effects. 
- -It is also possible to define flexible arrays at compile time with:: - - DEFINE_FLEX_ARRAY(name, element_size, total); - -This macro will result in a definition of an array with the given name; the -element size and total will be checked for validity at compile time. - -Storing data into a flexible array is accomplished with a call to -:c:func:`flex_array_put()`:: - - int flex_array_put(struct flex_array *array, unsigned int element_nr, - void *src, gfp_t flags); - -This call will copy the data from src into the array, in the position -indicated by ``element_nr`` (which must be less than the maximum specified when -the array was created). If any memory allocations must be performed, flags -will be used. The return value is zero on success, a negative error code -otherwise. - -There might possibly be a need to store data into a flexible array while -running in some sort of atomic context; in this situation, sleeping in the -memory allocator would be a bad thing. That can be avoided by using -``GFP_ATOMIC`` for the flags value, but, often, there is a better way. The -trick is to ensure that any needed memory allocations are done before -entering atomic context, using :c:func:`flex_array_prealloc()`:: - - int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int nr_elements, gfp_t flags); - -This function will ensure that memory for the elements indexed in the range -defined by ``start`` and ``nr_elements`` has been allocated. Thereafter, a -``flex_array_put()`` call on an element in that range is guaranteed not to -block. - -Getting data back out of the array is done with :c:func:`flex_array_get()`:: - - void *flex_array_get(struct flex_array *fa, unsigned int element_nr); - -The return value is a pointer to the data element, or NULL if that -particular element has never been allocated. - -Note that it is possible to get back a valid pointer for an element which -has never been stored in the array. 
Memory for array elements is allocated -one page at a time; a single allocation could provide memory for several -adjacent elements. Flexible array elements are normally initialized to the -value ``FLEX_ARRAY_FREE`` (defined as 0x6c in ), so errors -involving that number probably result from use of unstored array entries. -Note that, if array elements are allocated with ``__GFP_ZERO``, they will be -initialized to zero and this poisoning will not happen. - -Individual elements in the array can be cleared with -:c:func:`flex_array_clear()`:: - - int flex_array_clear(struct flex_array *array, unsigned int element_nr); - -This function will set the given element to ``FLEX_ARRAY_FREE`` and return -zero. If storage for the indicated element is not allocated for the array, -``flex_array_clear()`` will return ``-EINVAL`` instead. Note that clearing an -element does not release the storage associated with it; to reduce the -allocated size of an array, call :c:func:`flex_array_shrink()`:: - - int flex_array_shrink(struct flex_array *array); - -The return value will be the number of pages of memory actually freed. -This function works by scanning the array for pages containing nothing but -``FLEX_ARRAY_FREE`` bytes, so (1) it can be expensive, and (2) it will not work -if the array's pages are allocated with ``__GFP_ZERO``. - -It is possible to remove all elements of an array with a call to -:c:func:`flex_array_free_parts()`:: - - void flex_array_free_parts(struct flex_array *array); - -This call frees all elements, but leaves the array itself in place. -Freeing the entire array is done with :c:func:`flex_array_free()`:: - - void flex_array_free(struct flex_array *array); - -As of this writing, there are no users of flexible arrays in the mainline -kernel. The functions described here are also not exported to modules; -that will probably be fixed when somebody comes up with a need for it. - - -Flexible array functions ------------------------- - -.. 
kernel-doc:: include/linux/flex_array.h diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt deleted file mode 100644 index a0f2989dd804..000000000000 --- a/Documentation/flexible-arrays.txt +++ /dev/null @@ -1,123 +0,0 @@ -=================================== -Using flexible arrays in the kernel -=================================== - -:Updated: Last updated for 2.6.32 -:Author: Jonathan Corbet - -Large contiguous memory allocations can be unreliable in the Linux kernel. -Kernel programmers will sometimes respond to this problem by allocating -pages with vmalloc(). This solution not ideal, though. On 32-bit systems, -memory from vmalloc() must be mapped into a relatively small address space; -it's easy to run out. On SMP systems, the page table changes required by -vmalloc() allocations can require expensive cross-processor interrupts on -all CPUs. And, on all systems, use of space in the vmalloc() range -increases pressure on the translation lookaside buffer (TLB), reducing the -performance of the system. - -In many cases, the need for memory from vmalloc() can be eliminated by -piecing together an array from smaller parts; the flexible array library -exists to make this task easier. - -A flexible array holds an arbitrary (within limits) number of fixed-sized -objects, accessed via an integer index. Sparse arrays are handled -reasonably well. Only single-page allocations are made, so memory -allocation failures should be relatively rare. The down sides are that the -arrays cannot be indexed directly, individual object size cannot exceed the -system page size, and putting data into a flexible array requires a copy -operation. It's also worth noting that flexible arrays do no internal -locking at all; if concurrent access to an array is possible, then the -caller must arrange for appropriate mutual exclusion. 
- -The creation of a flexible array is done with:: - - #include - - struct flex_array *flex_array_alloc(int element_size, - unsigned int total, - gfp_t flags); - -The individual object size is provided by element_size, while total is the -maximum number of objects which can be stored in the array. The flags -argument is passed directly to the internal memory allocation calls. With -the current code, using flags to ask for high memory is likely to lead to -notably unpleasant side effects. - -It is also possible to define flexible arrays at compile time with:: - - DEFINE_FLEX_ARRAY(name, element_size, total); - -This macro will result in a definition of an array with the given name; the -element size and total will be checked for validity at compile time. - -Storing data into a flexible array is accomplished with a call to:: - - int flex_array_put(struct flex_array *array, unsigned int element_nr, - void *src, gfp_t flags); - -This call will copy the data from src into the array, in the position -indicated by element_nr (which must be less than the maximum specified when -the array was created). If any memory allocations must be performed, flags -will be used. The return value is zero on success, a negative error code -otherwise. - -There might possibly be a need to store data into a flexible array while -running in some sort of atomic context; in this situation, sleeping in the -memory allocator would be a bad thing. That can be avoided by using -GFP_ATOMIC for the flags value, but, often, there is a better way. The -trick is to ensure that any needed memory allocations are done before -entering atomic context, using:: - - int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int nr_elements, gfp_t flags); - -This function will ensure that memory for the elements indexed in the range -defined by start and nr_elements has been allocated. Thereafter, a -flex_array_put() call on an element in that range is guaranteed not to -block. 
- -Getting data back out of the array is done with:: - - void *flex_array_get(struct flex_array *fa, unsigned int element_nr); - -The return value is a pointer to the data element, or NULL if that -particular element has never been allocated. - -Note that it is possible to get back a valid pointer for an element which -has never been stored in the array. Memory for array elements is allocated -one page at a time; a single allocation could provide memory for several -adjacent elements. Flexible array elements are normally initialized to the -value FLEX_ARRAY_FREE (defined as 0x6c in ), so errors -involving that number probably result from use of unstored array entries. -Note that, if array elements are allocated with __GFP_ZERO, they will be -initialized to zero and this poisoning will not happen. - -Individual elements in the array can be cleared with:: - - int flex_array_clear(struct flex_array *array, unsigned int element_nr); - -This function will set the given element to FLEX_ARRAY_FREE and return -zero. If storage for the indicated element is not allocated for the array, -flex_array_clear() will return -EINVAL instead. Note that clearing an -element does not release the storage associated with it; to reduce the -allocated size of an array, call:: - - int flex_array_shrink(struct flex_array *array); - -The return value will be the number of pages of memory actually freed. -This function works by scanning the array for pages containing nothing but -FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work -if the array's pages are allocated with __GFP_ZERO. - -It is possible to remove all elements of an array with a call to:: - - void flex_array_free_parts(struct flex_array *array); - -This call frees all elements, but leaves the array itself in place. -Freeing the entire array is done with:: - - void flex_array_free(struct flex_array *array); - -As of this writing, there are no users of flexible arrays in the mainline -kernel. 
The functions described here are also not exported to modules; -that will probably be fixed when somebody comes up with a need for it. diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h deleted file mode 100644 index b94fa61b51fb..000000000000 --- a/include/linux/flex_array.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _FLEX_ARRAY_H -#define _FLEX_ARRAY_H - -#include -#include -#include - -#define FLEX_ARRAY_PART_SIZE PAGE_SIZE -#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE - -struct flex_array_part; - -/* - * This is meant to replace cases where an array-like - * structure has gotten too big to fit into kmalloc() - * and the developer is getting tempted to use - * vmalloc(). - */ - -struct flex_array { - union { - struct { - int element_size; - int total_nr_elements; - int elems_per_part; - struct reciprocal_value reciprocal_elems; - struct flex_array_part *parts[]; - }; - /* - * This little trick makes sure that - * sizeof(flex_array) == PAGE_SIZE - */ - char padding[FLEX_ARRAY_BASE_SIZE]; - }; -}; - -/* Number of bytes left in base struct flex_array, excluding metadata */ -#define FLEX_ARRAY_BASE_BYTES_LEFT \ - (FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts)) - -/* Number of pointers in base to struct flex_array_part pages */ -#define FLEX_ARRAY_NR_BASE_PTRS \ - (FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *)) - -/* Number of elements of size that fit in struct flex_array_part */ -#define FLEX_ARRAY_ELEMENTS_PER_PART(size) \ - (FLEX_ARRAY_PART_SIZE / size) - -/* - * Defines a statically allocated flex array and ensures its parameters are - * valid. 
- */ -#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total) \ - struct flex_array __arrayname = { { { \ - .element_size = (__element_size), \ - .total_nr_elements = (__total), \ - } } }; \ - static inline void __arrayname##_invalid_parameter(void) \ - { \ - BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS * \ - FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ - } - -/** - * flex_array_alloc() - Creates a flexible array. - * @element_size: individual object size. - * @total: maximum number of objects which can be stored. - * @flags: GFP flags - * - * Return: Returns an object of structure flex_array. - */ -struct flex_array *flex_array_alloc(int element_size, unsigned int total, - gfp_t flags); - -/** - * flex_array_prealloc() - Ensures that memory for the elements indexed in the - * range defined by start and nr_elements has been allocated. - * @fa: array to allocate memory to. - * @start: start address - * @nr_elements: number of elements to be allocated. - * @flags: GFP flags - * - */ -int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int nr_elements, gfp_t flags); - -/** - * flex_array_free() - Removes all elements of a flexible array. - * @fa: array to be freed. - */ -void flex_array_free(struct flex_array *fa); - -/** - * flex_array_free_parts() - Removes all elements of a flexible array, but - * leaves the array itself in place. - * @fa: array to be emptied. - */ -void flex_array_free_parts(struct flex_array *fa); - -/** - * flex_array_put() - Stores data into a flexible array. - * @fa: array where element is to be stored. - * @element_nr: position to copy, must be less than the maximum specified when - * the array was created. - * @src: data source to be copied into the array. - * @flags: GFP flags - * - * Return: Returns zero on success, a negative error code otherwise. 
- */ -int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, - gfp_t flags); - -/** - * flex_array_clear() - Clears an individual element in the array, sets the - * given element to FLEX_ARRAY_FREE. - * @element_nr: element position to clear. - * @fa: array to which element to be cleared belongs. - * - * Return: Returns zero on success, -EINVAL otherwise. - */ -int flex_array_clear(struct flex_array *fa, unsigned int element_nr); - -/** - * flex_array_get() - Retrieves data into a flexible array. - * - * @element_nr: Element position to retrieve data from. - * @fa: array from which data is to be retrieved. - * - * Return: Returns a pointer to the data element, or NULL if that - * particular element has never been allocated. - */ -void *flex_array_get(struct flex_array *fa, unsigned int element_nr); - -/** - * flex_array_shrink() - Reduces the allocated size of an array. - * @fa: array to shrink. - * - * Return: Returns number of pages of memory actually freed. - * - */ -int flex_array_shrink(struct flex_array *fa); - -#define flex_array_put_ptr(fa, nr, src, gfp) \ - flex_array_put(fa, nr, (void *)&(src), gfp) - -void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr); - -#endif /* _FLEX_ARRAY_H */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 5046bad0c1c5..d6d980a681c7 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -83,9 +83,6 @@ #define MUTEX_DEBUG_FREE 0x22 #define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) -/********** lib/flex_array.c **********/ -#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ - /********** security/ **********/ #define KEY_DESTROY 0xbd diff --git a/lib/Makefile b/lib/Makefile index b798b41d01ae..4e066120a0d6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -35,7 +35,7 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o 
list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o reciprocal_div.o \ once.o refcount.o usercopy.o errseq.o bucket_locks.o \ diff --git a/lib/flex_array.c b/lib/flex_array.c deleted file mode 100644 index 2eed22fa507c..000000000000 --- a/lib/flex_array.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Flexible array managed in PAGE_SIZE parts - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2009 - * - * Author: Dave Hansen - */ - -#include -#include -#include -#include -#include - -struct flex_array_part { - char elements[FLEX_ARRAY_PART_SIZE]; -}; - -/* - * If a user requests an allocation which is small - * enough, we may simply use the space in the - * flex_array->parts[] array to store the user - * data. 
- */ -static inline int elements_fit_in_base(struct flex_array *fa) -{ - int data_size = fa->element_size * fa->total_nr_elements; - if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) - return 1; - return 0; -} - -/** - * flex_array_alloc - allocate a new flexible array - * @element_size: the size of individual elements in the array - * @total: total number of elements that this should hold - * @flags: page allocation flags to use for base array - * - * Note: all locking must be provided by the caller. - * - * @total is used to size internal structures. If the user ever - * accesses any array indexes >=@total, it will produce errors. - * - * The maximum number of elements is defined as: the number of - * elements that can be stored in a page times the number of - * page pointers that we can fit in the base structure or (using - * integer math): - * - * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) - * - * Here's a table showing example capacities. Note that the maximum - * index that the get/put() functions is just nr_objects-1. This - * basically means that you get 4MB of storage on 32-bit and 2MB on - * 64-bit. - * - * - * Element size | Objects | Objects | - * PAGE_SIZE=4k | 32-bit | 64-bit | - * ---------------------------------| - * 1 bytes | 4177920 | 2088960 | - * 2 bytes | 2088960 | 1044480 | - * 3 bytes | 1392300 | 696150 | - * 4 bytes | 1044480 | 522240 | - * 32 bytes | 130560 | 65408 | - * 33 bytes | 126480 | 63240 | - * 2048 bytes | 2040 | 1020 | - * 2049 bytes | 1020 | 510 | - * void * | 1044480 | 261120 | - * - * Since 64-bit pointers are twice the size, we lose half the - * capacity in the base structure. Also note that no effort is made - * to efficiently pack objects across page boundaries. 
- */ -struct flex_array *flex_array_alloc(int element_size, unsigned int total, - gfp_t flags) -{ - struct flex_array *ret; - int elems_per_part = 0; - int max_size = 0; - struct reciprocal_value reciprocal_elems = { 0 }; - - if (element_size) { - elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); - reciprocal_elems = reciprocal_value(elems_per_part); - max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; - } - - /* max_size will end up 0 if element_size > PAGE_SIZE */ - if (total > max_size) - return NULL; - ret = kzalloc(sizeof(struct flex_array), flags); - if (!ret) - return NULL; - ret->element_size = element_size; - ret->total_nr_elements = total; - ret->elems_per_part = elems_per_part; - ret->reciprocal_elems = reciprocal_elems; - if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) - memset(&ret->parts[0], FLEX_ARRAY_FREE, - FLEX_ARRAY_BASE_BYTES_LEFT); - return ret; -} -EXPORT_SYMBOL(flex_array_alloc); - -static int fa_element_to_part_nr(struct flex_array *fa, - unsigned int element_nr) -{ - /* - * if element_size == 0 we don't get here, so we never touch - * the zeroed fa->reciprocal_elems, which would yield invalid - * results - */ - return reciprocal_divide(element_nr, fa->reciprocal_elems); -} - -/** - * flex_array_free_parts - just free the second-level pages - * @fa: the flex array from which to free parts - * - * This is to be used in cases where the base 'struct flex_array' - * has been statically allocated and should not be free. 
- */ -void flex_array_free_parts(struct flex_array *fa) -{ - int part_nr; - - if (elements_fit_in_base(fa)) - return; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) - kfree(fa->parts[part_nr]); -} -EXPORT_SYMBOL(flex_array_free_parts); - -void flex_array_free(struct flex_array *fa) -{ - flex_array_free_parts(fa); - kfree(fa); -} -EXPORT_SYMBOL(flex_array_free); - -static unsigned int index_inside_part(struct flex_array *fa, - unsigned int element_nr, - unsigned int part_nr) -{ - unsigned int part_offset; - - part_offset = element_nr - part_nr * fa->elems_per_part; - return part_offset * fa->element_size; -} - -static struct flex_array_part * -__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) -{ - struct flex_array_part *part = fa->parts[part_nr]; - if (!part) { - part = kmalloc(sizeof(struct flex_array_part), flags); - if (!part) - return NULL; - if (!(flags & __GFP_ZERO)) - memset(part, FLEX_ARRAY_FREE, - sizeof(struct flex_array_part)); - fa->parts[part_nr] = part; - } - return part; -} - -/** - * flex_array_put - copy data into the array at @element_nr - * @fa: the flex array to copy data into - * @element_nr: index of the position in which to insert - * the new element. - * @src: address of data to copy into the array - * @flags: page allocation flags to use for array expansion - * - * - * Note that this *copies* the contents of @src into - * the array. If you are trying to store an array of - * pointers, make sure to pass in &ptr instead of ptr. - * You may instead wish to use the flex_array_put_ptr() - * helper function. - * - * Locking must be provided by the caller. 
- */ -int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, - gfp_t flags) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memcpy(dst, src, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_put); - -/** - * flex_array_clear - clear element in array at @element_nr - * @fa: the flex array of the element. - * @element_nr: index of the position to clear. - * - * Locking must be provided by the caller. - */ -int flex_array_clear(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return -EINVAL; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memset(dst, FLEX_ARRAY_FREE, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_clear); - -/** - * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @nr_elements: number of elements for which space is allocated - * @flags: page allocation flags - * - * This will guarantee that no future calls to flex_array_put() - * will allocate memory. It can be used if you are expecting to - * be holding a lock or in some atomic context while writing - * data into the array. 
- * - * Locking must be provided by the caller. - */ -int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int nr_elements, gfp_t flags) -{ - int start_part; - int end_part; - int part_nr; - unsigned int end; - struct flex_array_part *part; - - if (!start && !nr_elements) - return 0; - if (start >= fa->total_nr_elements) - return -ENOSPC; - if (!nr_elements) - return 0; - - end = start + nr_elements - 1; - - if (end >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return 0; - start_part = fa_element_to_part_nr(fa, start); - end_part = fa_element_to_part_nr(fa, end); - for (part_nr = start_part; part_nr <= end_part; part_nr++) { - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - return 0; -} -EXPORT_SYMBOL(flex_array_prealloc); - -/** - * flex_array_get - pull data back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns a pointer to the data at index @element_nr. Note - * that this is a copy of the data that was passed in. If you - * are using this to store pointers, you'll get back &ptr. You - * may instead wish to use the flex_array_get_ptr helper. - * - * Locking must be provided by the caller. 
- */ -void *flex_array_get(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - - if (!fa->element_size) - return NULL; - if (element_nr >= fa->total_nr_elements) - return NULL; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return NULL; - } - return &part->elements[index_inside_part(fa, element_nr, part_nr)]; -} -EXPORT_SYMBOL(flex_array_get); - -/** - * flex_array_get_ptr - pull a ptr back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns the pointer placed in the flex array at element_nr using - * flex_array_put_ptr(). This function should not be called if the - * element in question was not set using the _put_ptr() helper. - */ -void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) -{ - void **tmp; - - tmp = flex_array_get(fa, element_nr); - if (!tmp) - return NULL; - - return *tmp; -} -EXPORT_SYMBOL(flex_array_get_ptr); - -static int part_is_free(struct flex_array_part *part) -{ - int i; - - for (i = 0; i < sizeof(struct flex_array_part); i++) - if (part->elements[i] != FLEX_ARRAY_FREE) - return 0; - return 1; -} - -/** - * flex_array_shrink - free unused second-level pages - * @fa: the flex array to shrink - * - * Frees all second-level pages that consist solely of unused - * elements. Returns the number of pages freed. - * - * Locking must be provided by the caller. 
- */ -int flex_array_shrink(struct flex_array *fa) -{ - struct flex_array_part *part; - int part_nr; - int ret = 0; - - if (!fa->total_nr_elements || !fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return ret; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { - part = fa->parts[part_nr]; - if (!part) - continue; - if (part_is_free(part)) { - fa->parts[part_nr] = NULL; - kfree(part); - ret++; - } - } - return ret; -} -EXPORT_SYMBOL(flex_array_shrink); diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 9fdcd3eaac3b..d29725769107 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -87,9 +87,6 @@ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 -/********** lib/flex_array.c **********/ -#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ - /********** security/ **********/ #define KEY_DESTROY 0xbd -- cgit