diff options
Diffstat (limited to 'lib/maple_tree.c')
| -rw-r--r-- | lib/maple_tree.c | 311 | 
1 files changed, 195 insertions, 116 deletions
| diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 646297cae5d1..db60edb55f2f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head)   */  static void ma_free_rcu(struct maple_node *node)  { -	node->parent = ma_parent_ptr(node); +	WARN_ON(node->parent != ma_parent_ptr(node));  	call_rcu(&node->rcu, mt_free_rcu);  } @@ -539,11 +539,14 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode)   */  static inline bool ma_dead_node(const struct maple_node *node)  { -	struct maple_node *parent = (void *)((unsigned long) -					     node->parent & ~MAPLE_NODE_MASK); +	struct maple_node *parent; +	/* Do not reorder reads from the node prior to the parent check */ +	smp_rmb(); +	parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);  	return (parent == node);  } +  /*   * mte_dead_node() - check if the @enode is dead.   * @enode: The encoded maple node @@ -555,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode)  	struct maple_node *parent, *node;  	node = mte_to_node(enode); +	/* Do not reorder reads from the node prior to the parent check */ +	smp_rmb();  	parent = mte_parent(enode);  	return (parent == node);  } @@ -625,6 +630,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas)   * @node - the maple node   * @type - the node type   * + * In the event of a dead node, this array may be %NULL + *   * Return: A pointer to the maple node pivots   */  static inline unsigned long *ma_pivots(struct maple_node *node, @@ -817,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt,  	return rcu_dereference_check(slots[offset], mt_locked(mt));  } +static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots, +				   unsigned char offset) +{ +	return rcu_dereference_protected(slots[offset], mt_locked(mt)); +}  /*   * mas_slot_locked() - Get the slot value when holding the maple tree lock.   * @mas: The maple state @@ -828,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt,  static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,  				       unsigned char offset)  { -	return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); +	return mt_slot_locked(mas->tree, slots, offset);  }  /* @@ -900,6 +912,45 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,  }  /* + * mt_clear_meta() - clear the metadata information of a node, if it exists + * @mt: The maple tree + * @mn: The maple node + * @type: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn, +				  enum maple_type type) +{ +	struct maple_metadata *meta; +	unsigned long *pivots; +	void __rcu **slots; +	void *next; + +	switch (type) { +	case maple_range_64: +		pivots = mn->mr64.pivot; +		if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { +			slots = mn->mr64.slot; +			next = mt_slot_locked(mt, slots, +					      MAPLE_RANGE64_SLOTS - 1); +			if (unlikely((mte_to_node(next) && +				      mte_node_type(next)))) +				return; /* no metadata, could be node */ +		} +		fallthrough; +	case maple_arange_64: +		meta = ma_meta(mn, type); +		break; +	default: +		return; +	} + +	meta->gap = 0; +	meta->end = 0; +} + +/*   * ma_meta_end() - Get the data end of a node from the metadata   * @mn: The maple node   * @mt: The maple node type @@ -1096,8 +1147,11 @@ static int mas_ascend(struct ma_state *mas)  		a_type = mas_parent_enum(mas, p_enode);  		a_node = mte_parent(p_enode);  		a_slot = mte_parent_slot(p_enode); -		pivots = ma_pivots(a_node, a_type);  		a_enode = mt_mk_node(a_node, a_type); +		pivots = ma_pivots(a_node, a_type); + +		if (unlikely(ma_dead_node(a_node))) +			return 1;  		if (!set_min && a_slot) {  			set_min = true; @@ -1354,12 +1408,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas)  		mas->max = ULONG_MAX;  		mas->depth = 0; +retry:  		root = mas_root(mas);  		/* Tree with nodes */  		if (likely(xa_is_node(root))) {  			mas->depth = 1;  			mas->node = mte_safe_root(root);  			mas->offset = 0; +			if (mte_dead_node(mas->node)) +				goto retry; +  			return NULL;  		} @@ -1401,6 +1459,9 @@ static inline unsigned char ma_data_end(struct maple_node *node,  {  	unsigned char offset; +	if (!pivots) +		return 0; +  	if (type == maple_arange_64)  		return ma_meta_end(node, type); @@ -1436,6 +1497,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas)  		return ma_meta_end(node, type);  	pivots = ma_pivots(node, type); +	if (unlikely(ma_dead_node(node))) +		return 0; +  	offset = mt_pivots[type] - 1;  	if (likely(!pivots[offset]))  		return ma_meta_end(node, type); @@ -1724,8 +1788,10 @@ static inline void mas_replace(struct ma_state *mas, bool advanced)  		rcu_assign_pointer(slots[offset], mas->node);  	} -	if (!advanced) +	if (!advanced) { +		mte_set_node_dead(old_enode);  		mas_free(mas, old_enode); +	}  }  /* @@ -3659,10 +3725,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)  		slot++;  	mas->depth = 1;  	mas_set_height(mas); - +	ma_set_meta(node, maple_leaf_64, 0, slot);  	/* swap the new root into the tree */  	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node)); -	ma_set_meta(node, maple_leaf_64, 0, slot);  	return slot;  } @@ -3875,18 +3940,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas)  		end = ma_data_end(node, type, pivots, max);  		if (unlikely(ma_dead_node(node)))  			goto dead_node; - -		if (pivots[offset] >= mas->index) -			goto next; -  		do { -			offset++; -		} while ((offset < end) && (pivots[offset] < mas->index)); - -		if (likely(offset > end)) -			max = pivots[offset]; +			if (pivots[offset] >= mas->index) { +				max = pivots[offset]; +				break; +			} +		} while (++offset < end); -next:  		slots = ma_slots(node, type);  		next = mt_slot(mas->tree, slots, offset);  		if (unlikely(ma_dead_node(node))) @@ -4164,6 +4224,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)  done:  	mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);  	if (in_rcu) { +		mte_set_node_dead(mas->node);  		mas->node = mt_mk_node(newnode, wr_mas->type);  		mas_replace(mas, false);  	} else { @@ -4505,6 +4566,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)  	node = mas_mn(mas);  	slots = ma_slots(node, mt);  	pivots = ma_pivots(node, mt); +	if (unlikely(ma_dead_node(node))) +		return 1; +  	mas->max = pivots[offset];  	if (offset)  		mas->min = pivots[offset - 1] + 1; @@ -4526,6 +4590,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)  		slots = ma_slots(node, mt);  		pivots = ma_pivots(node, mt);  		offset = ma_data_end(node, mt, pivots, mas->max); +		if (unlikely(ma_dead_node(node))) +			return 1; +  		if (offset)  			mas->min = pivots[offset - 1] + 1; @@ -4574,6 +4641,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,  	struct maple_enode *enode;  	int level = 0;  	unsigned char offset; +	unsigned char node_end;  	enum maple_type mt;  	void __rcu **slots; @@ -4597,7 +4665,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,  		node = mas_mn(mas);  		mt = mte_node_type(mas->node);  		pivots = ma_pivots(node, mt); -	} while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max))); +		node_end = ma_data_end(node, mt, pivots, mas->max); +		if (unlikely(ma_dead_node(node))) +			return 1; + +	} while (unlikely(offset == node_end));  	slots = ma_slots(node, mt);  	pivot = mas_safe_pivot(mas, pivots, ++offset, mt); @@ -4613,6 +4685,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,  		mt = mte_node_type(mas->node);  		slots = ma_slots(node, mt);  		pivots = ma_pivots(node, mt); +		if (unlikely(ma_dead_node(node))) +			return 1; +  		offset = 0;  		pivot = pivots[0];  	} @@ -4659,11 +4734,14 @@ static inline void *mas_next_nentry(struct ma_state *mas,  		return NULL;  	} -	pivots = ma_pivots(node, type);  	slots = ma_slots(node, type); -	mas->index = mas_safe_min(mas, pivots, mas->offset); +	pivots = ma_pivots(node, type);  	count = ma_data_end(node, type, pivots, mas->max); -	if (ma_dead_node(node)) +	if (unlikely(ma_dead_node(node))) +		return NULL; + +	mas->index = mas_safe_min(mas, pivots, mas->offset); +	if (unlikely(ma_dead_node(node)))  		return NULL;  	if (mas->index > max) @@ -4817,6 +4895,11 @@ retry:  	slots = ma_slots(mn, mt);  	pivots = ma_pivots(mn, mt); +	if (unlikely(ma_dead_node(mn))) { +		mas_rewalk(mas, index); +		goto retry; +	} +  	if (offset == mt_pivots[mt])  		pivot = mas->max;  	else @@ -5099,35 +5182,21 @@ static inline bool mas_rewind_node(struct ma_state *mas)   */  static inline bool mas_skip_node(struct ma_state *mas)  { -	unsigned char slot, slot_count; -	unsigned long *pivots; -	enum maple_type mt; +	if (mas_is_err(mas)) +		return false; -	mt = mte_node_type(mas->node); -	slot_count = mt_slots[mt] - 1;  	do {  		if (mte_is_root(mas->node)) { -			slot = mas->offset; -			if (slot > slot_count) { +			if (mas->offset >= mas_data_end(mas)) {  				mas_set_err(mas, -EBUSY);  				return false;  			}  		} else {  			mas_ascend(mas); -			slot = mas->offset; -			mt = mte_node_type(mas->node); -			slot_count = mt_slots[mt] - 1;  		} -	} while (slot > slot_count); - -	mas->offset = ++slot; -	pivots = ma_pivots(mas_mn(mas), mt); -	if (slot > 0) -		mas->min = pivots[slot - 1] + 1; - -	if (slot <= slot_count) -		mas->max = pivots[slot]; +	} while (mas->offset >= mas_data_end(mas)); +	mas->offset++;  	return true;  } @@ -5414,24 +5483,26 @@ no_gap:  }  /* - * mas_dead_leaves() - Mark all leaves of a node as dead. + * mte_dead_leaves() - Mark all leaves of a node as dead.   * @mas: The maple state   * @slots: Pointer to the slot array + * @type: The maple node type   *   * Must hold the write lock.   *   * Return: The number of leaves marked as dead.   */  static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) +unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt, +			      void __rcu **slots)  {  	struct maple_node *node;  	enum maple_type type;  	void *entry;  	int offset; -	for (offset = 0; offset < mt_slot_count(mas->node); offset++) { -		entry = mas_slot_locked(mas, slots, offset); +	for (offset = 0; offset < mt_slot_count(enode); offset++) { +		entry = mt_slot(mt, slots, offset);  		type = mte_node_type(entry);  		node = mte_to_node(entry);  		/* Use both node and type to catch LE & BE metadata */ @@ -5439,7 +5510,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)  			break;  		mte_set_node_dead(entry); -		smp_wmb(); /* Needed for RCU */  		node->type = type;  		rcu_assign_pointer(slots[offset], node);  	} @@ -5447,151 +5517,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)  	return offset;  } -static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset) +/** + * mte_dead_walk() - Walk down a dead tree to just before the leaves + * @enode: The maple encoded node + * @offset: The starting offset + * + * Note: This can only be used from the RCU callback context. + */ +static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)  {  	struct maple_node *node, *next;  	void __rcu **slots = NULL; -	next = mas_mn(mas); +	next = mte_to_node(*enode);  	do { -		mas->node = ma_enode_ptr(next); -		node = mas_mn(mas); +		*enode = ma_enode_ptr(next); +		node = mte_to_node(*enode);  		slots = ma_slots(node, node->type); -		next = mas_slot_locked(mas, slots, offset); +		next = rcu_dereference_protected(slots[offset], +					lock_is_held(&rcu_callback_map));  		offset = 0;  	} while (!ma_is_leaf(next->type));  	return slots;  } +/** + * mt_free_walk() - Walk & free a tree in the RCU callback context + * @head: The RCU head that's within the node. + * + * Note: This can only be used from the RCU callback context. + */  static void mt_free_walk(struct rcu_head *head)  {  	void __rcu **slots;  	struct maple_node *node, *start; -	struct maple_tree mt; +	struct maple_enode *enode;  	unsigned char offset;  	enum maple_type type; -	MA_STATE(mas, &mt, 0, 0);  	node = container_of(head, struct maple_node, rcu);  	if (ma_is_leaf(node->type))  		goto free_leaf; -	mt_init_flags(&mt, node->ma_flags); -	mas_lock(&mas);  	start = node; -	mas.node = mt_mk_node(node, node->type); -	slots = mas_dead_walk(&mas, 0); -	node = mas_mn(&mas); +	enode = mt_mk_node(node, node->type); +	slots = mte_dead_walk(&enode, 0); +	node = mte_to_node(enode);  	do {  		mt_free_bulk(node->slot_len, slots);  		offset = node->parent_slot + 1; -		mas.node = node->piv_parent; -		if (mas_mn(&mas) == node) -			goto start_slots_free; - -		type = mte_node_type(mas.node); -		slots = ma_slots(mte_to_node(mas.node), type); -		if ((offset < mt_slots[type]) && (slots[offset])) -			slots = mas_dead_walk(&mas, offset); - -		node = mas_mn(&mas); +		enode = node->piv_parent; +		if (mte_to_node(enode) == node) +			goto free_leaf; + +		type = mte_node_type(enode); +		slots = ma_slots(mte_to_node(enode), type); +		if ((offset < mt_slots[type]) && +		    rcu_dereference_protected(slots[offset], +					      lock_is_held(&rcu_callback_map))) +			slots = mte_dead_walk(&enode, offset); +		node = mte_to_node(enode);  	} while ((node != start) || (node->slot_len < offset));  	slots = ma_slots(node, node->type);  	mt_free_bulk(node->slot_len, slots); -start_slots_free: -	mas_unlock(&mas);  free_leaf:  	mt_free_rcu(&node->rcu);  } -static inline void __rcu **mas_destroy_descend(struct ma_state *mas, -			struct maple_enode *prev, unsigned char offset) +static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, +	struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)  {  	struct maple_node *node; -	struct maple_enode *next = mas->node; +	struct maple_enode *next = *enode;  	void __rcu **slots = NULL; +	enum maple_type type; +	unsigned char next_offset = 0;  	do { -		mas->node = next; -		node = mas_mn(mas); -		slots = ma_slots(node, mte_node_type(mas->node)); -		next = mas_slot_locked(mas, slots, 0); +		*enode = next; +		node = mte_to_node(*enode); +		type = mte_node_type(*enode); +		slots = ma_slots(node, type); +		next = mt_slot_locked(mt, slots, next_offset);  		if ((mte_dead_node(next))) -			next = mas_slot_locked(mas, slots, 1); +			next = mt_slot_locked(mt, slots, ++next_offset); -		mte_set_node_dead(mas->node); -		node->type = mte_node_type(mas->node); +		mte_set_node_dead(*enode); +		node->type = type;  		node->piv_parent = prev;  		node->parent_slot = offset; -		offset = 0; -		prev = mas->node; +		offset = next_offset; +		next_offset = 0; +		prev = *enode;  	} while (!mte_is_leaf(next));  	return slots;  } -static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags, +static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,  			    bool free)  {  	void __rcu **slots;  	struct maple_node *node = mte_to_node(enode);  	struct maple_enode *start; -	struct maple_tree mt; - -	MA_STATE(mas, &mt, 0, 0); -	if (mte_is_leaf(enode)) +	if (mte_is_leaf(enode)) { +		node->type = mte_node_type(enode);  		goto free_leaf; +	} -	mt_init_flags(&mt, ma_flags); -	mas_lock(&mas); - -	mas.node = start = enode; -	slots = mas_destroy_descend(&mas, start, 0); -	node = mas_mn(&mas); +	start = enode; +	slots = mte_destroy_descend(&enode, mt, start, 0); +	node = mte_to_node(enode); // Updated in the above call.  	do {  		enum maple_type type;  		unsigned char offset;  		struct maple_enode *parent, *tmp; -		node->slot_len = mas_dead_leaves(&mas, slots); +		node->slot_len = mte_dead_leaves(enode, mt, slots);  		if (free)  			mt_free_bulk(node->slot_len, slots);  		offset = node->parent_slot + 1; -		mas.node = node->piv_parent; -		if (mas_mn(&mas) == node) -			goto start_slots_free; +		enode = node->piv_parent; +		if (mte_to_node(enode) == node) +			goto free_leaf; -		type = mte_node_type(mas.node); -		slots = ma_slots(mte_to_node(mas.node), type); +		type = mte_node_type(enode); +		slots = ma_slots(mte_to_node(enode), type);  		if (offset >= mt_slots[type])  			goto next; -		tmp = mas_slot_locked(&mas, slots, offset); +		tmp = mt_slot_locked(mt, slots, offset);  		if (mte_node_type(tmp) && mte_to_node(tmp)) { -			parent = mas.node; -			mas.node = tmp; -			slots = mas_destroy_descend(&mas, parent, offset); +			parent = enode; +			enode = tmp; +			slots = mte_destroy_descend(&enode, mt, parent, offset);  		}  next: -		node = mas_mn(&mas); -	} while (start != mas.node); +		node = mte_to_node(enode); +	} while (start != enode); -	node = mas_mn(&mas); -	node->slot_len = mas_dead_leaves(&mas, slots); +	node = mte_to_node(enode); +	node->slot_len = mte_dead_leaves(enode, mt, slots);  	if (free)  		mt_free_bulk(node->slot_len, slots); -start_slots_free: -	mas_unlock(&mas); -  free_leaf:  	if (free)  		mt_free_rcu(&node->rcu); +	else +		mt_clear_meta(mt, node, node->type);  }  /* @@ -5607,10 +5686,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode,  	struct maple_node *node = mte_to_node(enode);  	if (mt_in_rcu(mt)) { -		mt_destroy_walk(enode, mt->ma_flags, false); +		mt_destroy_walk(enode, mt, false);  		call_rcu(&node->rcu, mt_free_walk);  	} else { -		mt_destroy_walk(enode, mt->ma_flags, true); +		mt_destroy_walk(enode, mt, true);  	}  } @@ -6631,11 +6710,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,  	while (likely(!ma_is_leaf(mt))) {  		MT_BUG_ON(mas->tree, mte_dead_node(mas->node));  		slots = ma_slots(mn, mt); -		pivots = ma_pivots(mn, mt); -		max = pivots[0];  		entry = mas_slot(mas, slots, 0); +		pivots = ma_pivots(mn, mt);  		if (unlikely(ma_dead_node(mn)))  			return NULL; +		max = pivots[0];  		mas->node = entry;  		mn = mas_mn(mas);  		mt = mte_node_type(mas->node); @@ -6655,13 +6734,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,  	if (likely(entry))  		return entry; -	pivots = ma_pivots(mn, mt); -	mas->index = pivots[0] + 1;  	mas->offset = 1;  	entry = mas_slot(mas, slots, 1); +	pivots = ma_pivots(mn, mt);  	if (unlikely(ma_dead_node(mn)))  		return NULL; +	mas->index = pivots[0] + 1;  	if (mas->index > limit)  		goto none; |