Diffstat (limited to 'kernel/bpf/bpf_local_storage.c')
-rw-r--r--	kernel/bpf/bpf_local_storage.c	206
1 file changed, 134 insertions(+), 72 deletions(-)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index f27fa5ba7d72..b39a46e8fb08 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -88,8 +88,14 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
 {
 	struct bpf_local_storage *local_storage;
 
+	/* If RCU Tasks Trace grace period implies RCU grace period, do
+	 * kfree(), else do kfree_rcu().
+	 */
 	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
-	kfree_rcu(local_storage, rcu);
+	if (rcu_trace_implies_rcu_gp())
+		kfree(local_storage);
+	else
+		kfree_rcu(local_storage, rcu);
 }
 
 static void bpf_selem_free_rcu(struct rcu_head *rcu)
@@ -97,16 +103,19 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
 	struct bpf_local_storage_elem *selem;
 
 	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
-	kfree_rcu(selem, rcu);
+	if (rcu_trace_implies_rcu_gp())
+		kfree(selem);
+	else
+		kfree_rcu(selem, rcu);
 }
 
 /* local_storage->lock must be held and selem->local_storage == local_storage.
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
  */
-bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
-				     struct bpf_local_storage_elem *selem,
-				     bool uncharge_mem, bool use_trace_rcu)
+static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+					    struct bpf_local_storage_elem *selem,
+					    bool uncharge_mem, bool use_trace_rcu)
 {
 	struct bpf_local_storage_map *smap;
 	bool free_local_storage;
@@ -233,6 +242,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
 	__bpf_selem_unlink_storage(selem, use_trace_rcu);
 }
 
+/* If cacheit_lockit is false, this lookup function is lockless */
 struct bpf_local_storage_data *
 bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 			 struct bpf_local_storage_map *smap,
@@ -372,7 +382,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
 	    /* BPF_F_LOCK can only be used in a value with spin_lock */
 	    unlikely((map_flags & BPF_F_LOCK) &&
-		     !map_value_has_spin_lock(&smap->map)))
+		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
 		return ERR_PTR(-EINVAL);
 
 	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
@@ -491,7 +501,7 @@ unlock_err:
 	return ERR_PTR(err);
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 {
 	u64 min_usage = U64_MAX;
 	u16 i, res = 0;
@@ -515,76 +525,14 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 	return res;
 }
 
-void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx)
+static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+					     u16 idx)
 {
 	spin_lock(&cache->idx_lock);
 	cache->idx_usage_counts[idx]--;
 	spin_unlock(&cache->idx_lock);
 }
 
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
-				int __percpu *busy_counter)
-{
-	struct bpf_local_storage_elem *selem;
-	struct bpf_local_storage_map_bucket *b;
-	unsigned int i;
-
-	/* Note that this map might be concurrently cloned from
-	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
-	 * RCU read section to finish before proceeding. New RCU
-	 * read sections should be prevented via bpf_map_inc_not_zero.
-	 */
-	synchronize_rcu();
-
-	/* bpf prog and the userspace can no longer access this map
-	 * now.  No new selem (of this map) can be added
-	 * to the owner->storage or to the map bucket's list.
-	 *
-	 * The elem of this map can be cleaned up here
-	 * or when the storage is freed e.g.
-	 * by bpf_sk_storage_free() during __sk_destruct().
-	 */
-	for (i = 0; i < (1U << smap->bucket_log); i++) {
-		b = &smap->buckets[i];
-
-		rcu_read_lock();
-		/* No one is adding to b->list now */
-		while ((selem = hlist_entry_safe(
-				rcu_dereference_raw(hlist_first_rcu(&b->list)),
-				struct bpf_local_storage_elem, map_node))) {
-			if (busy_counter) {
-				migrate_disable();
-				this_cpu_inc(*busy_counter);
-			}
-			bpf_selem_unlink(selem, false);
-			if (busy_counter) {
-				this_cpu_dec(*busy_counter);
-				migrate_enable();
-			}
-			cond_resched_rcu();
-		}
-		rcu_read_unlock();
-	}
-
-	/* While freeing the storage we may still need to access the map.
-	 *
-	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
-	 * which then made the above while((selem = ...)) loop
-	 * exit immediately.
-	 *
-	 * However, while freeing the storage one still needs to access the
-	 * smap->elem_size to do the uncharging in
-	 * bpf_selem_unlink_storage_nolock().
-	 *
-	 * Hence, wait another rcu grace period for the storage to be freed.
-	 */
-	synchronize_rcu();
-
-	kvfree(smap->buckets);
-	bpf_map_area_free(smap);
-}
-
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
@@ -604,7 +552,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
-struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
 	unsigned int i;
@@ -654,3 +602,117 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
 
 	return 0;
 }
+
+bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
+{
+	struct bpf_local_storage_elem *selem;
+	bool free_storage = false;
+	struct hlist_node *n;
+
+	/* Neither the bpf_prog nor the bpf_map's syscall
+	 * could be modifying the local_storage->list now.
+	 * Thus, no elem can be added to or deleted from the
+	 * local_storage->list by the bpf_prog or by the bpf_map's syscall.
+	 *
+	 * It is racing with bpf_local_storage_map_free() alone
+	 * when unlinking elem from the local_storage->list and
+	 * the map's bucket->list.
+	 */
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * local_storage.
+		 */
+		bpf_selem_unlink_map(selem);
+		/* If local_storage list has only one element, the
+		 * bpf_selem_unlink_storage_nolock() will return true.
+		 * Otherwise, it will return false. The current loop iteration
+		 * intends to remove all local storage. So the last iteration
+		 * of the loop will set the free_cgroup_storage to true.
+		 */
+		free_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, false, false);
+	}
+
+	return free_storage;
+}
+
+struct bpf_map *
+bpf_local_storage_map_alloc(union bpf_attr *attr,
+			    struct bpf_local_storage_cache *cache)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = __bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
+	return &smap->map;
+}
+
+void bpf_local_storage_map_free(struct bpf_map *map,
+				struct bpf_local_storage_cache *cache,
+				int __percpu *busy_counter)
+{
+	struct bpf_local_storage_map_bucket *b;
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *smap;
+	unsigned int i;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(cache, smap->cache_idx);
+
+	/* Note that this map might be concurrently cloned from
+	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+	 * RCU read section to finish before proceeding. New RCU
+	 * read sections should be prevented via bpf_map_inc_not_zero.
+	 */
+	synchronize_rcu();
+
+	/* bpf prog and the userspace can no longer access this map
+	 * now.  No new selem (of this map) can be added
+	 * to the owner->storage or to the map bucket's list.
+	 *
+	 * The elem of this map can be cleaned up here
+	 * or when the storage is freed e.g.
+	 * by bpf_sk_storage_free() during __sk_destruct().
+	 */
+	for (i = 0; i < (1U << smap->bucket_log); i++) {
+		b = &smap->buckets[i];
+
+		rcu_read_lock();
+		/* No one is adding to b->list now */
+		while ((selem = hlist_entry_safe(
+				rcu_dereference_raw(hlist_first_rcu(&b->list)),
+				struct bpf_local_storage_elem, map_node))) {
+			if (busy_counter) {
+				migrate_disable();
+				this_cpu_inc(*busy_counter);
+			}
+			bpf_selem_unlink(selem, false);
+			if (busy_counter) {
+				this_cpu_dec(*busy_counter);
+				migrate_enable();
+			}
+			cond_resched_rcu();
+		}
+		rcu_read_unlock();
+	}
+
+	/* While freeing the storage we may still need to access the map.
+	 *
+	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
+	 * which then made the above while((selem = ...)) loop
+	 * exit immediately.
+	 *
+	 * However, while freeing the storage one still needs to access the
+	 * smap->elem_size to do the uncharging in
+	 * bpf_selem_unlink_storage_nolock().
+	 *
+	 * Hence, wait another rcu grace period for the storage to be freed.
+	 */
+	synchronize_rcu();
+
+	kvfree(smap->buckets);
+	bpf_map_area_free(smap);
+}
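The net effect of this diff is that the map alloc/free paths now live in the common code and take the owner-specific bpf_local_storage_cache as a parameter. The sketch below shows how a caller such as a bpf_sk_storage-style map could delegate to the new helpers; it is an illustration only (the callers are changed in other files not shown here), and the example_* function names and the sk_cache name are assumptions for the example.

	/* Illustrative sketch, not part of this diff. Assumes the
	 * DEFINE_BPF_STORAGE_CACHE() helper from bpf_local_storage.h
	 * and hypothetical example_* map ops for an sk-storage-like map.
	 */
	DEFINE_BPF_STORAGE_CACHE(sk_cache);

	static struct bpf_map *example_sk_storage_map_alloc(union bpf_attr *attr)
	{
		/* Common code picks a cache_idx from sk_cache and returns
		 * the embedded struct bpf_map directly.
		 */
		return bpf_local_storage_map_alloc(attr, &sk_cache);
	}

	static void example_sk_storage_map_free(struct bpf_map *map)
	{
		/* Common code releases the cache_idx, drains the buckets and
		 * frees the map. sk storage does not need a busy counter, so
		 * NULL is passed; task storage would pass its per-cpu counter.
		 */
		bpf_local_storage_map_free(map, &sk_cache, NULL);
	}

With this shape, the only per-owner pieces left in the callers are the cache, the optional busy counter, and the owner-specific unlink path built on bpf_local_storage_unlink_nolock().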