Diffstat (limited to 'kernel/bpf/bpf_local_storage.c')
-rw-r--r-- | kernel/bpf/bpf_local_storage.c | 85
1 file changed, 77 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 35f4138a54dc..3d320393a12c 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -51,11 +51,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner)
         return map->ops->map_owner_storage_ptr(owner);
 }
 
+static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
+{
+        return !hlist_unhashed_lockless(&selem->snode);
+}
+
 static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
 {
         return !hlist_unhashed(&selem->snode);
 }
 
+static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
+{
+        return !hlist_unhashed_lockless(&selem->map_node);
+}
+
 static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 {
         return !hlist_unhashed(&selem->map_node);
@@ -75,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
         if (selem) {
                 if (value)
                         copy_map_value(&smap->map, SDATA(selem)->data, value);
+                /* No need to call check_and_init_map_value as memory is zero init */
                 return selem;
         }
 
@@ -98,7 +109,28 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
                 kfree_rcu(local_storage, rcu);
 }
 
-static void bpf_selem_free_rcu(struct rcu_head *rcu)
+static void bpf_selem_free_fields_rcu(struct rcu_head *rcu)
+{
+        struct bpf_local_storage_elem *selem;
+        struct bpf_local_storage_map *smap;
+
+        selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+        /* protected by the rcu_barrier*() */
+        smap = rcu_dereference_protected(SDATA(selem)->smap, true);
+        bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+        kfree(selem);
+}
+
+static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu)
+{
+        /* Free directly if Tasks Trace RCU GP also implies RCU GP */
+        if (rcu_trace_implies_rcu_gp())
+                bpf_selem_free_fields_rcu(rcu);
+        else
+                call_rcu(rcu, bpf_selem_free_fields_rcu);
+}
+
+static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
 {
         struct bpf_local_storage_elem *selem;
 
@@ -119,6 +151,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
 {
         struct bpf_local_storage_map *smap;
         bool free_local_storage;
+        struct btf_record *rec;
         void *owner;
 
         smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
@@ -159,10 +192,26 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
             SDATA(selem))
                 RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
 
-        if (use_trace_rcu)
-                call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
-        else
-                kfree_rcu(selem, rcu);
+        /* A different RCU callback is chosen whenever we need to free
+         * additional fields in selem data before freeing selem.
+         * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU
+         * callbacks when it has special fields, hence we can only conditionally
+         * dereference smap, as by this time the map might have already been
+         * freed without waiting for our call_rcu callback if it did not have
+         * any special fields.
+         */
+        rec = smap->map.record;
+        if (use_trace_rcu) {
+                if (!IS_ERR_OR_NULL(rec))
+                        call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu);
+                else
+                        call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
+        } else {
+                if (!IS_ERR_OR_NULL(rec))
+                        call_rcu(&selem->rcu, bpf_selem_free_fields_rcu);
+                else
+                        kfree_rcu(selem, rcu);
+        }
 
         return free_local_storage;
 }
@@ -174,7 +223,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
         bool free_local_storage = false;
         unsigned long flags;
 
-        if (unlikely(!selem_linked_to_storage(selem)))
+        if (unlikely(!selem_linked_to_storage_lockless(selem)))
                 /* selem has already been unlinked from sk */
                 return;
 
@@ -208,7 +257,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
         struct bpf_local_storage_map_bucket *b;
         unsigned long flags;
 
-        if (unlikely(!selem_linked_to_map(selem)))
+        if (unlikely(!selem_linked_to_map_lockless(selem)))
                 /* selem has already be unlinked from smap */
                 return;
 
@@ -420,7 +469,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                 err = check_flags(old_sdata, map_flags);
                 if (err)
                         return ERR_PTR(err);
-                if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
+                if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
                         copy_map_value_locked(&smap->map, old_sdata->data,
                                               value, false);
                         return old_sdata;
@@ -713,6 +762,26 @@ void bpf_local_storage_map_free(struct bpf_map *map,
          */
         synchronize_rcu();
 
+        /* Only delay freeing of smap, buckets are not needed anymore */
         kvfree(smap->buckets);
+
+        /* When local storage has special fields, callbacks for
+         * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will
+         * keep using the map BTF record, we need to execute an RCU barrier to
+         * wait for them as the record will be freed right after our map_free
+         * callback.
+         */
+        if (!IS_ERR_OR_NULL(smap->map.record)) {
+                rcu_barrier_tasks_trace();
+                /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
+                 * is true, because while call_rcu invocation is skipped in that
+                 * case in bpf_selem_free_fields_trace_rcu (and all local
+                 * storage maps pass use_trace_rcu = true), there can be
+                 * call_rcu callbacks based on use_trace_rcu = false in the
+                 * while ((selem = ...)) loop above or when owner's free path
+                 * calls bpf_local_storage_unlink_nolock.
+                 */
+                rcu_barrier();
+        }
         bpf_map_area_free(smap);
 }
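
The unlink path above picks between four freeing strategies (Tasks Trace RCU vs. normal RCU, with or without a field-freeing step). The core trick is the chained grace period in bpf_selem_free_fields_trace_rcu(): wait for a Tasks Trace RCU grace period first, then free immediately if that grace period also implies a normal RCU grace period, otherwise chain one more call_rcu(). A minimal sketch of that pattern follows, using hypothetical names (struct foo, foo_release(), foo_free_rcu(), foo_free_trace_rcu()); it illustrates the pattern only and is not code from this patch.

#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/slab.h>

struct foo {
        struct rcu_head rcu;
        /* payload that sleepable (Tasks Trace RCU) and normal RCU readers may touch */
};

static void foo_free_rcu(struct rcu_head *rcu)
{
        struct foo *f = container_of(rcu, struct foo, rcu);

        /* Both grace periods have elapsed once we get here. */
        kfree(f);
}

static void foo_free_trace_rcu(struct rcu_head *rcu)
{
        /* A Tasks Trace RCU GP has elapsed. Free directly if it also
         * implies a normal RCU GP, otherwise chain one more call_rcu().
         */
        if (rcu_trace_implies_rcu_gp())
                foo_free_rcu(rcu);
        else
                call_rcu(rcu, foo_free_rcu);
}

static void foo_release(struct foo *f)
{
        /* Queue behind the Tasks Trace RCU grace period first. */
        call_rcu_tasks_trace(&f->rcu, foo_free_trace_rcu);
}

The barrier logic in bpf_local_storage_map_free() follows from the same chaining: any state the callbacks still dereference (here the map's btf_record used by bpf_obj_free_fields()) may only be freed after rcu_barrier_tasks_trace() and, because a callback may have re-queued itself with call_rcu(), also rcu_barrier(). That is why the patch issues both barriers just before bpf_map_area_free(), given that the record is freed right after the map_free callback returns.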