aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/bpf_local_storage.h6
-rw-r--r--kernel/bpf/bpf_local_storage.c48
-rw-r--r--kernel/bpf/syscall.c6
-rw-r--r--kernel/bpf/verifier.c12
4 files changed, 63 insertions, 9 deletions
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 6d37a40cd90e..0fe92986412b 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -74,6 +74,12 @@ struct bpf_local_storage_elem {
struct hlist_node snode; /* Linked to bpf_local_storage */
struct bpf_local_storage __rcu *local_storage;
struct rcu_head rcu;
+ bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU
+ * callbacks? bpf_local_storage_map_free only
+ * executes rcu_barrier when there are special
+ * fields, this field remembers that to ensure we
+ * don't access already freed smap in sdata.
+ */
/* 8 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines access during a cache hit.
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 58da17ae5124..2bdd722fe293 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -85,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (selem) {
if (value)
copy_map_value(&smap->map, SDATA(selem)->data, value);
+ /* No need to call check_and_init_map_value as memory is zero init */
return selem;
}
@@ -113,10 +114,25 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
struct bpf_local_storage_elem *selem;
selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+ /* The can_use_smap bool is set whenever we need to free additional
+ * fields in selem data before freeing selem. bpf_local_storage_map_free
+ * only executes rcu_barrier to wait for RCU callbacks when it has
+ * special fields, hence we can only conditionally dereference smap, as
+ * by this time the map might have already been freed without waiting
+ * for our call_rcu callback if it did not have any special fields.
+ */
+ if (selem->can_use_smap)
+ bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data);
+ kfree(selem);
+}
+
+static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu)
+{
+ /* Free directly if Tasks Trace RCU GP also implies RCU GP */
if (rcu_trace_implies_rcu_gp())
- kfree(selem);
+ bpf_selem_free_rcu(rcu);
else
- kfree_rcu(selem, rcu);
+ call_rcu(rcu, bpf_selem_free_rcu);
}
/* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -170,9 +186,9 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
if (use_trace_rcu)
- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu);
else
- kfree_rcu(selem, rcu);
+ call_rcu(&selem->rcu, bpf_selem_free_rcu);
return free_local_storage;
}
@@ -240,6 +256,11 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
hlist_add_head_rcu(&selem->map_node, &b->list);
raw_spin_unlock_irqrestore(&b->lock, flags);
+
+ /* If our data will have special fields, smap will wait for us to use
+ * its record in bpf_selem_free_* RCU callbacks before freeing itself.
+ */
+ selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record);
}
void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
@@ -723,6 +744,25 @@ void bpf_local_storage_map_free(struct bpf_map *map,
*/
synchronize_rcu();
+ /* Only delay freeing of smap, buckets are not needed anymore */
kvfree(smap->buckets);
+
+ /* When local storage has special fields, callbacks for
+ * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using
+ * the map BTF record, we need to execute an RCU barrier to wait for
+ * them as the record will be freed right after our map_free callback.
+ */
+ if (!IS_ERR_OR_NULL(smap->map.record)) {
+ rcu_barrier_tasks_trace();
+ /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
+ * is true, because while call_rcu invocation is skipped in that
+ * case in bpf_selem_free_tasks_trace_rcu (and all local storage
+ * maps pass use_trace_rcu = true), there can be call_rcu
+ * callbacks based on use_trace_rcu = false in the earlier while
+ * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock
+ * called from owner's free path.
+ */
+ rcu_barrier();
+ }
bpf_map_area_free(smap);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index da117a2a83b2..eb50025b03c1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1063,7 +1063,11 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
- map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
ret = -EOPNOTSUPP;
goto free_map_tab;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a856896e835a..bf580f246a01 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7222,22 +7222,26 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
break;
case BPF_MAP_TYPE_SK_STORAGE:
if (func_id != BPF_FUNC_sk_storage_get &&
- func_id != BPF_FUNC_sk_storage_delete)
+ func_id != BPF_FUNC_sk_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_INODE_STORAGE:
if (func_id != BPF_FUNC_inode_storage_get &&
- func_id != BPF_FUNC_inode_storage_delete)
+ func_id != BPF_FUNC_inode_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_TASK_STORAGE:
if (func_id != BPF_FUNC_task_storage_get &&
- func_id != BPF_FUNC_task_storage_delete)
+ func_id != BPF_FUNC_task_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_CGRP_STORAGE:
if (func_id != BPF_FUNC_cgrp_storage_get &&
- func_id != BPF_FUNC_cgrp_storage_delete)
+ func_id != BPF_FUNC_cgrp_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_BLOOM_FILTER: