aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/arraymap.c
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2023-12-04 17:50:27 -0800
committerAlexei Starovoitov <ast@kernel.org>2023-12-04 17:50:27 -0800
commitce3c49da11d77aa7d53cd549d308eb5f7fed8576 (patch)
treeb00bb98c69f23ce4e8e9ca2b06d056c07bef5b12 /kernel/bpf/arraymap.c
parent153de60e8bfb4501e1462a2f74cb787c137b996c (diff)
parente3dd40828534a67931e0dd00fcd35846271fd4e8 (diff)
Merge branch 'bpf-fix-the-release-of-inner-map'
Hou Tao says: ==================== bpf: Fix the release of inner map From: Hou Tao <houtao1@huawei.com> Hi, The patchset aims to fix the release of inner map in map array or map htab. The release of inner map is different with normal map. For normal map, the map is released after the bpf program which uses the map is destroyed, because the bpf program tracks the used maps. However bpf program can not track the used inner map because these inner map may be updated or deleted dynamically, and for now the ref-counter of inner map is decreased after the inner map is remove from outer map, so the inner map may be freed before the bpf program, which is accessing the inner map, exits and there will be use-after-free problem as demonstrated by patch #6. The patchset fixes the problem by deferring the release of inner map. The freeing of inner map is deferred according to the sleepable attributes of the bpf programs which own the outer map. Patch #1 fixes the warning when running the newly-added selftest under interpreter mode. Patch #2 adds more parameters to .map_fd_put_ptr() to prepare for the fix. Patch #3 fixes the incorrect value of need_defer when freeing the fd array. Patch #4 fixes the potential use-after-free problem by using call_rcu_tasks_trace() and call_rcu() to wait for one tasks trace RCU GP and one RCU GP unconditionally. Patch #5 optimizes the free of inner map by removing the unnecessary RCU GP waiting. Patch #6 adds a selftest to demonstrate the potential use-after-free problem. Patch #7 updates a selftest to update outer map in syscall bpf program. Please see individual patches for more details. And comments are always welcome. Change Log: v5: * patch #3: rename fd_array_map_delete_elem_with_deferred_free() to __fd_array_map_delete_elem() (Alexei) * patch #5: use atomic64_t instead of atomic_t to prevent potential overflow (Alexei) * patch #7: use ptr_to_u64() helper instead of force casting to initialize pointers in bpf_attr (Alexei) v4: https://lore.kernel.org/bpf/20231130140120.1736235-1-houtao@huaweicloud.com * patch #2: don't use "deferred", use "need_defer" uniformly * patch #3: newly-added, fix the incorrect value of need_defer during fd array free. * patch #4: doesn't consider the case in which bpf map is not used by any bpf program and only use sleepable_refcnt to remove unnecessary tasks trace RCU GP (Alexei) * patch #4: remove memory barriers added due to cautiousness (Alexei) v3: https://lore.kernel.org/bpf/20231124113033.503338-1-houtao@huaweicloud.com * multiple variable renamings (Martin) * define BPF_MAP_RCU_GP/BPF_MAP_RCU_TT_GP as bit (Martin) * use call_rcu() and its variants instead of synchronize_rcu() (Martin) * remove unnecessary mask in bpf_map_free_deferred() (Martin) * place atomic_or() and the related smp_mb() together (Martin) * add patch #6 to demonstrate that updating outer map in syscall program is dead-lock free (Alexei) * update comments about the memory barrier in bpf_map_fd_put_ptr() * update commit message for patch #3 and #4 to describe more details v2: https://lore.kernel.org/bpf/20231113123324.3914612-1-houtao@huaweicloud.com * defer the invocation of ops->map_free() instead of bpf_map_put() (Martin) * update selftest to make it being reproducible under JIT mode (Martin) * remove unnecessary preparatory patches v1: https://lore.kernel.org/bpf/20231107140702.1891778-1-houtao@huaweicloud.com ==================== Link: https://lore.kernel.org/r/20231204140425.1480317-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/arraymap.c')
-rw-r--r--kernel/bpf/arraymap.c33
1 files changed, 20 insertions, 13 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..4a4a67956e21 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -867,11 +867,11 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
}
if (old_ptr)
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
}
-static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
@@ -890,13 +890,18 @@ static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
if (old_ptr) {
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
return 0;
} else {
return -ENOENT;
}
}
+static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return __fd_array_map_delete_elem(map, key, true);
+}
+
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
@@ -913,8 +918,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
return prog;
}
-static void prog_fd_array_put_ptr(void *ptr)
+static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_prog is freed after one RCU or tasks trace grace period */
bpf_prog_put(ptr);
}
@@ -924,13 +930,13 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
-static void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
for (i = 0; i < array->map.max_entries; i++)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, need_defer);
}
static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -1109,7 +1115,7 @@ static void prog_array_map_clear_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_array_aux,
work)->map;
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, true);
bpf_map_put(map);
}
@@ -1239,8 +1245,9 @@ err_out:
return ee;
}
-static void perf_event_fd_array_put_ptr(void *ptr)
+static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
@@ -1258,7 +1265,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, true);
}
rcu_read_unlock();
}
@@ -1266,7 +1273,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
static void perf_event_fd_array_map_free(struct bpf_map *map)
{
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1294,7 +1301,7 @@ static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
return cgroup_get_from_fd(fd);
}
-static void cgroup_fd_array_put_ptr(void *ptr)
+static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
@@ -1302,7 +1309,7 @@ static void cgroup_fd_array_put_ptr(void *ptr)
static void cgroup_fd_array_free(struct bpf_map *map)
{
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1347,7 +1354,7 @@ static void array_of_map_free(struct bpf_map *map)
* is protected by fdget/fdput.
*/
bpf_map_meta_free(map->inner_map_meta);
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}