aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/arraymap.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/arraymap.c')
-rw-r--r--kernel/bpf/arraymap.c149
1 files changed, 78 insertions, 71 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..feabc0193852 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
- bool bypass_spec_v1 = bpf_bypass_spec_v1();
+ bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
u64 array_size, mask64;
struct bpf_array *array;
@@ -246,6 +246,38 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
+/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
+static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_insn *insn = insn_buf;
+
+ if (!bpf_jit_supports_percpu_insn())
+ return -EOPNOTSUPP;
+
+ if (map->map_flags & BPF_F_INNER_MAP)
+ return -EOPNOTSUPP;
+
+ BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
+
+ *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
+ if (!map->bypass_spec_v1) {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
+ } else {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+ }
+
+ *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+ *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
+ *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0);
+ return insn - insn_buf;
+}
+
static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -396,17 +428,22 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
return (void *)round_down((unsigned long)array, PAGE_SIZE);
}
-static void array_map_free_timers(struct bpf_map *map)
+static void array_map_free_timers_wq(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
- /* We don't reset or free fields other than timer on uref dropping to zero. */
- if (!btf_record_has_field(map->record, BPF_TIMER))
- return;
-
- for (i = 0; i < array->map.max_entries; i++)
- bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
+ /* We don't reset or free fields other than timer and workqueue
+ * on uref dropping to zero.
+ */
+ if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
+ for (i = 0; i < array->map.max_entries; i++) {
+ if (btf_record_has_field(map->record, BPF_TIMER))
+ bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
+ if (btf_record_has_field(map->record, BPF_WORKQUEUE))
+ bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
+ }
+ }
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -750,7 +787,7 @@ const struct bpf_map_ops array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
- .map_release_uref = array_map_free_timers,
+ .map_release_uref = array_map_free_timers_wq,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
@@ -776,6 +813,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = percpu_array_map_lookup_elem,
+ .map_gen_lookup = percpu_array_map_gen_lookup,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
@@ -867,11 +905,11 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
}
if (old_ptr)
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
}
-static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
@@ -890,13 +928,18 @@ static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
if (old_ptr) {
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
return 0;
} else {
return -ENOENT;
}
}
+static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return __fd_array_map_delete_elem(map, key, true);
+}
+
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
@@ -913,8 +956,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
return prog;
}
-static void prog_fd_array_put_ptr(void *ptr)
+static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_prog is freed after one RCU or tasks trace grace period */
bpf_prog_put(ptr);
}
@@ -924,13 +968,13 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
-static void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
for (i = 0; i < array->map.max_entries; i++)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, need_defer);
}
static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -1012,11 +1056,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
mutex_unlock(&aux->poke_mutex);
}
+void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old)
+{
+ WARN_ON_ONCE(1);
+}
+
static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
struct bpf_prog *old,
struct bpf_prog *new)
{
- u8 *old_addr, *new_addr, *old_bypass_addr;
struct prog_poke_elem *elem;
struct bpf_array_aux *aux;
@@ -1025,7 +1074,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
list_for_each_entry(elem, &aux->poke_progs, list) {
struct bpf_jit_poke_descriptor *poke;
- int i, ret;
+ int i;
for (i = 0; i < elem->aux->size_poke_tab; i++) {
poke = &elem->aux->poke_tab[i];
@@ -1044,21 +1093,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* activated, so tail call updates can arrive from here
* while JIT is still finishing its final fixup for
* non-activated poke entries.
- * 3) On program teardown, the program's kallsym entry gets
- * removed out of RCU callback, but we can only untrack
- * from sleepable context, therefore bpf_arch_text_poke()
- * might not see that this is in BPF text section and
- * bails out with -EINVAL. As these are unreachable since
- * RCU grace period already passed, we simply skip them.
- * 4) Also programs reaching refcount of zero while patching
+ * 3) Also programs reaching refcount of zero while patching
* is in progress is okay since we're protected under
* poke_mutex and untrack the programs before the JIT
- * buffer is freed. When we're still in the middle of
- * patching and suddenly kallsyms entry of the program
- * gets evicted, we just skip the rest which is fine due
- * to point 3).
- * 5) Any other error happening below from bpf_arch_text_poke()
- * is a unexpected bug.
+ * buffer is freed.
*/
if (!READ_ONCE(poke->tailcall_target_stable))
continue;
@@ -1068,39 +1106,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
poke->tail_call.key != key)
continue;
- old_bypass_addr = old ? NULL : poke->bypass_addr;
- old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
- new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
-
- if (new) {
- ret = bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, new_addr);
- BUG_ON(ret < 0 && ret != -EINVAL);
- if (!old) {
- ret = bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- poke->bypass_addr,
- NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
- }
- } else {
- ret = bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- old_bypass_addr,
- poke->bypass_addr);
- BUG_ON(ret < 0 && ret != -EINVAL);
- /* let other CPUs finish the execution of program
- * so that it will not possible to expose them
- * to invalid nop, stack unwind, nop state
- */
- if (!ret)
- synchronize_rcu();
- ret = bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
- }
+ bpf_arch_poke_desc_update(poke, new, old);
}
}
}
@@ -1109,7 +1115,7 @@ static void prog_array_map_clear_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_array_aux,
work)->map;
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, true);
bpf_map_put(map);
}
@@ -1189,7 +1195,7 @@ static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
{
struct bpf_event_entry *ee;
- ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
+ ee = kzalloc(sizeof(*ee), GFP_KERNEL);
if (ee) {
ee->event = perf_file->private_data;
ee->perf_file = perf_file;
@@ -1239,8 +1245,9 @@ err_out:
return ee;
}
-static void perf_event_fd_array_put_ptr(void *ptr)
+static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
@@ -1258,7 +1265,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, true);
}
rcu_read_unlock();
}
@@ -1266,7 +1273,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
static void perf_event_fd_array_map_free(struct bpf_map *map)
{
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1294,7 +1301,7 @@ static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
return cgroup_get_from_fd(fd);
}
-static void cgroup_fd_array_put_ptr(void *ptr)
+static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
@@ -1302,7 +1309,7 @@ static void cgroup_fd_array_put_ptr(void *ptr)
static void cgroup_fd_array_free(struct bpf_map *map)
{
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1347,7 +1354,7 @@ static void array_of_map_free(struct bpf_map *map)
* is protected by fdget/fdput.
*/
bpf_map_meta_free(map->inner_map_meta);
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}