From f062226d8d59b521ddc946ad791048188a16722a Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sun, 5 Mar 2023 12:46:09 +0000 Subject: bpf: bpf_struct_ops memory usage A new helper is introduced to calculate bpf_struct_ops memory usage. The result is as follows: - before 1: struct_ops name count_map flags 0x0 key 4B value 256B max_entries 1 memlock 4096B btf_id 73 - after 1: struct_ops name count_map flags 0x0 key 4B value 256B max_entries 1 memlock 5016B btf_id 73 Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20230305124615.12358-13-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_struct_ops.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'kernel/bpf/bpf_struct_ops.c') diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index ece9870cab68..38903fb52f98 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -641,6 +641,21 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) return map; } +static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map) +{ + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; + const struct bpf_struct_ops *st_ops = st_map->st_ops; + const struct btf_type *vt = st_ops->value_type; + u64 usage; + + usage = sizeof(*st_map) + + vt->size - sizeof(struct bpf_struct_ops_value); + usage += vt->size; + usage += btf_type_vlen(vt) * sizeof(struct bpf_links *); + usage += PAGE_SIZE; + return usage; +} + BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map) const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_alloc_check = bpf_struct_ops_map_alloc_check, @@ -651,6 +666,7 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_delete_elem = bpf_struct_ops_map_delete_elem, .map_update_elem = bpf_struct_ops_map_update_elem, .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, + .map_mem_usage = bpf_struct_ops_map_mem_usage, .map_btf_id = &bpf_struct_ops_map_btf_ids[0], }; -- cgit From d7ba4cc900bf1eea2d8c807c6b1fc6bd61f41237 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Wed, 22 Mar 2023 12:47:54 -0700 Subject: bpf: return long from bpf_map_ops funcs This patch changes the return types of bpf_map_ops functions to long, where previously int was returned. Using long allows for bpf programs to maintain the sign bit in the absence of sign extension during situations where inlined bpf helper funcs make calls to the bpf_map_ops funcs and a negative error is returned. The definitions of the helper funcs are generated from comments in the bpf uapi header at `include/uapi/linux/bpf.h`. The return type of these helpers was previously changed from int to long in commit bdb7b79b4ce8. For any case where one of the map helpers calls a bpf_map_ops func that still returns a 32-bit int, a compiler might not include sign extension instructions to properly convert the 32-bit negative value to a 64-bit negative value (a standalone C demonstration of this hazard appears at the end of this log). For example: bpf assembly excerpt of an inlined helper calling a kernel function and checking for a specific error: ; err = bpf_map_update_elem(&mymap, &key, &val, BPF_NOEXIST); ... 46: call 0xffffffffe103291c ; htab_map_update_elem ; if (err && err != -EEXIST) { 4b: cmp $0xffffffffffffffef,%rax ; cmp -EEXIST,%rax kernel function assembly excerpt of return value from `htab_map_update_elem` returning 32-bit int: movl $0xffffffef, %r9d ...
movl %r9d, %eax ...results in the comparison: cmp $0xffffffffffffffef, $0x00000000ffffffef Fixes: bdb7b79b4ce8 ("bpf: Switch most helper return values from 32-bit int to 64-bit long") Tested-by: Eduard Zingerman Signed-off-by: JP Kobryn Link: https://lore.kernel.org/r/20230322194754.185781-3-inwardvessel@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 14 +++++++------- include/linux/filter.h | 6 +++--- kernel/bpf/arraymap.c | 12 ++++++------ kernel/bpf/bloom_filter.c | 12 ++++++------ kernel/bpf/bpf_cgrp_storage.c | 6 +++--- kernel/bpf/bpf_inode_storage.c | 6 +++--- kernel/bpf/bpf_struct_ops.c | 6 +++--- kernel/bpf/bpf_task_storage.c | 6 +++--- kernel/bpf/cpumap.c | 8 ++++---- kernel/bpf/devmap.c | 24 ++++++++++++------------ kernel/bpf/hashtab.c | 36 ++++++++++++++++++------------------ kernel/bpf/local_storage.c | 6 +++--- kernel/bpf/lpm_trie.c | 6 +++--- kernel/bpf/queue_stack_maps.c | 22 +++++++++++----------- kernel/bpf/reuseport_array.c | 2 +- kernel/bpf/ringbuf.c | 6 +++--- kernel/bpf/stackmap.c | 6 +++--- kernel/bpf/verifier.c | 14 +++++++------- net/core/bpf_sk_storage.c | 6 +++--- net/core/sock_map.c | 8 ++++---- net/xdp/xskmap.c | 8 ++++---- 21 files changed, 110 insertions(+), 110 deletions(-) (limited to 'kernel/bpf/bpf_struct_ops.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3ef98fb92987..ec0df059f562 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,11 +96,11 @@ struct bpf_map_ops { /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); - int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); - int (*map_delete_elem)(struct bpf_map *map, void *key); - int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); - int (*map_pop_elem)(struct bpf_map *map, void *value); - int (*map_peek_elem)(struct bpf_map *map, void *value); + long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); + long (*map_delete_elem)(struct bpf_map *map, void *key); + long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + long (*map_pop_elem)(struct bpf_map *map, void *value); + long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ @@ -139,7 +139,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); + long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map. 
It is a runtime check to ensure @@ -157,7 +157,7 @@ struct bpf_map_ops { int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); - int (*map_for_each_callback)(struct bpf_map *map, + long (*map_for_each_callback)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags); diff --git a/include/linux/filter.h b/include/linux/filter.h index efa5d4a1677e..23c08c31bea9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1504,9 +1504,9 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ -static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, - u64 flags, const u64 flag_mask, - void *lookup_elem(struct bpf_map *map, u32 key)) +static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, + u64 flags, const u64 flag_mask, + void *lookup_elem(struct bpf_map *map, u32 key)) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1588c793a715..2058e89b5ddd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -307,8 +307,8 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key } /* Called from syscall or from eBPF program */ -static int array_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long array_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); u32 index = *(u32 *)key; @@ -386,7 +386,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, } /* Called from syscall or from eBPF program */ -static int array_map_delete_elem(struct bpf_map *map, void *key) +static long array_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } @@ -686,8 +686,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), }; -static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, - void *callback_ctx, u64 flags) +static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn, + void *callback_ctx, u64 flags) { u32 i, key, num_elems = 0; struct bpf_array *array; @@ -871,7 +871,7 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, return 0; } -static int fd_array_map_delete_elem(struct bpf_map *map, void *key) +static long fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); void *old_ptr; diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 6350c5d35a9b..db19784601a7 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -41,7 +41,7 @@ static u32 hash(struct bpf_bloom_filter *bloom, void *value, return h & bloom->bitset_mask; } -static int bloom_map_peek_elem(struct bpf_map *map, void *value) +static long bloom_map_peek_elem(struct bpf_map *map, void *value) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -56,7 +56,7 @@ static int bloom_map_peek_elem(struct bpf_map *map, void *value) return 0; } -static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) +static long bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct 
bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); @@ -73,12 +73,12 @@ static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) return 0; } -static int bloom_map_pop_elem(struct bpf_map *map, void *value) +static long bloom_map_pop_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } -static int bloom_map_delete_elem(struct bpf_map *map, void *value) +static long bloom_map_delete_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } @@ -177,8 +177,8 @@ static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(-EINVAL); } -static int bloom_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long bloom_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { /* The eBPF program should use map_push_elem instead */ return -EINVAL; diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index c975cacdd16b..f5b016a5484d 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -93,8 +93,8 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key) return sdata ? sdata->data : NULL; } -static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct cgroup *cgroup; @@ -125,7 +125,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) return 0; } -static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) { struct cgroup *cgroup; int err, fd; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index ad2ab0187e45..9a5f05151898 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -91,8 +91,8 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) return sdata ? 
sdata->data : NULL; } -static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct file *f; @@ -127,7 +127,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) return 0; } -static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) { struct file *f; int fd, err; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 38903fb52f98..ba7a94276e3b 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -349,8 +349,8 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, model, flags, tlinks, NULL); } -static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; const struct bpf_struct_ops *st_ops = st_map->st_ops; @@ -524,7 +524,7 @@ unlock: return err; } -static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) +static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) { enum bpf_struct_ops_state prev_state; struct bpf_struct_ops_map *st_map; diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index c88cc04c17c1..ab5bd1ef58c4 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -120,8 +120,8 @@ out: return ERR_PTR(err); } -static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct task_struct *task; @@ -173,7 +173,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map, return 0; } -static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) { struct task_struct *task; unsigned int f_flags; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 871809e71b4e..8ec18faa74ac 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -540,7 +540,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, } } -static int cpu_map_delete_elem(struct bpf_map *map, void *key) +static long cpu_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); u32 key_cpu = *(u32 *)key; @@ -553,8 +553,8 @@ static int cpu_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long cpu_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); struct bpf_cpumap_val cpumap_value = {}; @@ -667,7 +667,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } -static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags) +static long cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags) { return __bpf_xdp_redirect_map(map, index, flags, 0, __cpu_map_lookup_elem); diff --git a/kernel/bpf/devmap.c 
b/kernel/bpf/devmap.c index 19b036a228f7..802692fa3905 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -809,7 +809,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu) kfree(dev); } -static int dev_map_delete_elem(struct bpf_map *map, void *key) +static long dev_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; @@ -826,7 +826,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) +static long dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; @@ -897,8 +897,8 @@ err_out: return ERR_PTR(-EINVAL); } -static int __dev_map_update_elem(struct net *net, struct bpf_map *map, - void *key, void *value, u64 map_flags) +static long __dev_map_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; @@ -939,15 +939,15 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map, return 0; } -static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long dev_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { return __dev_map_update_elem(current->nsproxy->net_ns, map, key, value, map_flags); } -static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, - void *key, void *value, u64 map_flags) +static long __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; @@ -999,21 +999,21 @@ out_err: return err; } -static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { return __dev_map_hash_update_elem(current->nsproxy->net_ns, map, key, value, map_flags); } -static int dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) +static long dev_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, __dev_map_lookup_elem); } -static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) +static long dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags) { return __bpf_xdp_redirect_map(map, ifindex, flags, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 0df4b0c10f59..96b645bba3a4 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1073,8 +1073,8 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, } /* Called from syscall or from eBPF program */ -static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -1175,8 +1175,8 @@ static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) bpf_lru_push_free(&htab->lru, &elem->lru_node); } -static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void 
*value, - u64 map_flags) +static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old = NULL; @@ -1242,9 +1242,9 @@ err: return ret; } -static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags, - bool onallcpus) +static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool onallcpus) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -1297,9 +1297,9 @@ err: return ret; } -static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags, - bool onallcpus) +static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool onallcpus) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; @@ -1364,21 +1364,21 @@ err: return ret; } -static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long htab_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { return __htab_percpu_map_update_elem(map, key, value, map_flags, false); } -static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, false); } /* Called from syscall or from eBPF program */ -static int htab_map_delete_elem(struct bpf_map *map, void *key) +static long htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_nulls_head *head; @@ -1414,7 +1414,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) return ret; } -static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) +static long htab_lru_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_nulls_head *head; @@ -2134,8 +2134,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), }; -static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, - void *callback_ctx, u64 flags) +static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, + void *callback_ctx, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct hlist_nulls_head *head; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index a993560f200a..4c7bbec4a9e4 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -141,8 +141,8 @@ static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key) return &READ_ONCE(storage->buf)->data[0]; } -static int cgroup_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long cgroup_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; @@ -348,7 +348,7 @@ static void cgroup_storage_map_free(struct bpf_map *_map) bpf_map_area_free(map); } -static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) +static long cgroup_storage_delete_elem(struct bpf_map *map, void 
*key) { return -EINVAL; } diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index dc23f2ac9cde..e0d3ddf2037a 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -300,8 +300,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, } /* Called from syscall or from eBPF program */ -static int trie_update_elem(struct bpf_map *map, - void *_key, void *value, u64 flags) +static long trie_update_elem(struct bpf_map *map, + void *_key, void *value, u64 flags) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; @@ -431,7 +431,7 @@ out: } /* Called from syscall or from eBPF program */ -static int trie_delete_elem(struct bpf_map *map, void *_key) +static long trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct bpf_lpm_trie_key *key = _key; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 63ecbbcb349d..601609164ef3 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -95,7 +95,7 @@ static void queue_stack_map_free(struct bpf_map *map) bpf_map_area_free(qs); } -static int __queue_map_get(struct bpf_map *map, void *value, bool delete) +static long __queue_map_get(struct bpf_map *map, void *value, bool delete) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long flags; @@ -124,7 +124,7 @@ out: } -static int __stack_map_get(struct bpf_map *map, void *value, bool delete) +static long __stack_map_get(struct bpf_map *map, void *value, bool delete) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long flags; @@ -156,32 +156,32 @@ out: } /* Called from syscall or from eBPF program */ -static int queue_map_peek_elem(struct bpf_map *map, void *value) +static long queue_map_peek_elem(struct bpf_map *map, void *value) { return __queue_map_get(map, value, false); } /* Called from syscall or from eBPF program */ -static int stack_map_peek_elem(struct bpf_map *map, void *value) +static long stack_map_peek_elem(struct bpf_map *map, void *value) { return __stack_map_get(map, value, false); } /* Called from syscall or from eBPF program */ -static int queue_map_pop_elem(struct bpf_map *map, void *value) +static long queue_map_pop_elem(struct bpf_map *map, void *value) { return __queue_map_get(map, value, true); } /* Called from syscall or from eBPF program */ -static int stack_map_pop_elem(struct bpf_map *map, void *value) +static long stack_map_pop_elem(struct bpf_map *map, void *value) { return __stack_map_get(map, value, true); } /* Called from syscall or from eBPF program */ -static int queue_stack_map_push_elem(struct bpf_map *map, void *value, - u64 flags) +static long queue_stack_map_push_elem(struct bpf_map *map, void *value, + u64 flags) { struct bpf_queue_stack *qs = bpf_queue_stack(map); unsigned long irq_flags; @@ -227,14 +227,14 @@ static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) } /* Called from syscall or from eBPF program */ -static int queue_stack_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long queue_stack_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ -static int queue_stack_map_delete_elem(struct bpf_map *map, void *key) +static long queue_stack_map_delete_elem(struct bpf_map *map, void *key) { return -EINVAL; } diff --git a/kernel/bpf/reuseport_array.c 
b/kernel/bpf/reuseport_array.c index 71cb72f5b733..cbf2d8d784b8 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -59,7 +59,7 @@ static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key) } /* Called from syscall only */ -static int reuseport_array_delete_elem(struct bpf_map *map, void *key) +static long reuseport_array_delete_elem(struct bpf_map *map, void *key) { struct reuseport_array *array = reuseport_array(map); u32 index = *(u32 *)key; diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 0d2a45ff83f1..875ac9b698d9 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -242,13 +242,13 @@ static void *ringbuf_map_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(-ENOTSUPP); } -static int ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 flags) +static long ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 flags) { return -ENOTSUPP; } -static int ringbuf_map_delete_elem(struct bpf_map *map, void *key) +static long ringbuf_map_delete_elem(struct bpf_map *map, void *key) { return -ENOTSUPP; } diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 0f1d8dced933..b25fce425b2c 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -618,14 +618,14 @@ static int stack_map_get_next_key(struct bpf_map *map, void *key, return 0; } -static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long stack_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { return -EINVAL; } /* Called from syscall or from eBPF program */ -static int stack_map_delete_elem(struct bpf_map *map, void *key) +static long stack_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct stack_map_bucket *old_bucket; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5693e4a92752..50c995697f0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17692,21 +17692,21 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, (void *(*)(struct bpf_map *map, void *key))NULL)); BUILD_BUG_ON(!__same_type(ops->map_delete_elem, - (int (*)(struct bpf_map *map, void *key))NULL)); + (long (*)(struct bpf_map *map, void *key))NULL)); BUILD_BUG_ON(!__same_type(ops->map_update_elem, - (int (*)(struct bpf_map *map, void *key, void *value, + (long (*)(struct bpf_map *map, void *key, void *value, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_push_elem, - (int (*)(struct bpf_map *map, void *value, + (long (*)(struct bpf_map *map, void *value, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_pop_elem, - (int (*)(struct bpf_map *map, void *value))NULL)); + (long (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, - (int (*)(struct bpf_map *map, void *value))NULL)); + (long (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, - (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); + (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, - (int (*)(struct bpf_map *map, + (long (*)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 24c3dc0d62e5..cb0f5a105b89 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ 
-94,8 +94,8 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(err); } -static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct socket *sock; @@ -114,7 +114,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, return err; } -static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) { struct socket *sock; int fd, err; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9b854e236d23..7c189c2e2fbf 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -437,7 +437,7 @@ static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk, __sock_map_delete(stab, sk, link_raw); } -static int sock_map_delete_elem(struct bpf_map *map, void *key) +static long sock_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); u32 i = *(u32 *)key; @@ -587,8 +587,8 @@ out: return ret; } -static int sock_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long sock_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct sock *sk = (struct sock *)value; int ret; @@ -925,7 +925,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, raw_spin_unlock_bh(&bucket->lock); } -static int sock_hash_delete_elem(struct bpf_map *map, void *key) +static long sock_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 hash, key_size = map->key_size; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 0c38d7175922..2c1427074a3b 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -162,8 +162,8 @@ static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) return ERR_PTR(-EOPNOTSUPP); } -static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; @@ -223,7 +223,7 @@ out: return err; } -static int xsk_map_delete_elem(struct bpf_map *map, void *key) +static long xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; @@ -243,7 +243,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) +static long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) { return __bpf_xdp_redirect_map(map, index, flags, 0, __xsk_map_lookup_elem); -- cgit From b671c2067a04c0668df174ff5dfdb573d1f9b074 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 22 Mar 2023 20:23:58 -0700 Subject: bpf: Retire the struct_ops map kvalue->refcnt. We have replaced kvalue->refcnt with synchronize_rcu() to wait for an RCU grace period. Maintenance of kvalue->refcnt was a complicated task, as we had to simultaneously keep track of two reference counts: kvalue->refcnt itself and the reference count of bpf_map.
When the kvalue->refcnt reaches zero, we also have to reduce the reference count on bpf_map - yet these steps are not performed in an atomic manner and require us to be vigilant when managing them. By eliminating kvalue->refcnt, we can make our maintenance more straightforward, as the refcount of bpf_map is now the only one to manage. To prevent the trampoline image of a struct_ops from being released while it is still in use, we wait for an RCU grace period. The setsockopt(TCP_CONGESTION, "...") command allows you to change your socket's congestion control algorithm and can result in releasing the old struct_ops implementation. That by itself is fine. However, since this function is exposed through bpf_setsockopt(), it may be invoked by BPF programs as well. To ensure that the trampoline image belonging to a struct_ops can be safely called while its method is in use, the trampoline safeguards the BPF program with rcu_read_lock(). Doing so prevents any destruction of the associated images before returning from a trampoline and requires us to wait for an RCU grace period. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230323032405.3735486-2-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 + kernel/bpf/bpf_struct_ops.c | 77 ++++++++++++++++++++++++++------------------- kernel/bpf/syscall.c | 6 ++-- 3 files changed, 49 insertions(+), 35 deletions(-) (limited to 'kernel/bpf/bpf_struct_ops.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ec0df059f562..f04098468d7a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1945,6 +1945,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index ba7a94276e3b..2f3c4a0e03ee 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -11,6 +11,7 @@ #include #include #include +#include <linux/rcupdate_wait.h> enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, @@ -249,6 +250,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; struct bpf_struct_ops_value *uvalue, *kvalue; enum bpf_struct_ops_state state; + s64 refcnt; if (unlikely(*(u32 *)key != 0)) return -ENOENT; @@ -267,7 +269,14 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, uvalue = value; memcpy(uvalue, st_map->uvalue, map->value_size); uvalue->state = state; - refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); + + /* This value offers the user space a general estimate of how + * many sockets are still utilizing this struct_ops for TCP + * congestion control. The number might not be exact, but it + * should sufficiently meet our present goals.
+ */ + refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt); + refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0)); return 0; } @@ -491,7 +500,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, *(unsigned long *)(udata + moff) = prog->aux->id; } - refcount_set(&kvalue->refcnt, 1); bpf_map_inc(map); set_memory_rox((long)st_map->image, 1); @@ -536,8 +544,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: st_map->st_ops->unreg(&st_map->kvalue.data); - if (refcount_dec_and_test(&st_map->kvalue.refcnt)) - bpf_map_put(map); + bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: return -EINPROGRESS; @@ -570,7 +577,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key, kfree(value); } -static void bpf_struct_ops_map_free(struct bpf_map *map) +static void __bpf_struct_ops_map_free(struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; @@ -582,6 +589,28 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) bpf_map_area_free(st_map); } +static void bpf_struct_ops_map_free(struct bpf_map *map) +{ + /* The struct_ops's function may switch to another struct_ops. + * + * For example, bpf_tcp_cc_x->init() may switch to + * another tcp_cc_y by calling + * setsockopt(TCP_CONGESTION, "tcp_cc_y"). + * During the switch, bpf_struct_ops_put(tcp_cc_x) is called + * and its refcount may reach 0 which then free its + * trampoline image while tcp_cc_x is still running. + * + * A vanilla rcu gp is to wait for all bpf-tcp-cc prog + * to finish. bpf-tcp-cc prog is non sleepable. + * A rcu_tasks gp is to wait for the last few insn + * in the trampoline image to finish before releasing + * the trampoline image. + */ + synchronize_rcu_mult(call_rcu, call_rcu_tasks); + + __bpf_struct_ops_map_free(map); +} + static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) { if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || @@ -630,7 +659,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) NUMA_NO_NODE); st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); if (!st_map->uvalue || !st_map->links || !st_map->image) { - bpf_struct_ops_map_free(map); + __bpf_struct_ops_map_free(map); return ERR_PTR(-ENOMEM); } @@ -676,41 +705,23 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { bool bpf_struct_ops_get(const void *kdata) { struct bpf_struct_ops_value *kvalue; + struct bpf_struct_ops_map *st_map; + struct bpf_map *map; kvalue = container_of(kdata, struct bpf_struct_ops_value, data); + st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); - return refcount_inc_not_zero(&kvalue->refcnt); -} - -static void bpf_struct_ops_put_rcu(struct rcu_head *head) -{ - struct bpf_struct_ops_map *st_map; - - st_map = container_of(head, struct bpf_struct_ops_map, rcu); - bpf_map_put(&st_map->map); + map = __bpf_map_inc_not_zero(&st_map->map, false); + return !IS_ERR(map); } void bpf_struct_ops_put(const void *kdata) { struct bpf_struct_ops_value *kvalue; + struct bpf_struct_ops_map *st_map; kvalue = container_of(kdata, struct bpf_struct_ops_value, data); - if (refcount_dec_and_test(&kvalue->refcnt)) { - struct bpf_struct_ops_map *st_map; - - st_map = container_of(kvalue, struct bpf_struct_ops_map, - kvalue); - /* The struct_ops's function may switch to another struct_ops.
- * - * For example, bpf_tcp_cc_x->init() may switch to - * another tcp_cc_y by calling - * setsockopt(TCP_CONGESTION, "tcp_cc_y"). - * During the switch, bpf_struct_ops_put(tcp_cc_x) is called - * and its map->refcnt may reach 0 which then free its - * trampoline image while tcp_cc_x is still running. - * - * Thus, a rcu grace period is needed here. - */ - call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu); - } + st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); + + bpf_map_put(&st_map->map); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 099e9068bcdd..cff0348a2871 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1303,8 +1303,10 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) return map; } -/* map_idr_lock should have been held */ -static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) +/* map_idr_lock should have been held or the map should have been + * protected by rcu read lock. + */ +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) { int refold; -- cgit From 68b04864ca425d1894c96b8141d4fba1181f11cb Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 22 Mar 2023 20:24:00 -0700 Subject: bpf: Create links for BPF struct_ops maps. Make bpf_link support struct_ops. Previously, struct_ops were always used alone without any associated links. Upon updating its value, a struct_ops would be activated automatically. Yet other BPF program types were required to make a bpf_link with their instances before they could become active. Now, however, you can create an inactive struct_ops and create a link to activate it later. With bpf_links, struct_ops behaves like other BPF program types. You can pin/unpin them through their links, and a struct_ops is deactivated when its link is removed, whereas previously someone had to delete the value for it to be deactivated. bpf_links are responsible for registering their associated struct_ops. You can only use a struct_ops that has the BPF_F_LINK flag set to create a bpf_link, while a struct_ops without this flag behaves in the same manner as before and is registered upon updating its value. The BPF_LINK_TYPE_STRUCT_OPS serves a dual purpose. Not only is it used to craft the links for BPF struct_ops programs, but also to create links for the BPF struct_ops themselves. Since the links of BPF struct_ops programs are only used to create trampolines internally, they are never seen in other contexts. Thus, they can be reused for struct_ops themselves. To maintain a reference to the map backing this link, we add bpf_struct_ops_link as an additional type. The map pointer is RCU-protected, although that won't be necessary until later in the patchset.
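As a rough sketch of the resulting interface (a hypothetical userspace helper built directly on the bpf(2) syscall, assuming uapi headers that already carry the BPF_STRUCT_OPS attach type and the link_create.map_fd field added by this patch), attaching a BPF_F_LINK struct_ops map could look like this:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: create a bpf_link for a struct_ops map that was
 * created with BPF_F_LINK and whose value has already been updated
 * (i.e. the map is in the READY state). Returns a link fd on success,
 * or -1 with errno set on failure.
 */
static int struct_ops_link_create(int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.map_fd = map_fd;	/* struct_ops map to attach */
	attr.link_create.attach_type = BPF_STRUCT_OPS;

	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}

Unless the link is pinned, closing the returned fd detaches the link and unregisters the struct_ops, which is the lifetime behavior described above.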
Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230323032405.3735486-4-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 7 ++ include/uapi/linux/bpf.h | 12 +++- kernel/bpf/bpf_struct_ops.c | 143 ++++++++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 23 ++++--- net/ipv4/bpf_tcp_ca.c | 8 ++- tools/include/uapi/linux/bpf.h | 12 +++- 6 files changed, 190 insertions(+), 15 deletions(-) (limited to 'kernel/bpf/bpf_struct_ops.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f04098468d7a..8552279efe46 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1518,6 +1518,7 @@ struct bpf_struct_ops { void *kdata, const void *udata); int (*reg)(void *kdata); void (*unreg)(void *kdata); + int (*validate)(void *kdata); const struct btf_type *type; const struct btf_type *value_type; const char *name; @@ -1552,6 +1553,7 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } +int bpf_struct_ops_link_create(union bpf_attr *attr); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ @@ -1592,6 +1594,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, { return -EINVAL; } +static inline int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + #endif #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 13129df937cd..42f40ee083bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1033,6 +1033,7 @@ enum bpf_attach_type { BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, + BPF_STRUCT_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -1266,6 +1267,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregistered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), }; /* Flags for BPF_PROG_QUERY. */ @@ -1507,7 +1511,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -6379,6 +6386,9 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 2f3c4a0e03ee..3d6b5240c25a 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -17,6 +17,7 @@ enum bpf_struct_ops_state { BPF_STRUCT_OPS_STATE_INIT, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, }; #define BPF_STRUCT_OPS_COMMON_VALUE \ @@ -59,6 +60,11 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_value kvalue; }; +struct bpf_struct_ops_link { + struct bpf_link link; + struct bpf_map __rcu *map; +}; + #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) @@ -500,11 +506,29 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, *(unsigned long *)(udata + moff) = prog->aux->id; } - bpf_map_inc(map); + if (st_map->map.map_flags & BPF_F_LINK) { + err = st_ops->validate(kdata); + if (err) + goto reset_unlock; + set_memory_rox((long)st_map->image, 1); + /* Let bpf_link handle registration & unregistration.
+ * + * Pair with smp_load_acquire() during lookup_elem(). + */ + smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY); + goto unlock; + } set_memory_rox((long)st_map->image, 1); err = st_ops->reg(kdata); if (likely(!err)) { + /* This refcnt increment on the map here after + * 'st_ops->reg()' is secure since the state of the + * map must be set to INIT at this moment, and thus + * bpf_struct_ops_map_delete_elem() can't unregister + * or transition it to TOBEFREE concurrently. + */ + bpf_map_inc(map); /* Pair with smp_load_acquire() during lookup_elem(). * It ensures the above udata updates (e.g. prog->aux->id) * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set. @@ -520,7 +544,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, */ set_memory_nx((long)st_map->image, 1); set_memory_rw((long)st_map->image, 1); - bpf_map_put(map); reset_unlock: bpf_struct_ops_map_put_progs(st_map); @@ -538,6 +561,9 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) struct bpf_struct_ops_map *st_map; st_map = (struct bpf_struct_ops_map *)map; + if (st_map->map.map_flags & BPF_F_LINK) + return -EOPNOTSUPP; + prev_state = cmpxchg(&st_map->kvalue.state, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE); @@ -614,7 +640,7 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) { if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || - attr->map_flags || !attr->btf_vmlinux_value_type_id) + (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id) return -EINVAL; return 0; } @@ -638,6 +664,9 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); + if (attr->map_flags & BPF_F_LINK && !st_ops->validate) + return ERR_PTR(-EOPNOTSUPP); + t = st_ops->type; st_map_size = sizeof(*st_map) + @@ -725,3 +754,111 @@ void bpf_struct_ops_put(const void *kdata) bpf_map_put(&st_map->map); } + +static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map) +{ + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; + + return map->map_type == BPF_MAP_TYPE_STRUCT_OPS && + map->map_flags & BPF_F_LINK && + /* Pair with smp_store_release() during map_update */ + smp_load_acquire(&st_map->kvalue.state) == BPF_STRUCT_OPS_STATE_READY; +} + +static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_struct_ops_map *st_map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = (struct bpf_struct_ops_map *) + rcu_dereference_protected(st_link->map, true); + if (st_map) { + /* st_link->map can be NULL if + * bpf_struct_ops_link_create() fails to register. 
+ */ + st_map->st_ops->unreg(&st_map->kvalue.data); + bpf_map_put(&st_map->map); + } + kfree(st_link); +} + +static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_map *map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + rcu_read_lock(); + map = rcu_dereference(st_link->map); + seq_printf(seq, "map_id:\t%d\n", map->id); + rcu_read_unlock(); +} + +static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_struct_ops_link *st_link; + struct bpf_map *map; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + rcu_read_lock(); + map = rcu_dereference(st_link->map); + info->struct_ops.map_id = map->id; + rcu_read_unlock(); + return 0; +} + +static const struct bpf_link_ops bpf_struct_ops_map_lops = { + .dealloc = bpf_struct_ops_map_link_dealloc, + .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, + .fill_link_info = bpf_struct_ops_map_link_fill_link_info, +}; + +int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + struct bpf_struct_ops_link *link = NULL; + struct bpf_link_primer link_primer; + struct bpf_struct_ops_map *st_map; + struct bpf_map *map; + int err; + + map = bpf_map_get(attr->link_create.map_fd); + if (!map) + return -EINVAL; + + st_map = (struct bpf_struct_ops_map *)map; + + if (!bpf_struct_ops_valid_to_reg(map)) { + err = -EINVAL; + goto err_out; + } + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto err_out; + } + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto err_out; + + err = st_map->st_ops->reg(st_map->kvalue.data); + if (err) { + bpf_link_cleanup(&link_primer); + link = NULL; + goto err_out; + } + RCU_INIT_POINTER(link->map, map); + + return bpf_link_settle(&link_primer); + +err_out: + bpf_map_put(map); + kfree(link); + return err; +} + diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cff0348a2871..21f76698875c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2825,16 +2825,19 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) const struct bpf_prog *prog = link->prog; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "link_type:\t%s\n" - "link_id:\t%u\n" - "prog_tag:\t%s\n" - "prog_id:\t%u\n", + "link_id:\t%u\n", bpf_link_type_strs[link->type], - link->id, - prog_tag, - prog->aux->id); + link->id); + if (prog) { + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + seq_printf(m, + "prog_tag:\t%s\n" + "prog_id:\t%u\n", + prog_tag, + prog->aux->id); + } if (link->ops->show_fdinfo) link->ops->show_fdinfo(link, m); } @@ -4314,7 +4317,8 @@ static int bpf_link_get_info_by_fd(struct file *file, info.type = link->type; info.id = link->id; - info.prog_id = link->prog->aux->id; + if (link->prog) + info.prog_id = link->prog->aux->id; if (link->ops->fill_link_info) { err = link->ops->fill_link_info(link, &info); @@ -4577,6 +4581,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) if (CHECK_ATTR(BPF_LINK_CREATE)) return -EINVAL; + if (attr->link_create.attach_type == BPF_STRUCT_OPS) + return bpf_struct_ops_link_create(attr); + prog = bpf_prog_get(attr->link_create.prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 13fc0c185cd9..bbbd5eb94db2 100644 
--- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -239,8 +239,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name)) <= 0) return -EINVAL; - if (tcp_ca_find(utcp_ca->name)) - return -EEXIST; return 1; } @@ -266,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_validate(void *kdata) +{ + return tcp_validate_congestion_control(kdata); +} + struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, @@ -273,6 +276,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, + .validate = bpf_tcp_ca_validate, .name = "tcp_congestion_ops", }; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 13129df937cd..9cf1deaf21f2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1033,6 +1033,7 @@ enum bpf_attach_type { BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, + BPF_STRUCT_OPS, __MAX_BPF_ATTACH_TYPE }; @@ -1266,6 +1267,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregistered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), }; /* Flags for BPF_PROG_QUERY. */ @@ -1507,7 +1511,10 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* eBPF struct_ops to attach */ + }; union { __u32 target_fd; /* object to attach to */ __u32 target_ifindex; /* target ifindex */ @@ -6379,6 +6386,9 @@ struct bpf_link_info { struct { __u32 ifindex; } xdp; + struct { + __u32 map_id; + } struct_ops; }; } __attribute__((aligned(8))); -- cgit From aef56f2e918bf8fc8de25f0b36e8c2aba44116ec Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 22 Mar 2023 20:24:02 -0700 Subject: bpf: Update the struct_ops of a bpf_link. Improve the BPF_LINK_UPDATE command of bpf() so that you can conveniently switch between different struct_ops on a single bpf_link. This enables smoother transitions from one struct_ops to another. The struct_ops maps passed along with BPF_LINK_UPDATE must have the BPF_F_LINK flag.
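A matching sketch (same hypothetical raw-syscall style as the earlier link-create helper) of switching the struct_ops behind an existing link:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: point an existing struct_ops link at new_map_fd.
 * Pass old_map_fd >= 0 to set BPF_F_REPLACE, in which case the kernel
 * refuses the update with -EPERM unless the link currently uses exactly
 * that map.
 */
static int struct_ops_link_update(int link_fd, int new_map_fd, int old_map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_update.link_fd = link_fd;
	attr.link_update.new_map_fd = new_map_fd;
	if (old_map_fd >= 0) {
		attr.link_update.flags = BPF_F_REPLACE;
		attr.link_update.old_map_fd = old_map_fd;
	}

	return syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
}

Both maps must be of the same struct_ops type and carry BPF_F_LINK, as enforced by bpf_struct_ops_map_link_update() in the patch below.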
Signed-off-by: Kui-Feng Lee Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230323032405.3735486-6-kuifeng@meta.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 21 +++++++++++++----- kernel/bpf/bpf_struct_ops.c | 48 +++++++++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 34 ++++++++++++++++++++++++++++++ net/ipv4/bpf_tcp_ca.c | 6 ++++++ tools/include/uapi/linux/bpf.h | 21 +++++++++++++----- 6 files changed, 122 insertions(+), 11 deletions(-) (limited to 'kernel/bpf/bpf_struct_ops.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8552279efe46..2d8f3f639e68 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1476,6 +1476,8 @@ struct bpf_link_ops { void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *old_map); }; struct bpf_tramp_link { @@ -1518,6 +1520,7 @@ struct bpf_struct_ops { void *kdata, const void *udata); int (*reg)(void *kdata); void (*unreg)(void *kdata); + int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); const struct btf_type *type; const struct btf_type *value_type; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f40ee083bf..e3d3b5160d26 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1555,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. 
+ */ + __u32 old_map_fd; + }; } link_update; struct { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 3d6b5240c25a..6401deca3b56 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -65,6 +65,8 @@ struct bpf_struct_ops_link { struct bpf_map __rcu *map; }; +static DEFINE_MUTEX(update_mutex); + #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) @@ -664,7 +666,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); - if (attr->map_flags & BPF_F_LINK && !st_ops->validate) + if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update)) return ERR_PTR(-EOPNOTSUPP); t = st_ops->type; @@ -810,10 +812,54 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, return 0; } +static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *expected_old_map) +{ + struct bpf_struct_ops_map *st_map, *old_st_map; + struct bpf_map *old_map; + struct bpf_struct_ops_link *st_link; + int err = 0; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = container_of(new_map, struct bpf_struct_ops_map, map); + + if (!bpf_struct_ops_valid_to_reg(new_map)) + return -EINVAL; + + mutex_lock(&update_mutex); + + old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); + if (expected_old_map && old_map != expected_old_map) { + err = -EPERM; + goto err_out; + } + + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); + /* The new and old struct_ops must be the same type. */ + if (st_map->st_ops != old_st_map->st_ops) { + err = -EINVAL; + goto err_out; + } + + err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + if (err) + goto err_out; + + bpf_map_inc(new_map); + rcu_assign_pointer(st_link->map, new_map); + bpf_map_put(old_map); + +err_out: + mutex_unlock(&update_mutex); + + return err; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, + .update_map = bpf_struct_ops_map_link_update, }; int bpf_struct_ops_link_create(union bpf_attr *attr) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 21f76698875c..b4d758fa5981 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4682,6 +4682,35 @@ out: return ret; } +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) +{ + struct bpf_map *new_map, *old_map = NULL; + int ret; + + new_map = bpf_map_get(attr->link_update.new_map_fd); + if (IS_ERR(new_map)) + return -EINVAL; + + if (attr->link_update.flags & BPF_F_REPLACE) { + old_map = bpf_map_get(attr->link_update.old_map_fd); + if (IS_ERR(old_map)) { + ret = -EINVAL; + goto out_put; + } + } else if (attr->link_update.old_map_fd) { + ret = -EINVAL; + goto out_put; + } + + ret = link->ops->update_map(link, new_map, old_map); + + if (old_map) + bpf_map_put(old_map); +out_put: + bpf_map_put(new_map); + return ret; +} + #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd static int link_update(union bpf_attr *attr) @@ -4702,6 +4731,11 @@ static int link_update(union bpf_attr *attr) if (IS_ERR(link)) return PTR_ERR(link); + if (link->ops->update_map) { + ret = link_update_map(link, attr); + goto out_put_link; + } + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); if 
(IS_ERR(new_prog)) { ret = PTR_ERR(new_prog); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index bbbd5eb94db2..e8b27826283e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -264,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +{ + return tcp_update_congestion_control(kdata, old_kdata); +} + static int bpf_tcp_ca_validate(void *kdata) { return tcp_validate_congestion_control(kdata); @@ -273,6 +278,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, + .update = bpf_tcp_ca_update, .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9cf1deaf21f2..d6c5a022ae28 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1555,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { -- cgit From 55fbae05476df65e5eee8be54f61d0257af0240b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 24 Mar 2023 11:42:41 -0700 Subject: bpf: Check IS_ERR for the bpf_map_get() return value This patch fixes a mistake in checking NULL instead of checking IS_ERR for the bpf_map_get() return value. It also fixes the return value in link_update_map() from -EINVAL to PTR_ERR(*_map). 
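For context, bpf_map_get() follows the kernel's ERR_PTR convention: on failure it returns an errno encoded in the pointer, never NULL, so callers must test with IS_ERR() and decode with PTR_ERR(). A minimal sketch of the idiom (hypothetical caller, not taken from this patch):

#include <linux/bpf.h>
#include <linux/err.h>

/* Hypothetical caller: fetch the map behind fd or propagate the errno. */
static int get_map_checked(int fd, struct bpf_map **mapp)
{
	struct bpf_map *map = bpf_map_get(fd);

	if (IS_ERR(map))		/* never NULL on failure */
		return PTR_ERR(map);	/* e.g. -EBADF, not a blanket -EINVAL */

	*mapp = map;
	return 0;
}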
Reported-by: syzbot+71ccc0fe37abb458406b@syzkaller.appspotmail.com Fixes: 68b04864ca42 ("bpf: Create links for BPF struct_ops maps.") Fixes: aef56f2e918b ("bpf: Update the struct_ops of a bpf_link.") Signed-off-by: Martin KaFai Lau Acked-by: Kui-Feng Lee Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230324184241.1387437-1-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_struct_ops.c | 4 ++-- kernel/bpf/syscall.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/bpf/bpf_struct_ops.c') diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 6401deca3b56..d3f0a4825fa6 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -871,8 +871,8 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) int err; map = bpf_map_get(attr->link_create.map_fd); - if (!map) - return -EINVAL; + if (IS_ERR(map)) + return PTR_ERR(map); st_map = (struct bpf_struct_ops_map *)map; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b4d758fa5981..a09597c95029 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4689,12 +4689,12 @@ static int link_update_map(struct bpf_link *link, union bpf_attr *attr) new_map = bpf_map_get(attr->link_update.new_map_fd); if (IS_ERR(new_map)) - return -EINVAL; + return PTR_ERR(new_map); if (attr->link_update.flags & BPF_F_REPLACE) { old_map = bpf_map_get(attr->link_update.old_map_fd); if (IS_ERR(old_map)) { - ret = -EINVAL; + ret = PTR_ERR(old_map); goto out_put; } } else if (attr->link_update.old_map_fd) { -- cgit
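As a coda, the sign-extension hazard described in the "bpf: return long from bpf_map_ops funcs" commit above can be reproduced in ordinary userspace C. The following standalone demonstration is hypothetical (not part of any patch) and assumes an LP64 target:

#include <stdio.h>
#include <errno.h>

static int  ret_int(void)  { return -EEXIST; }	/* old: 32-bit return */
static long ret_long(void) { return -EEXIST; }	/* new: 64-bit return */

int main(void)
{
	/* Model a caller that keeps the result in a 64-bit slot without
	 * sign-extending it, as the generated code for the inlined helper
	 * did: the upper 32 bits of the int result stay zero.
	 */
	unsigned long raw_int  = (unsigned int)ret_int();   /* 0x00000000ffffffef */
	unsigned long raw_long = (unsigned long)ret_long(); /* 0xffffffffffffffef */

	printf("int return:  %#lx == -EEXIST? %d\n", raw_int,  (long)raw_int  == -EEXIST);
	printf("long return: %#lx == -EEXIST? %d\n", raw_long, (long)raw_long == -EEXIST);
	return 0;
}

Only the long-returning variant survives the 64-bit comparison against -EEXIST, which is exactly the check performed by the cmp excerpt quoted in that commit.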