aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c514
1 files changed, 417 insertions, 97 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d77b2f8b9364..c287925471f6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -35,8 +35,9 @@
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
-#include <net/netfilter/nf_bpf_link.h>
+#include <net/netfilter/nf_bpf_link.h>
+#include <net/netkit.h>
#include <net/tcx.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -141,9 +142,13 @@ static u32 bpf_map_value_size(const struct bpf_map *map)
static void maybe_wait_bpf_programs(struct bpf_map *map)
{
- /* Wait for any running BPF programs to complete so that
- * userspace, when we return to it, knows that all programs
- * that could be running use the new map value.
+ /* Wait for any running non-sleepable BPF programs to complete so that
+ * userspace, when we return to it, knows that all non-sleepable
+ * programs that could be running use the new map value. For sleepable
+ * BPF programs, synchronize_rcu_tasks_trace() should be used to wait
+ * for the completions of these programs, but considering the waiting
+ * time can be very long and userspace may think it will hang forever,
+ * so don't handle sleepable BPF programs now.
*/
if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
@@ -159,6 +164,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
if (bpf_map_is_offloaded(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
+ map->map_type == BPF_MAP_TYPE_ARENA ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
@@ -179,15 +185,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
err = bpf_percpu_cgroup_storage_update(map, key, value,
flags);
} else if (IS_FD_ARRAY(map)) {
- rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, map_file, key, value,
flags);
- rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- rcu_read_lock();
err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
flags);
- rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
/* rcu_read_lock() is not needed */
err = bpf_fd_reuseport_array_update_elem(map, key, value,
@@ -202,7 +204,6 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
rcu_read_unlock();
}
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
return err;
}
@@ -263,7 +264,6 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
}
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
return err;
}
@@ -480,6 +480,39 @@ static void bpf_map_release_memcg(struct bpf_map *map)
}
#endif
+int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
+ unsigned long nr_pages, struct page **pages)
+{
+ unsigned long i, j;
+ struct page *pg;
+ int ret = 0;
+#ifdef CONFIG_MEMCG_KMEM
+ struct mem_cgroup *memcg, *old_memcg;
+
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
+#endif
+ for (i = 0; i < nr_pages; i++) {
+ pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0);
+
+ if (pg) {
+ pages[i] = pg;
+ continue;
+ }
+ for (j = 0; j < i; j++)
+ __free_page(pages[j]);
+ ret = -ENOMEM;
+ break;
+ }
+
+#ifdef CONFIG_MEMCG_KMEM
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+#endif
+ return ret;
+}
+
+
static int btf_field_cmp(const void *a, const void *b)
{
const struct btf_field *f1 = a, *f2 = b;
@@ -514,6 +547,7 @@ void btf_record_free(struct btf_record *rec)
switch (rec->fields[i].type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
btf_put(rec->fields[i].kptr.btf);
@@ -560,6 +594,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
switch (fields[i].type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
@@ -624,8 +659,6 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
bpf_timer_cancel_and_free(obj + rec->timer_off);
}
-extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
-
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;
@@ -650,6 +683,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
WRITE_ONCE(*(u64 *)field_ptr, 0);
break;
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
if (!xchgd_field)
break;
@@ -659,8 +693,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
field->kptr.btf_id);
migrate_disable();
__bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
- pointee_struct_meta->record :
- NULL);
+ pointee_struct_meta->record : NULL,
+ fields[i].type == BPF_KPTR_PERCPU);
migrate_enable();
} else {
field->kptr.dtor(xchgd_field);
@@ -692,6 +726,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
struct btf_record *rec = map->record;
+ struct btf *btf = map->btf;
security_bpf_map_free(map);
bpf_map_release_memcg(map);
@@ -707,6 +742,10 @@ static void bpf_map_free_deferred(struct work_struct *work)
* template bpf_map struct used during verification.
*/
btf_record_free(rec);
+ /* Delay freeing of btf for maps, as map_free callback may need
+ * struct_meta info which will be freed with btf_put().
+ */
+ btf_put(btf);
}
static void bpf_map_put_uref(struct bpf_map *map)
@@ -717,6 +756,28 @@ static void bpf_map_put_uref(struct bpf_map *map)
}
}
+static void bpf_map_free_in_work(struct bpf_map *map)
+{
+ INIT_WORK(&map->work, bpf_map_free_deferred);
+ /* Avoid spawning kworkers, since they all might contend
+ * for the same mutex like slab_mutex.
+ */
+ queue_work(system_unbound_wq, &map->work);
+}
+
+static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
+{
+ bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
+}
+
+static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_map_free_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_map_free_rcu_gp);
+}
+
/* decrement map refcnt and schedule it for freeing via workqueue
* (underlying map implementation ops->map_free() might sleep)
*/
@@ -725,12 +786,14 @@ void bpf_map_put(struct bpf_map *map)
if (atomic64_dec_and_test(&map->refcnt)) {
/* bpf_map_free_id() must be called first */
bpf_map_free_id(map);
- btf_put(map->btf);
- INIT_WORK(&map->work, bpf_map_free_deferred);
- /* Avoid spawning kworkers, since they all might contend
- * for the same mutex like slab_mutex.
- */
- queue_work(system_unbound_wq, &map->work);
+
+ WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
+ if (READ_ONCE(map->free_after_mult_rcu_gp))
+ call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
+ else if (READ_ONCE(map->free_after_rcu_gp))
+ call_rcu(&map->rcu, bpf_map_free_rcu_gp);
+ else
+ bpf_map_free_in_work(map);
}
}
EXPORT_SYMBOL_GPL(bpf_map_put);
@@ -908,6 +971,21 @@ static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
return EPOLLERR;
}
+static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct bpf_map *map = filp->private_data;
+
+ if (map->ops->map_get_unmapped_area)
+ return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
+#ifdef CONFIG_MMU
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+#else
+ return addr;
+#endif
+}
+
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_map_show_fdinfo,
@@ -917,6 +995,7 @@ const struct file_operations bpf_map_fops = {
.write = bpf_dummy_write,
.mmap = bpf_map_mmap,
.poll = bpf_map_poll,
+ .get_unmapped_area = bpf_get_unmapped_area,
};
int bpf_map_new_fd(struct bpf_map *map, int flags)
@@ -982,8 +1061,8 @@ int map_check_no_btf(const struct bpf_map *map,
return -ENOTSUPP;
}
-static int map_check_btf(struct bpf_map *map, const struct btf *btf,
- u32 btf_key_id, u32 btf_value_id)
+static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
+ const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
{
const struct btf_type *key_type, *value_type;
u32 key_size, value_size;
@@ -1011,7 +1090,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (!IS_ERR_OR_NULL(map->record)) {
int i;
- if (!bpf_capable()) {
+ if (!bpf_token_capable(token, CAP_BPF)) {
ret = -EPERM;
goto free_map_tab;
}
@@ -1045,6 +1124,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
case BPF_REFCOUNT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
@@ -1093,14 +1173,21 @@ free_map_tab:
return ret;
}
-#define BPF_MAP_CREATE_LAST_FIELD map_extra
+static bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
+#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
const struct bpf_map_ops *ops;
+ struct bpf_token *token = NULL;
int numa_node = bpf_map_attr_numa_node(attr);
u32 map_type = attr->map_type;
struct bpf_map *map;
+ bool token_flag;
int f_flags;
int err;
@@ -1108,6 +1195,12 @@ static int map_create(union bpf_attr *attr)
if (err)
return -EINVAL;
+ /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
+ * to avoid per-map type checks tripping on unknown flag
+ */
+ token_flag = attr->map_flags & BPF_F_TOKEN_FD;
+ attr->map_flags &= ~BPF_F_TOKEN_FD;
+
if (attr->btf_vmlinux_value_type_id) {
if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
attr->btf_key_type_id || attr->btf_value_type_id)
@@ -1117,6 +1210,7 @@ static int map_create(union bpf_attr *attr)
}
if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
+ attr->map_type != BPF_MAP_TYPE_ARENA &&
attr->map_extra != 0)
return -EINVAL;
@@ -1148,14 +1242,32 @@ static int map_create(union bpf_attr *attr)
if (!ops->map_mem_usage)
return -EINVAL;
+ if (token_flag) {
+ token = bpf_token_get_from_fd(attr->map_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+
+ /* if current token doesn't grant map creation permissions,
+ * then we can't use this token, so ignore it and rely on
+ * system-wide capabilities checks
+ */
+ if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) ||
+ !bpf_token_allow_map_type(token, attr->map_type)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ err = -EPERM;
+
/* Intent here is for unprivileged_bpf_disabled to block BPF map
* creation for unprivileged users; other actions depend
* on fd availability and access to bpffs, so are dependent on
* object creation success. Even with unprivileged BPF disabled,
* capability checks are still carried out.
*/
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
- return -EPERM;
+ if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF))
+ goto put_token;
/* check privileged map type permissions */
switch (map_type) {
@@ -1188,25 +1300,28 @@ static int map_create(union bpf_attr *attr)
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
case BPF_MAP_TYPE_STRUCT_OPS:
case BPF_MAP_TYPE_CPUMAP:
- if (!bpf_capable())
- return -EPERM;
+ case BPF_MAP_TYPE_ARENA:
+ if (!bpf_token_capable(token, CAP_BPF))
+ goto put_token;
break;
case BPF_MAP_TYPE_SOCKMAP:
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
case BPF_MAP_TYPE_XSKMAP:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
+ if (!bpf_token_capable(token, CAP_NET_ADMIN))
+ goto put_token;
break;
default:
WARN(1, "unsupported map type %d", map_type);
- return -EPERM;
+ goto put_token;
}
map = ops->map_alloc(attr);
- if (IS_ERR(map))
- return PTR_ERR(map);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto put_token;
+ }
map->ops = ops;
map->map_type = map_type;
@@ -1243,7 +1358,7 @@ static int map_create(union bpf_attr *attr)
map->btf = btf;
if (attr->btf_value_type_id) {
- err = map_check_btf(map, btf, attr->btf_key_type_id,
+ err = map_check_btf(map, token, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err)
goto free_map;
@@ -1255,15 +1370,16 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_alloc(map);
+ err = security_bpf_map_create(map, attr, token);
if (err)
- goto free_map;
+ goto free_map_sec;
err = bpf_map_alloc_id(map);
if (err)
goto free_map_sec;
bpf_map_save_memcg(map);
+ bpf_token_put(token);
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
@@ -1284,6 +1400,8 @@ free_map_sec:
free_map:
btf_put(map->btf);
map->ops->map_free(map);
+put_token:
+ bpf_token_put(token);
return err;
}
@@ -1521,6 +1639,8 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
}
err = bpf_map_update_value(map, f.file, key, value, attr->flags);
+ if (!err)
+ maybe_wait_bpf_programs(map);
kvfree(value);
free_key:
@@ -1576,7 +1696,8 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
err = map->ops->map_delete_elem(map, key);
rcu_read_unlock();
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
+ if (!err)
+ maybe_wait_bpf_programs(map);
out:
kvfree(key);
err_put:
@@ -1673,6 +1794,9 @@ int generic_map_delete_batch(struct bpf_map *map,
if (!max_count)
return 0;
+ if (put_user(0, &uattr->batch.count))
+ return -EFAULT;
+
key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
@@ -1702,7 +1826,6 @@ int generic_map_delete_batch(struct bpf_map *map,
kvfree(key);
- maybe_wait_bpf_programs(map);
return err;
}
@@ -1730,6 +1853,9 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
if (!max_count)
return 0;
+ if (put_user(0, &uattr->batch.count))
+ return -EFAULT;
+
key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
@@ -1760,6 +1886,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
kvfree(value);
kvfree(key);
+
return err;
}
@@ -2105,7 +2232,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
kvfree(aux->func_info);
kfree(aux->func_info_aux);
free_uid(aux->user);
- security_bpf_prog_free(aux);
+ security_bpf_prog_free(aux->prog);
bpf_prog_free(aux->prog);
}
@@ -2121,7 +2248,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
btf_put(prog->aux->attach_btf);
if (deferred) {
- if (prog->aux->sleepable)
+ if (prog->sleepable)
call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
else
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
@@ -2442,14 +2569,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
return 0;
default:
return -EINVAL;
@@ -2546,13 +2678,15 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD log_true_size
+#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog, *dst_prog = NULL;
struct btf *attach_btf = NULL;
+ struct bpf_token *token = NULL;
+ bool bpf_cap;
int err;
char license[128];
@@ -2565,13 +2699,36 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
BPF_F_SLEEPABLE |
BPF_F_TEST_RND_HI32 |
BPF_F_XDP_HAS_FRAGS |
- BPF_F_XDP_DEV_BOUND_ONLY))
+ BPF_F_XDP_DEV_BOUND_ONLY |
+ BPF_F_TEST_REG_INVARIANTS |
+ BPF_F_TOKEN_FD))
return -EINVAL;
+ bpf_prog_load_fixup_attach_type(attr);
+
+ if (attr->prog_flags & BPF_F_TOKEN_FD) {
+ token = bpf_token_get_from_fd(attr->prog_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ /* if current token doesn't grant prog loading permissions,
+ * then we can't use this token, so ignore it and rely on
+ * system-wide capabilities checks
+ */
+ if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) ||
+ !bpf_token_allow_prog_type(token, attr->prog_type,
+ attr->expected_attach_type)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ bpf_cap = bpf_token_capable(token, CAP_BPF);
+ err = -EPERM;
+
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
- !bpf_capable())
- return -EPERM;
+ !bpf_cap)
+ goto put_token;
/* Intent here is for unprivileged_bpf_disabled to block BPF program
* creation for unprivileged users; other actions depend
@@ -2580,21 +2737,23 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
* capability checks are still carried out for these
* and other operations.
*/
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
- return -EPERM;
+ if (sysctl_unprivileged_bpf_disabled && !bpf_cap)
+ goto put_token;
if (attr->insn_cnt == 0 ||
- attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
- return -E2BIG;
+ attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
+ err = -E2BIG;
+ goto put_token;
+ }
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
- !bpf_capable())
- return -EPERM;
+ !bpf_cap)
+ goto put_token;
- if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (is_perfmon_prog_type(type) && !perfmon_capable())
- return -EPERM;
+ if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN))
+ goto put_token;
+ if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON))
+ goto put_token;
/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
* or btf, we need to check which one it is
@@ -2604,27 +2763,33 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (IS_ERR(dst_prog)) {
dst_prog = NULL;
attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
- if (IS_ERR(attach_btf))
- return -EINVAL;
+ if (IS_ERR(attach_btf)) {
+ err = -EINVAL;
+ goto put_token;
+ }
if (!btf_is_kernel(attach_btf)) {
/* attaching through specifying bpf_prog's BTF
* objects directly might be supported eventually
*/
btf_put(attach_btf);
- return -ENOTSUPP;
+ err = -ENOTSUPP;
+ goto put_token;
}
}
} else if (attr->attach_btf_id) {
/* fall back to vmlinux BTF, if BTF type ID is specified */
attach_btf = bpf_get_btf_vmlinux();
- if (IS_ERR(attach_btf))
- return PTR_ERR(attach_btf);
- if (!attach_btf)
- return -EINVAL;
+ if (IS_ERR(attach_btf)) {
+ err = PTR_ERR(attach_btf);
+ goto put_token;
+ }
+ if (!attach_btf) {
+ err = -EINVAL;
+ goto put_token;
+ }
btf_get(attach_btf);
}
- bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
attach_btf, attr->attach_btf_id,
dst_prog)) {
@@ -2632,7 +2797,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bpf_prog_put(dst_prog);
if (attach_btf)
btf_put(attach_btf);
- return -EINVAL;
+ err = -EINVAL;
+ goto put_token;
}
/* plain bpf_prog allocation */
@@ -2642,20 +2808,21 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bpf_prog_put(dst_prog);
if (attach_btf)
btf_put(attach_btf);
- return -ENOMEM;
+ err = -EINVAL;
+ goto put_token;
}
prog->expected_attach_type = attr->expected_attach_type;
+ prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
prog->aux->attach_btf = attach_btf;
prog->aux->attach_btf_id = attr->attach_btf_id;
prog->aux->dst_prog = dst_prog;
prog->aux->dev_bound = !!attr->prog_ifindex;
- prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
- err = security_bpf_prog_alloc(prog->aux);
- if (err)
- goto free_prog;
+ /* move token into prog->aux, reuse taken refcnt */
+ prog->aux->token = token;
+ token = NULL;
prog->aux->user = get_current_user();
prog->len = attr->insn_cnt;
@@ -2664,12 +2831,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (copy_from_bpfptr(prog->insns,
make_bpfptr(attr->insns, uattr.is_kernel),
bpf_prog_insn_size(prog)) != 0)
- goto free_prog_sec;
+ goto free_prog;
/* copy eBPF program license from user space */
if (strncpy_from_bpfptr(license,
make_bpfptr(attr->license, uattr.is_kernel),
sizeof(license) - 1) < 0)
- goto free_prog_sec;
+ goto free_prog;
license[sizeof(license) - 1] = 0;
/* eBPF programs must be GPL compatible to use GPL-ed functions */
@@ -2683,25 +2850,45 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_dev_bound_init(prog, attr);
if (err)
- goto free_prog_sec;
+ goto free_prog;
}
if (type == BPF_PROG_TYPE_EXT && dst_prog &&
bpf_prog_is_dev_bound(dst_prog->aux)) {
err = bpf_prog_dev_bound_inherit(prog, dst_prog);
if (err)
- goto free_prog_sec;
+ goto free_prog;
+ }
+
+ /*
+ * Bookkeeping for managing the program attachment chain.
+ *
+ * It might be tempting to set attach_tracing_prog flag at the attachment
+ * time, but this will not prevent from loading bunch of tracing prog
+ * first, then attach them one to another.
+ *
+ * The flag attach_tracing_prog is set for the whole program lifecycle, and
+ * doesn't have to be cleared in bpf_tracing_link_release, since tracing
+ * programs cannot change attachment target.
+ */
+ if (type == BPF_PROG_TYPE_TRACING && dst_prog &&
+ dst_prog->type == BPF_PROG_TYPE_TRACING) {
+ prog->aux->attach_tracing_prog = true;
}
/* find program type: socket_filter vs tracing_filter */
err = find_prog_type(type, prog);
if (err < 0)
- goto free_prog_sec;
+ goto free_prog;
prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
sizeof(attr->prog_name));
if (err < 0)
+ goto free_prog;
+
+ err = security_bpf_prog_load(prog, attr, token);
+ if (err)
goto free_prog_sec;
/* run eBPF verifier */
@@ -2745,15 +2932,18 @@ free_used_maps:
* period before we can tear down JIT memory since symbols
* are already exposed under kallsyms.
*/
- __bpf_prog_put_noref(prog, prog->aux->func_cnt);
+ __bpf_prog_put_noref(prog, prog->aux->real_func_cnt);
return err;
+
free_prog_sec:
- free_uid(prog->aux->user);
- security_bpf_prog_free(prog->aux);
+ security_bpf_prog_free(prog);
free_prog:
+ free_uid(prog->aux->user);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
bpf_prog_free(prog);
+put_token:
+ bpf_token_put(token);
return err;
}
@@ -2834,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+ /* free bpf_link and its containing memory */
+ link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_link_defer_dealloc_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ bool sleepable = false;
+
bpf_link_free_id(link->id);
if (link->prog) {
+ sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
- /* free bpf_link and its containing memory */
- link->ops->dealloc(link);
+ if (link->ops->dealloc_deferred) {
+ /* schedule BPF link deallocation; if underlying BPF program
+ * is sleepable, we need to first wait for RCU tasks trace
+ * sync, then go through "classic" RCU grace period
+ */
+ if (sleepable)
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ else
+ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+ }
+ if (link->ops->dealloc)
+ link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3126,7 +3345,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
}
if (tgt_prog_fd) {
- /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
+ /*
+ * For now we only allow new targets for BPF_PROG_TYPE_EXT. If this
+ * part would be changed to implement the same for
+ * BPF_PROG_TYPE_TRACING, do not forget to update the way how
+ * attach_tracing_prog flag is set.
+ */
if (prog->type != BPF_PROG_TYPE_EXT) {
err = -EINVAL;
goto out_put_prog;
@@ -3171,6 +3395,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
*
* - if prog->aux->dst_trampoline and tgt_prog is NULL, the program
* was detached and is going for re-attachment.
+ *
+ * - if prog->aux->dst_trampoline is NULL and tgt_prog and prog->aux->attach_btf
+ * are NULL, then program was already attached and user did not provide
+ * tgt_prog_fd so we have no way to find out or create trampoline
*/
if (!prog->aux->dst_trampoline && !tgt_prog) {
/*
@@ -3184,6 +3412,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
err = -EINVAL;
goto out_unlock;
}
+ /* We can allow re-attach only if we have valid attach_btf. */
+ if (!prog->aux->attach_btf) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
btf_id = prog->aux->attach_btf_id;
key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
}
@@ -3340,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
- .dealloc = bpf_raw_tp_link_dealloc,
+ .dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
@@ -3370,7 +3603,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link)
static int bpf_perf_link_fill_common(const struct perf_event *event,
char __user *uname, u32 ulen,
u64 *probe_offset, u64 *probe_addr,
- u32 *fd_type)
+ u32 *fd_type, unsigned long *missed)
{
const char *buf;
u32 prog_id;
@@ -3381,7 +3614,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
return -EINVAL;
err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
- probe_offset, probe_addr);
+ probe_offset, probe_addr, missed);
if (err)
return err;
if (!uname)
@@ -3404,6 +3637,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
struct bpf_link_info *info)
{
+ unsigned long missed;
char __user *uname;
u64 addr, offset;
u32 ulen, type;
@@ -3412,7 +3646,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
ulen = info->perf_event.kprobe.name_len;
err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
- &type);
+ &type, &missed);
if (err)
return err;
if (type == BPF_FD_TYPE_KRETPROBE)
@@ -3421,9 +3655,11 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
info->perf_event.type = BPF_PERF_EVENT_KPROBE;
info->perf_event.kprobe.offset = offset;
+ info->perf_event.kprobe.missed = missed;
if (!kallsyms_show_value(current_cred()))
addr = 0;
info->perf_event.kprobe.addr = addr;
+ info->perf_event.kprobe.cookie = event->bpf_cookie;
return 0;
}
#endif
@@ -3440,7 +3676,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
ulen = info->perf_event.uprobe.name_len;
err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
- &type);
+ &type, NULL);
if (err)
return err;
@@ -3449,6 +3685,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
else
info->perf_event.type = BPF_PERF_EVENT_UPROBE;
info->perf_event.uprobe.offset = offset;
+ info->perf_event.uprobe.cookie = event->bpf_cookie;
return 0;
}
#endif
@@ -3476,7 +3713,8 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
ulen = info->perf_event.tracepoint.name_len;
info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
- return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL);
+ info->perf_event.tracepoint.cookie = event->bpf_cookie;
+ return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
}
static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
@@ -3484,6 +3722,7 @@ static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
{
info->perf_event.event.type = event->attr.type;
info->perf_event.event.config = event->attr.config;
+ info->perf_event.event.cookie = event->bpf_cookie;
info->perf_event.type = BPF_PERF_EVENT_EVENT;
return 0;
}
@@ -3672,14 +3911,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
case BPF_CGROUP_SOCK_OPS:
return BPF_PROG_TYPE_SOCK_OPS;
@@ -3716,6 +3960,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_LSM;
case BPF_TCX_INGRESS:
case BPF_TCX_EGRESS:
+ case BPF_NETKIT_PRIMARY:
+ case BPF_NETKIT_PEER:
return BPF_PROG_TYPE_SCHED_CLS;
default:
return BPF_PROG_TYPE_UNSPEC;
@@ -3734,7 +3980,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_SK_LOOKUP:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
- if (!capable(CAP_NET_ADMIN))
+ if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN))
/* cg-skb progs can be loaded by unpriv user.
* check permissions at attach time.
*/
@@ -3767,7 +4013,9 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
return 0;
case BPF_PROG_TYPE_SCHED_CLS:
if (attach_type != BPF_TCX_INGRESS &&
- attach_type != BPF_TCX_EGRESS)
+ attach_type != BPF_TCX_EGRESS &&
+ attach_type != BPF_NETKIT_PRIMARY &&
+ attach_type != BPF_NETKIT_PEER)
return -EINVAL;
return 0;
default:
@@ -3850,7 +4098,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
break;
case BPF_PROG_TYPE_SCHED_CLS:
- ret = tcx_prog_attach(attr, prog);
+ if (attr->attach_type == BPF_TCX_INGRESS ||
+ attr->attach_type == BPF_TCX_EGRESS)
+ ret = tcx_prog_attach(attr, prog);
+ else
+ ret = netkit_prog_attach(attr, prog);
break;
default:
ret = -EINVAL;
@@ -3911,7 +4163,11 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ret = cgroup_bpf_prog_detach(attr, ptype);
break;
case BPF_PROG_TYPE_SCHED_CLS:
- ret = tcx_prog_detach(attr, prog);
+ if (attr->attach_type == BPF_TCX_INGRESS ||
+ attr->attach_type == BPF_TCX_EGRESS)
+ ret = tcx_prog_detach(attr, prog);
+ else
+ ret = netkit_prog_detach(attr, prog);
break;
default:
ret = -EINVAL;
@@ -3927,7 +4183,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
- if (!capable(CAP_NET_ADMIN))
+ if (!bpf_net_capable())
return -EPERM;
if (CHECK_ATTR(BPF_PROG_QUERY))
return -EINVAL;
@@ -3945,14 +4201,19 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UNIX_CONNECT:
case BPF_CGROUP_INET4_GETPEERNAME:
case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_UNIX_GETPEERNAME:
case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_INET6_GETSOCKNAME:
+ case BPF_CGROUP_UNIX_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UNIX_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UNIX_RECVMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
case BPF_CGROUP_SYSCTL:
@@ -3973,6 +4234,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_TCX_INGRESS:
case BPF_TCX_EGRESS:
return tcx_prog_query(attr, uattr);
+ case BPF_NETKIT_PRIMARY:
+ case BPF_NETKIT_PEER:
+ return netkit_prog_query(attr, uattr);
default:
return -EINVAL;
}
@@ -4218,6 +4482,12 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
continue;
}
+ if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX ||
+ BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) {
+ insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM;
+ continue;
+ }
+
if (code != (BPF_LD | BPF_IMM | BPF_DW))
continue;
@@ -4585,6 +4855,8 @@ static int bpf_map_get_info_by_fd(struct file *file,
info.btf_value_type_id = map->btf_value_type_id;
}
info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS)
+ bpf_map_struct_ops_info_fill(&info, map);
if (bpf_map_is_offloaded(map)) {
err = bpf_map_offload_info_fill(&info, map);
@@ -4687,15 +4959,34 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return err;
}
-#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size
+#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
+ struct bpf_token *token = NULL;
+
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;
- if (!bpf_capable())
+ if (attr->btf_flags & ~BPF_F_TOKEN_FD)
+ return -EINVAL;
+
+ if (attr->btf_flags & BPF_F_TOKEN_FD) {
+ token = bpf_token_get_from_fd(attr->btf_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ if (!bpf_token_capable(token, CAP_BPF)) {
+ bpf_token_put(token);
return -EPERM;
+ }
+
+ bpf_token_put(token);
return btf_new_fd(attr, uattr, uattr_size);
}
@@ -4818,7 +5109,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
&buf, &probe_offset,
- &probe_addr);
+ &probe_addr, NULL);
if (!err)
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
fd_type, buf,
@@ -4884,8 +5175,10 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
else
BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
err_put:
- if (has_write)
+ if (has_write) {
+ maybe_wait_bpf_programs(map);
bpf_map_write_active_dec(map);
+ }
fdput(f);
return err;
}
@@ -4954,7 +5247,11 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_xdp_link_attach(attr, prog);
break;
case BPF_PROG_TYPE_SCHED_CLS:
- ret = tcx_link_attach(attr, prog);
+ if (attr->link_create.attach_type == BPF_TCX_INGRESS ||
+ attr->link_create.attach_type == BPF_TCX_EGRESS)
+ ret = tcx_link_attach(attr, prog);
+ else
+ ret = netkit_link_attach(attr, prog);
break;
case BPF_PROG_TYPE_NETFILTER:
ret = bpf_nf_link_attach(attr, prog);
@@ -5283,6 +5580,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
goto out_unlock;
}
+ /* The bpf program will not access the bpf map, but for the sake of
+ * simplicity, increase sleepable_refcnt for sleepable program as well.
+ */
+ if (prog->sleepable)
+ atomic64_inc(&map->sleepable_refcnt);
memcpy(used_maps_new, used_maps_old,
sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
used_maps_new[prog->aux->used_map_cnt] = map;
@@ -5302,6 +5604,20 @@ out_prog_put:
return ret;
}
+#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd
+
+static int token_create(union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_TOKEN_CREATE))
+ return -EINVAL;
+
+ /* no flags are supported yet */
+ if (attr->token_create.flags)
+ return -EINVAL;
+
+ return bpf_token_create(attr);
+}
+
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -5435,6 +5751,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
+ case BPF_TOKEN_CREATE:
+ err = token_create(&attr);
+ break;
default:
err = -EINVAL;
break;
@@ -5541,7 +5860,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = {
const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
BPF_CALL_1(bpf_sys_close, u32, fd)
@@ -5591,7 +5910,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_sys_bpf:
- return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
+ return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
+ ? NULL : &bpf_sys_bpf_proto;
case BPF_FUNC_btf_find_by_name_kind:
return &bpf_btf_find_by_name_kind_proto;
case BPF_FUNC_sys_close: