aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c720
1 files changed, 558 insertions, 162 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 19b6129eab23..045646da97cc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -10,8 +10,10 @@
* General Public License for more details.
*/
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
@@ -20,35 +22,46 @@
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
+#include <linux/idr.h>
+
+/*
+ * Map-type predicates used by the syscall paths in this file.
+ *
+ * IS_FD_ARRAY(): prog, perf-event, cgroup and array-of-maps arrays.
+ * IS_FD_HASH():  hash-of-maps.
+ * IS_FD_MAP():   either of the above; map_lookup_elem() uses a u32-sized
+ *                value buffer for these types.
+ *
+ * Note: the 'map' argument is evaluated more than once, so pass a plain
+ * pointer expression without side effects.
+ */
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
DEFINE_PER_CPU(int, bpf_prog_active);
+static DEFINE_IDR(prog_idr);
+static DEFINE_SPINLOCK(prog_idr_lock);
+static DEFINE_IDR(map_idr);
+static DEFINE_SPINLOCK(map_idr_lock);
int sysctl_unprivileged_bpf_disabled __read_mostly;
-static LIST_HEAD(bpf_map_types);
+/*
+ * Table of map implementations, indexed by map type id.
+ *
+ * The entries are generated by including <linux/bpf_types.h> with
+ * BPF_MAP_TYPE() expanding to a designated initializer and
+ * BPF_PROG_TYPE() expanding to nothing (presumably the header is a list
+ * of BPF_PROG_TYPE()/BPF_MAP_TYPE() invocations - TODO confirm).
+ * Ids without an entry are left NULL; find_and_alloc_map() rejects both
+ * out-of-range ids and NULL slots with -EINVAL.
+ */
+static const struct bpf_map_ops * const bpf_map_types[] = {
+#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_MAP_TYPE(_id, _ops) \
+ [_id] = &_ops,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE
+};
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
- struct bpf_map_type_list *tl;
struct bpf_map *map;
- list_for_each_entry(tl, &bpf_map_types, list_node) {
- if (tl->type == attr->map_type) {
- map = tl->ops->map_alloc(attr);
- if (IS_ERR(map))
- return map;
- map->ops = tl->ops;
- map->map_type = attr->map_type;
- return map;
- }
- }
- return ERR_PTR(-EINVAL);
-}
+ if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
+ !bpf_map_types[attr->map_type])
+ return ERR_PTR(-EINVAL);
-/* boot time registration of different map implementations */
-void bpf_register_map_type(struct bpf_map_type_list *tl)
-{
- list_add(&tl->list_node, &bpf_map_types);
+ map = bpf_map_types[attr->map_type]->map_alloc(attr);
+ if (IS_ERR(map))
+ return map;
+ map->ops = bpf_map_types[attr->map_type];
+ map->map_type = attr->map_type;
+ return map;
}
void *bpf_map_area_alloc(size_t size)
@@ -66,8 +79,7 @@ void *bpf_map_area_alloc(size_t size)
return area;
}
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
- PAGE_KERNEL);
+ return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
}
void bpf_map_area_free(void *area)
@@ -114,6 +126,37 @@ static void bpf_map_uncharge_memlock(struct bpf_map *map)
free_uid(user);
}
+/*
+ * Allocate an id for @map from map_idr and record it in map->id.
+ *
+ * The id is requested cyclically with a start of 1 and an end of INT_MAX,
+ * using GFP_ATOMIC, while map_idr_lock is held with bottom halves
+ * disabled.
+ *
+ * Returns 0 on success, the negative value handed back by
+ * idr_alloc_cyclic() on failure, or -ENOSPC (after a one-time warning)
+ * should the allocator ever return 0.
+ */
+static int bpf_map_alloc_id(struct bpf_map *map)
+{
+ int id;
+
+ spin_lock_bh(&map_idr_lock);
+ id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
+ /* Only a positive return is a usable id; store it under the lock. */
+ if (id > 0)
+ map->id = id;
+ spin_unlock_bh(&map_idr_lock);
+
+ /* 0 is below the requested start of 1 and is not expected. */
+ if (WARN_ON_ONCE(!id))
+ return -ENOSPC;
+
+ return id > 0 ? 0 : id;
+}
+
+/*
+ * Remove map->id from map_idr.
+ *
+ * @do_idr_lock: true to take map_idr_lock (with bottom halves disabled)
+ *               around the removal; false when the caller already holds
+ *               it.  In the false case only __acquire()/__release() are
+ *               invoked, which look like lock-context annotations rather
+ *               than real locking (TODO confirm).
+ */
+static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
+{
+ if (do_idr_lock)
+ spin_lock_bh(&map_idr_lock);
+ else
+ __acquire(&map_idr_lock);
+
+ idr_remove(&map_idr, map->id);
+
+ if (do_idr_lock)
+ spin_unlock_bh(&map_idr_lock);
+ else
+ __release(&map_idr_lock);
+}
+
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
@@ -135,14 +178,21 @@ static void bpf_map_put_uref(struct bpf_map *map)
/* decrement map refcnt and schedule it for freeing via workqueue
* (unrelying map implementation ops->map_free() might sleep)
*/
-void bpf_map_put(struct bpf_map *map)
+static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
if (atomic_dec_and_test(&map->refcnt)) {
+ /* bpf_map_free_id() must be called first */
+ bpf_map_free_id(map, do_idr_lock);
INIT_WORK(&map->work, bpf_map_free_deferred);
schedule_work(&map->work);
}
}
+/*
+ * Drop one reference on @map.  This is the variant for callers that do
+ * not hold map_idr_lock: if the last reference goes away, the lock is
+ * taken while the map id is removed.
+ */
+void bpf_map_put(struct bpf_map *map)
+{
+	const bool take_idr_lock = true;
+
+	__bpf_map_put(map, take_idr_lock);
+}
+
void bpf_map_put_with_uref(struct bpf_map *map)
{
bpf_map_put_uref(map);
@@ -166,10 +216,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
const struct bpf_map *map = filp->private_data;
const struct bpf_array *array;
u32 owner_prog_type = 0;
+ u32 owner_jited = 0;
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
array = container_of(map, struct bpf_array, map);
owner_prog_type = array->owner_prog_type;
+ owner_jited = array->owner_jited;
}
seq_printf(m,
@@ -186,9 +238,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->map_flags,
map->pages * 1ULL << PAGE_SHIFT);
- if (owner_prog_type)
+ if (owner_prog_type) {
seq_printf(m, "owner_prog_type:\t%u\n",
owner_prog_type);
+ seq_printf(m, "owner_jited:\t%u\n",
+ owner_jited);
+ }
}
#endif
@@ -213,7 +268,7 @@ int bpf_map_new_fd(struct bpf_map *map)
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
sizeof(attr->CMD##_LAST_FIELD)) != NULL
-#define BPF_MAP_CREATE_LAST_FIELD map_flags
+#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -236,11 +291,23 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map_nouncharge;
- err = bpf_map_new_fd(map);
- if (err < 0)
- /* failed to allocate fd */
+ err = bpf_map_alloc_id(map);
+ if (err)
goto free_map;
+ err = bpf_map_new_fd(map);
+ if (err < 0) {
+ /* failed to allocate fd.
+ * bpf_map_put() is needed because the above
+ * bpf_map_alloc_id() has published the map
+ * to the userspace and the userspace may
+ * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
+ */
+ bpf_map_put(map);
+ return err;
+ }
+
+ trace_bpf_map_create(map, err);
return err;
free_map:
@@ -294,6 +361,28 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
return map;
}
+/*
+ * Take a reference on @map unless its refcnt has already dropped to zero.
+ *
+ * The caller must hold map_idr_lock.
+ *
+ * @uref: when true, map->usercnt is incremented as well.
+ *
+ * Returns @map on success, ERR_PTR(-ENOENT) if the refcnt was zero, or
+ * ERR_PTR(-EBUSY) if the refcnt had reached BPF_MAX_REFCNT.
+ */
+static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
+ bool uref)
+{
+ int refold;
+
+ /* the checks below treat refold as the count before the increment */
+ refold = __atomic_add_unless(&map->refcnt, 1, 0);
+
+ if (refold >= BPF_MAX_REFCNT) {
+ /* undo the increment; 'false' because the idr lock is already held */
+ __bpf_map_put(map, false);
+ return ERR_PTR(-EBUSY);
+ }
+
+ if (!refold)
+ return ERR_PTR(-ENOENT);
+
+ if (uref)
+ atomic_inc(&map->usercnt);
+
+ return map;
+}
+
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
return -ENOTSUPP;
@@ -321,19 +410,18 @@ static int map_lookup_elem(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- err = -ENOMEM;
- key = kmalloc(map->key_size, GFP_USER);
- if (!key)
+ key = memdup_user(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
goto err_put;
-
- err = -EFAULT;
- if (copy_from_user(key, ukey, map->key_size) != 0)
- goto free_key;
+ }
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
value_size = round_up(map->value_size, 8) * num_possible_cpus();
+ else if (IS_FD_MAP(map))
+ value_size = sizeof(u32);
else
value_size = map->value_size;
@@ -349,6 +437,10 @@ static int map_lookup_elem(union bpf_attr *attr)
err = bpf_percpu_array_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_copy(map, key, value);
+ } else if (IS_FD_ARRAY(map)) {
+ err = bpf_fd_array_map_lookup_elem(map, key, value);
+ } else if (IS_FD_HASH(map)) {
+ err = bpf_fd_htab_map_lookup_elem(map, key, value);
} else {
rcu_read_lock();
ptr = map->ops->map_lookup_elem(map, key);
@@ -365,6 +457,7 @@ static int map_lookup_elem(union bpf_attr *attr)
if (copy_to_user(uvalue, value, value_size) != 0)
goto free_value;
+ trace_bpf_map_lookup_elem(map, ufd, key, value);
err = 0;
free_value:
@@ -397,14 +490,11 @@ static int map_update_elem(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- err = -ENOMEM;
- key = kmalloc(map->key_size, GFP_USER);
- if (!key)
+ key = memdup_user(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
goto err_put;
-
- err = -EFAULT;
- if (copy_from_user(key, ukey, map->key_size) != 0)
- goto free_key;
+ }
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -434,11 +524,17 @@ static int map_update_elem(union bpf_attr *attr)
err = bpf_percpu_array_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
- map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
+ map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
rcu_read_unlock();
+ } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ rcu_read_lock();
+ err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+ attr->flags);
+ rcu_read_unlock();
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -447,6 +543,8 @@ static int map_update_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
+ if (!err)
+ trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
kfree(value);
free_key:
@@ -475,14 +573,11 @@ static int map_delete_elem(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- err = -ENOMEM;
- key = kmalloc(map->key_size, GFP_USER);
- if (!key)
+ key = memdup_user(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
goto err_put;
-
- err = -EFAULT;
- if (copy_from_user(key, ukey, map->key_size) != 0)
- goto free_key;
+ }
preempt_disable();
__this_cpu_inc(bpf_prog_active);
@@ -492,7 +587,8 @@ static int map_delete_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
-free_key:
+ if (!err)
+ trace_bpf_map_delete_elem(map, ufd, key);
kfree(key);
err_put:
fdput(f);
@@ -520,14 +616,15 @@ static int map_get_next_key(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- err = -ENOMEM;
- key = kmalloc(map->key_size, GFP_USER);
- if (!key)
- goto err_put;
-
- err = -EFAULT;
- if (copy_from_user(key, ukey, map->key_size) != 0)
- goto free_key;
+ if (ukey) {
+ key = memdup_user(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
+ goto err_put;
+ }
+ } else {
+ key = NULL;
+ }
err = -ENOMEM;
next_key = kmalloc(map->key_size, GFP_USER);
@@ -544,6 +641,7 @@ static int map_get_next_key(union bpf_attr *attr)
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
goto free_next_key;
+ trace_bpf_map_next_key(map, ufd, key, next_key);
err = 0;
free_next_key:
@@ -555,79 +653,23 @@ err_put:
return err;
}
-static LIST_HEAD(bpf_prog_types);
+/*
+ * Table of per-program-type ops, indexed by program type id.
+ *
+ * Mirror image of the map table: here BPF_PROG_TYPE() expands to a
+ * designated initializer and BPF_MAP_TYPE() expands to nothing while
+ * <linux/bpf_types.h> is included.  Ids without an entry stay NULL and
+ * are rejected by find_prog_type() with -EINVAL.
+ */
+static const struct bpf_verifier_ops * const bpf_prog_types[] = {
+#define BPF_PROG_TYPE(_id, _ops) \
+ [_id] = &_ops,
+#define BPF_MAP_TYPE(_id, _ops)
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE
+};
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
- struct bpf_prog_type_list *tl;
-
- list_for_each_entry(tl, &bpf_prog_types, list_node) {
- if (tl->type == type) {
- prog->aux->ops = tl->ops;
- prog->type = type;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
-void bpf_register_prog_type(struct bpf_prog_type_list *tl)
-{
- list_add(&tl->list_node, &bpf_prog_types);
-}
-
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- * insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
- *
- * this function is called after eBPF program passed verification
- */
-static void fixup_bpf_calls(struct bpf_prog *prog)
-{
- const struct bpf_func_proto *fn;
- int i;
+ if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
+ return -EINVAL;
- for (i = 0; i < prog->len; i++) {
- struct bpf_insn *insn = &prog->insnsi[i];
-
- if (insn->code == (BPF_JMP | BPF_CALL)) {
- /* we reach here when program has bpf_call instructions
- * and it passed bpf_check(), means that
- * ops->get_func_proto must have been supplied, check it
- */
- BUG_ON(!prog->aux->ops->get_func_proto);
-
- if (insn->imm == BPF_FUNC_get_route_realm)
- prog->dst_needed = 1;
- if (insn->imm == BPF_FUNC_get_prandom_u32)
- bpf_user_rnd_init_once();
- if (insn->imm == BPF_FUNC_xdp_adjust_head)
- prog->xdp_adjust_head = 1;
- if (insn->imm == BPF_FUNC_tail_call) {
- /* mark bpf_tail_call as different opcode
- * to avoid conditional branch in
- * interpeter for every normal call
- * and to prevent accidental JITing by
- * JIT compiler that doesn't support
- * bpf_tail_call yet
- */
- insn->imm = 0;
- insn->code |= BPF_X;
- continue;
- }
-
- fn = prog->aux->ops->get_func_proto(insn->imm);
- /* all functions that have prototype and verifier allowed
- * programs to call them, must be real in-kernel functions
- */
- BUG_ON(!fn->func);
- insn->imm = fn->func - __bpf_call_base;
- }
- }
+ prog->aux->ops = bpf_prog_types[type];
+ prog->type = type;
+ return 0;
}
/* drop refcnt on maps used by eBPF program and free auxilary data */
@@ -686,6 +728,42 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
free_uid(user);
}
+/*
+ * Allocate an id for @prog from prog_idr and record it in prog->aux->id.
+ *
+ * The allocation is cyclic, uses GFP_ATOMIC and runs with prog_idr_lock
+ * held and bottom halves disabled.
+ *
+ * Returns 0 on success, the negative value handed back by
+ * idr_alloc_cyclic() on failure, or -ENOSPC (after a one-time warning)
+ * should the allocator ever return 0.
+ */
+static int bpf_prog_alloc_id(struct bpf_prog *prog)
+{
+ int id;
+
+ spin_lock_bh(&prog_idr_lock);
+ id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
+ /* Only a positive return is a usable id; store it under the lock. */
+ if (id > 0)
+ prog->aux->id = id;
+ spin_unlock_bh(&prog_idr_lock);
+
+ /* id is in [1, INT_MAX) */
+ if (WARN_ON_ONCE(!id))
+ return -ENOSPC;
+
+ return id > 0 ? 0 : id;
+}
+
+/*
+ * Remove prog->aux->id from prog_idr; a no-op when the program has no id.
+ *
+ * @do_idr_lock: true to take prog_idr_lock (with bottom halves disabled)
+ *               around the removal; false when the caller already holds
+ *               it.  In the false case only __acquire()/__release() are
+ *               invoked, which look like lock-context annotations rather
+ *               than real locking (TODO confirm).
+ */
+static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+{
+ /* cBPF to eBPF migrations are currently not in the idr store. */
+ if (!prog->aux->id)
+ return;
+
+ if (do_idr_lock)
+ spin_lock_bh(&prog_idr_lock);
+ else
+ __acquire(&prog_idr_lock);
+
+ idr_remove(&prog_idr, prog->aux->id);
+
+ if (do_idr_lock)
+ spin_unlock_bh(&prog_idr_lock);
+ else
+ __release(&prog_idr_lock);
+}
+
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
@@ -695,10 +773,20 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
bpf_prog_free(aux->prog);
}
-void bpf_prog_put(struct bpf_prog *prog)
+static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
- if (atomic_dec_and_test(&prog->aux->refcnt))
+ if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ trace_bpf_prog_put_rcu(prog);
+ /* bpf_prog_free_id() must be called first */
+ bpf_prog_free_id(prog, do_idr_lock);
+ bpf_prog_kallsyms_del(prog);
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ }
+}
+
+void bpf_prog_put(struct bpf_prog *prog)
+{
+ __bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
@@ -781,6 +869,24 @@ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
+/*
+ * Take a reference on @prog unless its refcnt has already dropped to
+ * zero.
+ *
+ * The caller must hold prog_idr_lock.
+ *
+ * Returns @prog on success, ERR_PTR(-ENOENT) if the refcnt was zero, or
+ * ERR_PTR(-EBUSY) if the refcnt had reached BPF_MAX_REFCNT.
+ */
+static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
+{
+ int refold;
+
+ /* the checks below treat refold as the count before the increment */
+ refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
+
+ if (refold >= BPF_MAX_REFCNT) {
+ /* undo the increment; 'false' because the idr lock is already held */
+ __bpf_prog_put(prog, false);
+ return ERR_PTR(-EBUSY);
+ }
+
+ if (!refold)
+ return ERR_PTR(-ENOENT);
+
+ return prog;
+}
+
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
struct fd f = fdget(ufd);
@@ -807,12 +913,16 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
- return __bpf_prog_get(ufd, &type);
+ struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
+
+ if (!IS_ERR(prog))
+ trace_bpf_prog_get_type(prog);
+ return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD kern_version
+#define BPF_PROG_LOAD_LAST_FIELD prog_flags
static int bpf_prog_load(union bpf_attr *attr)
{
@@ -825,6 +935,9 @@ static int bpf_prog_load(union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
+ if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
+ return -EINVAL;
+
/* copy eBPF program license from user space */
if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
sizeof(license) - 1) < 0)
@@ -841,7 +954,9 @@ static int bpf_prog_load(union bpf_attr *attr)
attr->kern_version != LINUX_VERSION_CODE)
return -EINVAL;
- if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
+ if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
+ type != BPF_PROG_TYPE_CGROUP_SKB &&
+ !capable(CAP_SYS_ADMIN))
return -EPERM;
/* plain bpf_prog allocation */
@@ -876,19 +991,29 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_used_maps;
- /* fixup BPF_CALL->imm field */
- fixup_bpf_calls(prog);
-
/* eBPF program is ready to be JITed */
prog = bpf_prog_select_runtime(prog, &err);
if (err < 0)
goto free_used_maps;
- err = bpf_prog_new_fd(prog);
- if (err < 0)
- /* failed to allocate fd */
+ err = bpf_prog_alloc_id(prog);
+ if (err)
goto free_used_maps;
+ err = bpf_prog_new_fd(prog);
+ if (err < 0) {
+ /* failed to allocate fd.
+ * bpf_prog_put() is needed because the above
+ * bpf_prog_alloc_id() has published the prog
+ * to the userspace and the userspace may
+ * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
+ */
+ bpf_prog_put(prog);
+ return err;
+ }
+
+ bpf_prog_kallsyms_add(prog);
+ trace_bpf_prog_load(prog, err);
return err;
free_used_maps:
@@ -920,13 +1045,14 @@ static int bpf_obj_get(const union bpf_attr *attr)
#ifdef CONFIG_CGROUP_BPF
-#define BPF_PROG_ATTACH_LAST_FIELD attach_type
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
static int bpf_prog_attach(const union bpf_attr *attr)
{
+ enum bpf_prog_type ptype;
struct bpf_prog *prog;
struct cgroup *cgrp;
- enum bpf_prog_type ptype;
+ int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -934,6 +1060,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
+ if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+ return -EINVAL;
+
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
@@ -942,6 +1071,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET_SOCK_CREATE:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
+ case BPF_CGROUP_SOCK_OPS:
+ ptype = BPF_PROG_TYPE_SOCK_OPS;
+ break;
default:
return -EINVAL;
}
@@ -956,10 +1088,13 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return PTR_ERR(cgrp);
}
- cgroup_bpf_update(cgrp, prog, attr->attach_type);
+ ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+ attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+ if (ret)
+ bpf_prog_put(prog);
cgroup_put(cgrp);
- return 0;
+ return ret;
}
#define BPF_PROG_DETACH_LAST_FIELD attach_type
@@ -967,6 +1102,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr)
{
struct cgroup *cgrp;
+ int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -978,11 +1114,12 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_SOCK_OPS:
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
- cgroup_bpf_update(cgrp, NULL, attr->attach_type);
+ ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
cgroup_put(cgrp);
break;
@@ -990,10 +1127,264 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return -EINVAL;
}
- return 0;
+ return ret;
}
+
#endif /* CONFIG_CGROUP_BPF */
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
+
+/*
+ * BPF_PROG_TEST_RUN handler.
+ *
+ * Looks up the program behind attr->test.prog_fd and hands the request to
+ * the program type's ->test_run() callback.
+ *
+ * Returns -EINVAL for malformed attributes, the error from bpf_prog_get()
+ * if the fd cannot be resolved, -ENOTSUPP when the program type has no
+ * ->test_run() callback, and otherwise whatever the callback returns.
+ */
+static int bpf_prog_test_run(const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	struct bpf_prog *target;
+	int rc;
+
+	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
+		return -EINVAL;
+
+	target = bpf_prog_get(attr->test.prog_fd);
+	if (IS_ERR(target))
+		return PTR_ERR(target);
+
+	if (!target->aux->ops->test_run)
+		rc = -ENOTSUPP;
+	else
+		rc = target->aux->ops->test_run(target, attr, uattr);
+
+	/* drop the reference taken by bpf_prog_get() */
+	bpf_prog_put(target);
+	return rc;
+}
+
+#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
+
+/*
+ * Common handler for BPF_PROG_GET_NEXT_ID and BPF_MAP_GET_NEXT_ID.
+ *
+ * Finds the first id in @idr that is greater than attr->start_id and
+ * writes it to uattr->next_id.  @lock is the spinlock protecting @idr.
+ *
+ * Returns 0 on success, -EINVAL for malformed attributes or a start_id of
+ * INT_MAX or above, -EPERM without CAP_SYS_ADMIN, -ENOENT when no further
+ * id exists, or the error from put_user().
+ */
+static int bpf_obj_get_next_id(const union bpf_attr *attr,
+			       union bpf_attr __user *uattr,
+			       struct idr *idr,
+			       spinlock_t *lock)
+{
+	u32 id = attr->start_id;
+	void *entry;
+
+	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || id >= INT_MAX)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* start the search one past the id supplied by user space */
+	id++;
+
+	spin_lock_bh(lock);
+	entry = idr_get_next(idr, &id);
+	spin_unlock_bh(lock);
+
+	/* entry is only tested for NULL, never dereferenced */
+	if (!entry)
+		return -ENOENT;
+
+	return put_user(id, &uattr->next_id);
+}
+
+#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
+
+/*
+ * BPF_PROG_GET_FD_BY_ID handler: open a new fd for the program whose id
+ * is attr->prog_id.
+ *
+ * Returns the new fd on success, -EINVAL for malformed attributes, -EPERM
+ * without CAP_SYS_ADMIN, -ENOENT if no live program has that id, or the
+ * error from bpf_prog_inc_not_zero() / bpf_prog_new_fd().
+ */
+static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+{
+	struct bpf_prog *prog;
+	int fd;
+
+	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* look up and pin the program while the idr cannot change */
+	spin_lock_bh(&prog_idr_lock);
+	prog = idr_find(&prog_idr, attr->prog_id);
+	prog = prog ? bpf_prog_inc_not_zero(prog) : ERR_PTR(-ENOENT);
+	spin_unlock_bh(&prog_idr_lock);
+
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	fd = bpf_prog_new_fd(prog);
+	if (fd >= 0)
+		return fd;
+
+	/* no fd was created, so give back the reference taken above */
+	bpf_prog_put(prog);
+	return fd;
+}
+
+#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id
+
+/*
+ * BPF_MAP_GET_FD_BY_ID handler: open a new fd for the map whose id is
+ * attr->map_id.
+ *
+ * The map is pinned with bpf_map_inc_not_zero(map, true), i.e. both
+ * refcnt and usercnt are raised, because the new fd is a user reference.
+ *
+ * Returns the new fd on success, -EINVAL for malformed attributes, -EPERM
+ * without CAP_SYS_ADMIN, -ENOENT if no live map has that id, or the error
+ * from bpf_map_inc_not_zero() / bpf_map_new_fd().
+ */
+static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
+{
+	struct bpf_map *map;
+	u32 id = attr->map_id;
+	int fd;
+
+	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* look up and pin the map while the idr cannot change */
+	spin_lock_bh(&map_idr_lock);
+	map = idr_find(&map_idr, id);
+	if (map)
+		map = bpf_map_inc_not_zero(map, true);
+	else
+		map = ERR_PTR(-ENOENT);
+	spin_unlock_bh(&map_idr_lock);
+
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	fd = bpf_map_new_fd(map);
+	if (fd < 0)
+		/*
+		 * Both refcnt and usercnt were raised above, so both must
+		 * be dropped; a plain bpf_map_put() would leak usercnt.
+		 */
+		bpf_map_put_with_uref(map);
+
+	return fd;
+}
+
+/*
+ * Verify that the part of a user buffer the kernel does not know about
+ * contains only zero bytes.
+ *
+ * @uaddr:         start of the user buffer
+ * @expected_size: number of leading bytes the kernel understands
+ * @actual_size:   size of the buffer as claimed by user space
+ *
+ * Returns 0 if actual_size <= expected_size or every byte in
+ * [expected_size, actual_size) is zero, -E2BIG if a non-zero byte is
+ * found, or the error from get_user() if a byte cannot be read.
+ *
+ * NOTE(review): actual_size is not bounded here, so the loop length is
+ * controlled by the caller's argument - confirm each caller caps it.
+ */
+static int check_uarg_tail_zero(void __user *uaddr,
+				size_t expected_size,
+				size_t actual_size)
+{
+	unsigned char __user *cur;
+	unsigned char __user *end;
+	unsigned char val;
+
+	if (actual_size <= expected_size)
+		return 0;
+
+	end = uaddr + actual_size;
+
+	for (cur = uaddr + expected_size; cur < end; cur++) {
+		int err = get_user(val, cur);
+
+		if (err)
+			return err;
+		if (val)
+			return -E2BIG;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in a struct bpf_prog_info for @prog and copy it to the user buffer
+ * described by attr->info.info / attr->info.info_len.
+ *
+ * The user's struct is read first because jited_prog_len/jited_prog_insns
+ * and xlated_prog_len/xlated_prog_insns are inputs: they give the size and
+ * address of optional buffers that receive the instruction dumps.  On
+ * return the *_len fields hold the real lengths.  Callers without
+ * CAP_SYS_ADMIN get both lengths reported as 0 and no dump.
+ *
+ * Any bytes of the user struct beyond sizeof(struct bpf_prog_info) must
+ * be zero.  The number of bytes actually written back is stored in
+ * uattr->info.info_len.
+ *
+ * Returns 0 on success, -EFAULT if any user copy fails, or the error
+ * from check_uarg_tail_zero().
+ */
+static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
+				   const union bpf_attr *attr,
+				   union bpf_attr __user *uattr)
+{
+	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+	struct bpf_prog_info info = {};
+	u32 info_len = attr->info.info_len;
+	char __user *uinsns;
+	u32 ulen;
+	int err;
+
+	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	if (err)
+		return err;
+	info_len = min_t(u32, sizeof(info), info_len);
+
+	/*
+	 * err is 0 at this point, so it must not be returned here: a failed
+	 * copy has to be reported as -EFAULT rather than as success.
+	 */
+	if (copy_from_user(&info, uinfo, info_len))
+		return -EFAULT;
+
+	info.type = prog->type;
+	info.id = prog->aux->id;
+
+	memcpy(info.tag, prog->tag, sizeof(prog->tag));
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		info.jited_prog_len = 0;
+		info.xlated_prog_len = 0;
+		goto done;
+	}
+
+	/* ulen: size of the user's buffer; the field then gets the real size */
+	ulen = info.jited_prog_len;
+	info.jited_prog_len = prog->jited_len;
+	if (info.jited_prog_len && ulen) {
+		uinsns = u64_to_user_ptr(info.jited_prog_insns);
+		ulen = min_t(u32, info.jited_prog_len, ulen);
+		if (copy_to_user(uinsns, prog->bpf_func, ulen))
+			return -EFAULT;
+	}
+
+	ulen = info.xlated_prog_len;
+	info.xlated_prog_len = bpf_prog_size(prog->len);
+	if (info.xlated_prog_len && ulen) {
+		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
+		ulen = min_t(u32, info.xlated_prog_len, ulen);
+		if (copy_to_user(uinsns, prog->insnsi, ulen))
+			return -EFAULT;
+	}
+
+done:
+	if (copy_to_user(uinfo, &info, info_len) ||
+	    put_user(info_len, &uattr->info.info_len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Fill in a struct bpf_map_info for @map and copy it to the user buffer
+ * described by attr->info.info / attr->info.info_len.
+ *
+ * Any bytes of the user struct beyond sizeof(struct bpf_map_info) must be
+ * zero.  At most sizeof(struct bpf_map_info) bytes are written, and the
+ * number written is stored back in uattr->info.info_len.
+ *
+ * Returns 0 on success, -EFAULT if a user copy fails, or the error from
+ * check_uarg_tail_zero().
+ */
+static int bpf_map_get_info_by_fd(struct bpf_map *map,
+				  const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	struct bpf_map_info __user *ubuf = u64_to_user_ptr(attr->info.info);
+	u32 len = attr->info.info_len;
+	struct bpf_map_info kinfo = {};
+	int rc;
+
+	rc = check_uarg_tail_zero(ubuf, sizeof(kinfo), len);
+	if (rc)
+		return rc;
+
+	/* never copy out more than the kernel's view of the struct */
+	len = min_t(u32, sizeof(kinfo), len);
+
+	kinfo.id = map->id;
+	kinfo.type = map->map_type;
+	kinfo.key_size = map->key_size;
+	kinfo.value_size = map->value_size;
+	kinfo.max_entries = map->max_entries;
+	kinfo.map_flags = map->map_flags;
+
+	if (copy_to_user(ubuf, &kinfo, len))
+		return -EFAULT;
+
+	if (put_user(len, &uattr->info.info_len))
+		return -EFAULT;
+
+	return 0;
+}
+
+#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
+
+/*
+ * BPF_OBJ_GET_INFO_BY_FD handler: dispatch on the kind of object behind
+ * attr->info.bpf_fd.
+ *
+ * Returns -EINVAL for malformed attributes or an fd that is neither a
+ * BPF program nor a BPF map, -EBADFD if the fd is not open, and otherwise
+ * the result of the program- or map-specific info helper.
+ */
+static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	struct fd f;
+	int rc;
+
+	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
+		return -EINVAL;
+
+	f = fdget(attr->info.bpf_fd);
+	if (!f.file)
+		return -EBADFD;
+
+	/* the file operations identify which kind of BPF object this is */
+	if (f.file->f_op == &bpf_prog_fops) {
+		rc = bpf_prog_get_info_by_fd(f.file->private_data, attr,
+					     uattr);
+	} else if (f.file->f_op == &bpf_map_fops) {
+		rc = bpf_map_get_info_by_fd(f.file->private_data, attr,
+					    uattr);
+	} else {
+		rc = -EINVAL;
+	}
+
+	fdput(f);
+	return rc;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -1013,23 +1404,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
* user-space does not rely on any kernel feature
* extensions we dont know about yet.
*/
- if (size > sizeof(attr)) {
- unsigned char __user *addr;
- unsigned char __user *end;
- unsigned char val;
-
- addr = (void __user *)uattr + sizeof(attr);
- end = (void __user *)uattr + size;
-
- for (; addr < end; addr++) {
- err = get_user(val, addr);
- if (err)
- return err;
- if (val)
- return -E2BIG;
- }
- size = sizeof(attr);
- }
+ err = check_uarg_tail_zero(uattr, sizeof(attr), size);
+ if (err)
+ return err;
+ size = min_t(u32, size, sizeof(attr));
/* copy attributes from user space, may be less than sizeof(bpf_attr) */
if (copy_from_user(&attr, uattr, size) != 0)
@@ -1060,7 +1438,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
-
#ifdef CONFIG_CGROUP_BPF
case BPF_PROG_ATTACH:
err = bpf_prog_attach(&attr);
@@ -1069,7 +1446,26 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
err = bpf_prog_detach(&attr);
break;
#endif
-
+ case BPF_PROG_TEST_RUN:
+ err = bpf_prog_test_run(&attr, uattr);
+ break;
+ case BPF_PROG_GET_NEXT_ID:
+ err = bpf_obj_get_next_id(&attr, uattr,
+ &prog_idr, &prog_idr_lock);
+ break;
+ case BPF_MAP_GET_NEXT_ID:
+ err = bpf_obj_get_next_id(&attr, uattr,
+ &map_idr, &map_idr_lock);
+ break;
+ case BPF_PROG_GET_FD_BY_ID:
+ err = bpf_prog_get_fd_by_id(&attr);
+ break;
+ case BPF_MAP_GET_FD_BY_ID:
+ err = bpf_map_get_fd_by_id(&attr);
+ break;
+ case BPF_OBJ_GET_INFO_BY_FD:
+ err = bpf_obj_get_info_by_fd(&attr, uattr);
+ break;
default:
err = -EINVAL;
break;