about | summary | refs | log | tree | commit | diff
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- kernel/bpf/syscall.c 275
1 files changed, 240 insertions, 35 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 237f3d6a7ddc..461eb1e66a0f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -10,13 +10,17 @@
* General Public License for more details.
*/
#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
+#include <linux/kernel.h>
DEFINE_PER_CPU(int, bpf_prog_active);
@@ -48,6 +52,30 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
list_add(&tl->list_node, &bpf_map_types);
}
+void *bpf_map_area_alloc(size_t size)
+{
+ /* We definitely need __GFP_NORETRY, so OOM killer doesn't
+ * trigger under memory pressure as we really just want to
+ * fail instead.
+ */
+ const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+ void *area;
+
+ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ area = kmalloc(size, GFP_USER | flags);
+ if (area != NULL)
+ return area;
+ }
+
+ return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
+ PAGE_KERNEL);
+}
+
+void bpf_map_area_free(void *area)
+{
+ kvfree(area);
+}
+
int bpf_map_precharge_memlock(u32 pages)
{
struct user_struct *user = get_current_user();
@@ -137,18 +165,31 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_map *map = filp->private_data;
+ const struct bpf_array *array;
+ u32 owner_prog_type = 0;
+
+ if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+ array = container_of(map, struct bpf_array, map);
+ owner_prog_type = array->owner_prog_type;
+ }
seq_printf(m,
"map_type:\t%u\n"
"key_size:\t%u\n"
"value_size:\t%u\n"
"max_entries:\t%u\n"
- "map_flags:\t%#x\n",
+ "map_flags:\t%#x\n"
+ "memlock:\t%llu\n",
map->map_type,
map->key_size,
map->value_size,
map->max_entries,
- map->map_flags);
+ map->map_flags,
+ map->pages * 1ULL << PAGE_SHIFT);
+
+ if (owner_prog_type)
+ seq_printf(m, "owner_prog_type:\t%u\n",
+ owner_prog_type);
}
#endif
@@ -201,6 +242,7 @@ static int map_create(union bpf_attr *attr)
/* failed to allocate fd */
goto free_map;
+ trace_bpf_map_create(map, err);
return err;
free_map:
@@ -254,12 +296,6 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
return map;
}
-/* helper to convert user pointers passed inside __aligned_u64 fields */
-static void __user *u64_to_ptr(__u64 val)
-{
- return (void __user *) (unsigned long) val;
-}
-
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
return -ENOTSUPP;
@@ -270,8 +306,8 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
static int map_lookup_elem(union bpf_attr *attr)
{
- void __user *ukey = u64_to_ptr(attr->key);
- void __user *uvalue = u64_to_ptr(attr->value);
+ void __user *ukey = u64_to_user_ptr(attr->key);
+ void __user *uvalue = u64_to_user_ptr(attr->value);
int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value, *ptr;
@@ -297,6 +333,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto free_key;
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
value_size = round_up(map->value_size, 8) * num_possible_cpus();
else
@@ -307,7 +344,8 @@ static int map_lookup_elem(union bpf_attr *attr)
if (!value)
goto free_key;
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value);
@@ -329,6 +367,7 @@ static int map_lookup_elem(union bpf_attr *attr)
if (copy_to_user(uvalue, value, value_size) != 0)
goto free_value;
+ trace_bpf_map_lookup_elem(map, ufd, key, value);
err = 0;
free_value:
@@ -344,8 +383,8 @@ err_put:
static int map_update_elem(union bpf_attr *attr)
{
- void __user *ukey = u64_to_ptr(attr->key);
- void __user *uvalue = u64_to_ptr(attr->value);
+ void __user *ukey = u64_to_user_ptr(attr->key);
+ void __user *uvalue = u64_to_user_ptr(attr->value);
int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value;
@@ -371,6 +410,7 @@ static int map_update_elem(union bpf_attr *attr)
goto free_key;
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
value_size = round_up(map->value_size, 8) * num_possible_cpus();
else
@@ -390,7 +430,8 @@ static int map_update_elem(union bpf_attr *attr)
*/
preempt_disable();
__this_cpu_inc(bpf_prog_active);
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
@@ -409,6 +450,8 @@ static int map_update_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
+ if (!err)
+ trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
kfree(value);
free_key:
@@ -422,7 +465,7 @@ err_put:
static int map_delete_elem(union bpf_attr *attr)
{
- void __user *ukey = u64_to_ptr(attr->key);
+ void __user *ukey = u64_to_user_ptr(attr->key);
int ufd = attr->map_fd;
struct bpf_map *map;
struct fd f;
@@ -454,6 +497,8 @@ static int map_delete_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
+ if (!err)
+ trace_bpf_map_delete_elem(map, ufd, key);
free_key:
kfree(key);
err_put:
@@ -466,8 +511,8 @@ err_put:
static int map_get_next_key(union bpf_attr *attr)
{
- void __user *ukey = u64_to_ptr(attr->key);
- void __user *unext_key = u64_to_ptr(attr->next_key);
+ void __user *ukey = u64_to_user_ptr(attr->key);
+ void __user *unext_key = u64_to_user_ptr(attr->next_key);
int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *next_key;
@@ -506,6 +551,7 @@ static int map_get_next_key(union bpf_attr *attr)
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
goto free_next_key;
+ trace_bpf_map_next_key(map, ufd, key, next_key);
err = 0;
free_next_key:
@@ -567,6 +613,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_xdp_adjust_head)
+ prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
@@ -601,19 +649,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ unsigned long user_bufs;
+
+ if (user) {
+ user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+ if (user_bufs > memlock_limit) {
+ atomic_long_sub(pages, &user->locked_vm);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+ if (user)
+ atomic_long_sub(pages, &user->locked_vm);
+}
+
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = get_current_user();
- unsigned long memlock_limit;
-
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ int ret;
- atomic_long_add(prog->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
- atomic_long_sub(prog->pages, &user->locked_vm);
+ ret = __bpf_prog_charge(user, prog->pages);
+ if (ret) {
free_uid(user);
- return -EPERM;
+ return ret;
}
+
prog->aux->user = user;
return 0;
}
@@ -622,7 +690,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = prog->aux->user;
- atomic_long_sub(prog->pages, &user->locked_vm);
+ __bpf_prog_uncharge(user, prog->pages);
free_uid(user);
}
@@ -637,8 +705,11 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
void bpf_prog_put(struct bpf_prog *prog)
{
- if (atomic_dec_and_test(&prog->aux->refcnt))
+ if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ trace_bpf_prog_put_rcu(prog);
+ bpf_prog_kallsyms_del(prog);
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ }
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
@@ -650,8 +721,30 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
return 0;
}
+#ifdef CONFIG_PROC_FS
+static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ const struct bpf_prog *prog = filp->private_data;
+ char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+
+ bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+ seq_printf(m,
+ "prog_type:\t%u\n"
+ "prog_jited:\t%u\n"
+ "prog_tag:\t%s\n"
+ "memlock:\t%llu\n",
+ prog->type,
+ prog->jited,
+ prog_tag,
+ prog->pages * 1ULL << PAGE_SHIFT);
+}
+#endif
+
static const struct file_operations bpf_prog_fops = {
- .release = bpf_prog_release,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_prog_show_fdinfo,
+#endif
+ .release = bpf_prog_release,
};
int bpf_prog_new_fd(struct bpf_prog *prog)
@@ -682,10 +775,22 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
+void bpf_prog_sub(struct bpf_prog *prog, int i)
+{
+ /* Only to be used for undoing previous bpf_prog_add() in some
+ * error path. We still know that another entity in our call
+ * path holds a reference to the program, thus atomic_sub() can
+ * be safely used in such cases!
+ */
+ WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
+}
+EXPORT_SYMBOL_GPL(bpf_prog_sub);
+
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
return bpf_prog_add(prog, 1);
}
+EXPORT_SYMBOL_GPL(bpf_prog_inc);
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
@@ -713,7 +818,11 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
- return __bpf_prog_get(ufd, &type);
+ struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
+
+ if (!IS_ERR(prog))
+ trace_bpf_prog_get_type(prog);
+ return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
@@ -732,7 +841,7 @@ static int bpf_prog_load(union bpf_attr *attr)
return -EINVAL;
/* copy eBPF program license from user space */
- if (strncpy_from_user(license, u64_to_ptr(attr->license),
+ if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
sizeof(license) - 1) < 0)
return -EFAULT;
license[sizeof(license) - 1] = 0;
@@ -740,8 +849,8 @@ static int bpf_prog_load(union bpf_attr *attr)
/* eBPF programs must be GPL compatible to use GPL-ed functions */
is_gpl = license_is_gpl_compatible(license);
- if (attr->insn_cnt >= BPF_MAXINSNS)
- return -EINVAL;
+ if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
+ return -E2BIG;
if (type == BPF_PROG_TYPE_KPROBE &&
attr->kern_version != LINUX_VERSION_CODE)
@@ -762,8 +871,8 @@ static int bpf_prog_load(union bpf_attr *attr)
prog->len = attr->insn_cnt;
err = -EFAULT;
- if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
- prog->len * sizeof(struct bpf_insn)) != 0)
+ if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
+ bpf_prog_insn_size(prog)) != 0)
goto free_prog;
prog->orig_prog = NULL;
@@ -795,6 +904,8 @@ static int bpf_prog_load(union bpf_attr *attr)
/* failed to allocate fd */
goto free_used_maps;
+ bpf_prog_kallsyms_add(prog);
+ trace_bpf_prog_load(prog, err);
return err;
free_used_maps:
@@ -813,7 +924,7 @@ static int bpf_obj_pin(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_OBJ))
return -EINVAL;
- return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
+ return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}
static int bpf_obj_get(const union bpf_attr *attr)
@@ -821,8 +932,92 @@ static int bpf_obj_get(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
return -EINVAL;
- return bpf_obj_get_user(u64_to_ptr(attr->pathname));
+ return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
+}
+
+#ifdef CONFIG_CGROUP_BPF
+
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+
+static int bpf_prog_attach(const union bpf_attr *attr)
+{
+ enum bpf_prog_type ptype;
+ struct bpf_prog *prog;
+ struct cgroup *cgrp;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_PROG_ATTACH))
+ return -EINVAL;
+
+ if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+ return -EINVAL;
+
+ switch (attr->attach_type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ ptype = BPF_PROG_TYPE_CGROUP_SKB;
+ break;
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+
+ ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+ attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+ if (ret)
+ bpf_prog_put(prog);
+ cgroup_put(cgrp);
+
+ return ret;
+}
+
+#define BPF_PROG_DETACH_LAST_FIELD attach_type
+
+static int bpf_prog_detach(const union bpf_attr *attr)
+{
+ struct cgroup *cgrp;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_PROG_DETACH))
+ return -EINVAL;
+
+ switch (attr->attach_type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+ cgroup_put(cgrp);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
}
+#endif /* CONFIG_CGROUP_BPF */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
@@ -890,6 +1085,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
+
+#ifdef CONFIG_CGROUP_BPF
+ case BPF_PROG_ATTACH:
+ err = bpf_prog_attach(&attr);
+ break;
+ case BPF_PROG_DETACH:
+ err = bpf_prog_detach(&attr);
+ break;
+#endif
+
default:
err = -EINVAL;
break;