diff options
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- | kernel/bpf/syscall.c | 275 |
1 files changed, 240 insertions, 35 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 237f3d6a7ddc..461eb1e66a0f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -10,13 +10,17 @@ * General Public License for more details. */ #include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/syscalls.h> #include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mmzone.h> #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/license.h> #include <linux/filter.h> #include <linux/version.h> +#include <linux/kernel.h> DEFINE_PER_CPU(int, bpf_prog_active); @@ -48,6 +52,30 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +void *bpf_map_area_alloc(size_t size) +{ + /* We definitely need __GFP_NORETRY, so OOM killer doesn't + * trigger under memory pressure as we really just want to + * fail instead. + */ + const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; + void *area; + + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { + area = kmalloc(size, GFP_USER | flags); + if (area != NULL) + return area; + } + + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, + PAGE_KERNEL); +} + +void bpf_map_area_free(void *area) +{ + kvfree(area); +} + int bpf_map_precharge_memlock(u32 pages) { struct user_struct *user = get_current_user(); @@ -137,18 +165,31 @@ static int bpf_map_release(struct inode *inode, struct file *filp) static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_map *map = filp->private_data; + const struct bpf_array *array; + u32 owner_prog_type = 0; + + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + array = container_of(map, struct bpf_array, map); + owner_prog_type = array->owner_prog_type; + } seq_printf(m, "map_type:\t%u\n" "key_size:\t%u\n" "value_size:\t%u\n" "max_entries:\t%u\n" - "map_flags:\t%#x\n", + "map_flags:\t%#x\n" + "memlock:\t%llu\n", map->map_type, map->key_size, map->value_size, map->max_entries, - map->map_flags); + map->map_flags, + map->pages * 1ULL << PAGE_SHIFT); + + if (owner_prog_type) + seq_printf(m, "owner_prog_type:\t%u\n", + owner_prog_type); } #endif @@ -201,6 +242,7 @@ static int map_create(union bpf_attr *attr) /* failed to allocate fd */ goto free_map; + trace_bpf_map_create(map, err); return err; free_map: @@ -254,12 +296,6 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) return map; } -/* helper to convert user pointers passed inside __aligned_u64 fields */ -static void __user *u64_to_ptr(__u64 val) -{ - return (void __user *) (unsigned long) val; -} - int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) { return -ENOTSUPP; @@ -270,8 +306,8 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) static int map_lookup_elem(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); - void __user *uvalue = u64_to_ptr(attr->value); + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); int ufd = attr->map_fd; struct bpf_map *map; void *key, *value, *ptr; @@ -297,6 +333,7 @@ static int map_lookup_elem(union bpf_attr *attr) goto free_key; if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) value_size = round_up(map->value_size, 8) * num_possible_cpus(); else @@ -307,7 +344,8 @@ static int map_lookup_elem(union bpf_attr *attr) if (!value) goto free_key; - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@ -329,6 +367,7 @@ static int map_lookup_elem(union bpf_attr *attr) if (copy_to_user(uvalue, value, value_size) != 0) goto free_value; + trace_bpf_map_lookup_elem(map, ufd, key, value); err = 0; free_value: @@ -344,8 +383,8 @@ err_put: static int map_update_elem(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); - void __user *uvalue = u64_to_ptr(attr->value); + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); int ufd = attr->map_fd; struct bpf_map *map; void *key, *value; @@ -371,6 +410,7 @@ static int map_update_elem(union bpf_attr *attr) goto free_key; if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) value_size = round_up(map->value_size, 8) * num_possible_cpus(); else @@ -390,7 +430,8 @@ static int map_update_elem(union bpf_attr *attr) */ preempt_disable(); __this_cpu_inc(bpf_prog_active); - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) { + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_update(map, key, value, attr->flags); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_update(map, key, value, attr->flags); @@ -409,6 +450,8 @@ static int map_update_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); + if (!err) + trace_bpf_map_update_elem(map, ufd, key, value); free_value: kfree(value); free_key: @@ -422,7 +465,7 @@ err_put: static int map_delete_elem(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); + void __user *ukey = u64_to_user_ptr(attr->key); int ufd = attr->map_fd; struct bpf_map *map; struct fd f; @@ -454,6 +497,8 @@ static int map_delete_elem(union bpf_attr *attr) __this_cpu_dec(bpf_prog_active); preempt_enable(); + if (!err) + trace_bpf_map_delete_elem(map, ufd, key); free_key: kfree(key); err_put: @@ -466,8 +511,8 @@ err_put: static int map_get_next_key(union bpf_attr *attr) { - void __user *ukey = u64_to_ptr(attr->key); - void __user *unext_key = u64_to_ptr(attr->next_key); + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *unext_key = u64_to_user_ptr(attr->next_key); int ufd = attr->map_fd; struct bpf_map *map; void *key, *next_key; @@ -506,6 +551,7 @@ static int map_get_next_key(union bpf_attr *attr) if (copy_to_user(unext_key, next_key, map->key_size) != 0) goto free_next_key; + trace_bpf_map_next_key(map, ufd, key, next_key); err = 0; free_next_key: @@ -567,6 +613,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_xdp_adjust_head) + prog->xdp_adjust_head = 1; if (insn->imm == BPF_FUNC_tail_call) { /* mark bpf_tail_call as different opcode * to avoid conditional branch in @@ -601,19 +649,39 @@ static void free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +int __bpf_prog_charge(struct user_struct *user, u32 pages) +{ + unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + unsigned long user_bufs; + + if (user) { + user_bufs = atomic_long_add_return(pages, &user->locked_vm); + if (user_bufs > memlock_limit) { + atomic_long_sub(pages, &user->locked_vm); + return -EPERM; + } + } + + return 0; +} + +void __bpf_prog_uncharge(struct user_struct *user, u32 pages) +{ + if (user) + atomic_long_sub(pages, &user->locked_vm); +} + static int bpf_prog_charge_memlock(struct bpf_prog *prog) { struct user_struct *user = get_current_user(); - unsigned long memlock_limit; - - memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + int ret; - atomic_long_add(prog->pages, &user->locked_vm); - if (atomic_long_read(&user->locked_vm) > memlock_limit) { - atomic_long_sub(prog->pages, &user->locked_vm); + ret = __bpf_prog_charge(user, prog->pages); + if (ret) { free_uid(user); - return -EPERM; + return ret; } + prog->aux->user = user; return 0; } @@ -622,7 +690,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) { struct user_struct *user = prog->aux->user; - atomic_long_sub(prog->pages, &user->locked_vm); + __bpf_prog_uncharge(user, prog->pages); free_uid(user); } @@ -637,8 +705,11 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) void bpf_prog_put(struct bpf_prog *prog) { - if (atomic_dec_and_test(&prog->aux->refcnt)) + if (atomic_dec_and_test(&prog->aux->refcnt)) { + trace_bpf_prog_put_rcu(prog); + bpf_prog_kallsyms_del(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); + } } EXPORT_SYMBOL_GPL(bpf_prog_put); @@ -650,8 +721,30 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +#ifdef CONFIG_PROC_FS +static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) +{ + const struct bpf_prog *prog = filp->private_data; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; + + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + seq_printf(m, + "prog_type:\t%u\n" + "prog_jited:\t%u\n" + "prog_tag:\t%s\n" + "memlock:\t%llu\n", + prog->type, + prog->jited, + prog_tag, + prog->pages * 1ULL << PAGE_SHIFT); +} +#endif + static const struct file_operations bpf_prog_fops = { - .release = bpf_prog_release, +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_prog_show_fdinfo, +#endif + .release = bpf_prog_release, }; int bpf_prog_new_fd(struct bpf_prog *prog) @@ -682,10 +775,22 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) } EXPORT_SYMBOL_GPL(bpf_prog_add); +void bpf_prog_sub(struct bpf_prog *prog, int i) +{ + /* Only to be used for undoing previous bpf_prog_add() in some + * error path. We still know that another entity in our call + * path holds a reference to the program, thus atomic_sub() can + * be safely used in such cases! + */ + WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); +} +EXPORT_SYMBOL_GPL(bpf_prog_sub); + struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) { return bpf_prog_add(prog, 1); } +EXPORT_SYMBOL_GPL(bpf_prog_inc); static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) { @@ -713,7 +818,11 @@ struct bpf_prog *bpf_prog_get(u32 ufd) struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { - return __bpf_prog_get(ufd, &type); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type); + + if (!IS_ERR(prog)) + trace_bpf_prog_get_type(prog); + return prog; } EXPORT_SYMBOL_GPL(bpf_prog_get_type); @@ -732,7 +841,7 @@ static int bpf_prog_load(union bpf_attr *attr) return -EINVAL; /* copy eBPF program license from user space */ - if (strncpy_from_user(license, u64_to_ptr(attr->license), + if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) return -EFAULT; license[sizeof(license) - 1] = 0; @@ -740,8 +849,8 @@ static int bpf_prog_load(union bpf_attr *attr) /* eBPF programs must be GPL compatible to use GPL-ed functions */ is_gpl = license_is_gpl_compatible(license); - if (attr->insn_cnt >= BPF_MAXINSNS) - return -EINVAL; + if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) + return -E2BIG; if (type == BPF_PROG_TYPE_KPROBE && attr->kern_version != LINUX_VERSION_CODE) @@ -762,8 +871,8 @@ static int bpf_prog_load(union bpf_attr *attr) prog->len = attr->insn_cnt; err = -EFAULT; - if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), - prog->len * sizeof(struct bpf_insn)) != 0) + if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), + bpf_prog_insn_size(prog)) != 0) goto free_prog; prog->orig_prog = NULL; @@ -795,6 +904,8 @@ static int bpf_prog_load(union bpf_attr *attr) /* failed to allocate fd */ goto free_used_maps; + bpf_prog_kallsyms_add(prog); + trace_bpf_prog_load(prog, err); return err; free_used_maps: @@ -813,7 +924,7 @@ static int bpf_obj_pin(const union bpf_attr *attr) if (CHECK_ATTR(BPF_OBJ)) return -EINVAL; - return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname)); + return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); } static int bpf_obj_get(const union bpf_attr *attr) @@ -821,8 +932,92 @@ static int bpf_obj_get(const union bpf_attr *attr) if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) return -EINVAL; - return bpf_obj_get_user(u64_to_ptr(attr->pathname)); + return bpf_obj_get_user(u64_to_user_ptr(attr->pathname)); +} + +#ifdef CONFIG_CGROUP_BPF + +#define BPF_PROG_ATTACH_LAST_FIELD attach_flags + +static int bpf_prog_attach(const union bpf_attr *attr) +{ + enum bpf_prog_type ptype; + struct bpf_prog *prog; + struct cgroup *cgrp; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (CHECK_ATTR(BPF_PROG_ATTACH)) + return -EINVAL; + + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + return -EINVAL; + + switch (attr->attach_type) { + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + ptype = BPF_PROG_TYPE_CGROUP_SKB; + break; + case BPF_CGROUP_INET_SOCK_CREATE: + ptype = BPF_PROG_TYPE_CGROUP_SOCK; + break; + default: + return -EINVAL; + } + + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) { + bpf_prog_put(prog); + return PTR_ERR(cgrp); + } + + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + if (ret) + bpf_prog_put(prog); + cgroup_put(cgrp); + + return ret; +} + +#define BPF_PROG_DETACH_LAST_FIELD attach_type + +static int bpf_prog_detach(const union bpf_attr *attr) +{ + struct cgroup *cgrp; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (CHECK_ATTR(BPF_PROG_DETACH)) + return -EINVAL; + + switch (attr->attach_type) { + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); + cgroup_put(cgrp); + break; + + default: + return -EINVAL; + } + + return ret; } +#endif /* CONFIG_CGROUP_BPF */ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { @@ -890,6 +1085,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_OBJ_GET: err = bpf_obj_get(&attr); break; + +#ifdef CONFIG_CGROUP_BPF + case BPF_PROG_ATTACH: + err = bpf_prog_attach(&attr); + break; + case BPF_PROG_DETACH: + err = bpf_prog_detach(&attr); + break; +#endif + default: err = -EINVAL; break; |