From f1a2e44a3aeccb3ff18d3ccc0b0203e70b95bd92 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:25 +0200 Subject: bpf: add queue and stack maps Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs. These maps support peek, pop and push operations that are exposed to eBPF programs through the new bpf_map[peek/pop/push] helpers. Those operations are exposed to userspace applications through the already existing syscalls in the following way: BPF_MAP_LOOKUP_ELEM -> peek BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop BPF_MAP_UPDATE_ELEM -> push Queue/stack maps are implemented using a buffer, tail and head indexes, hence BPF_F_NO_PREALLOC is not supported. As opposite to other maps, queue and stack do not use RCU for protecting maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE argument that is a pointer to a memory zone where to save the value of a map. Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra argument. Our main motivation for implementing queue/stack maps was to keep track of a pool of elements, like network ports in a SNAT, however we forsee other use cases, like for exampling saving last N kernel events in a map and then analysing from userspace. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5e46f6732781..70082cb626b4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -128,6 +128,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -462,6 +464,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -2303,7 +2327,10 @@ union bpf_attr { FN(skb_ancestor_cgroup_id), \ FN(sk_lookup_tcp), \ FN(sk_lookup_udp), \ - FN(sk_release), + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit From bd513cd08f10cbe28856f99ae951e86e86803861 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:30 +0200 Subject: bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall The previous patch implemented a bpf queue/stack maps that provided the peek/pop/push functions. There is not a direct relationship between those functions and the current maps syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added, this is mapped to the pop operation in the queue/stack maps and it is still to implement in other kind of maps. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 70082cb626b4..a2fb333290dc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1617407f9ee5..49ae64a26562 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -999,6 +999,69 @@ err_put: return err; } +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value + +static int map_lookup_and_delete_elem(union bpf_attr *attr) +{ + void __user *ukey = u64_to_user_ptr(attr->key); + void __user *uvalue = u64_to_user_ptr(attr->value); + int ufd = attr->map_fd; + struct bpf_map *map; + void *key, *value, *ptr; + u32 value_size; + struct fd f; + int err; + + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } + + key = __bpf_copy_key(ukey, map->key_size); + if (IS_ERR(key)) { + err = PTR_ERR(key); + goto err_put; + } + + value_size = map->value_size; + + err = -ENOMEM; + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); + if (!value) + goto free_key; + + if (map->map_type == BPF_MAP_TYPE_QUEUE || + map->map_type == BPF_MAP_TYPE_STACK) { + err = map->ops->map_pop_elem(map, value); + } else { + err = -ENOTSUPP; + } + + if (err) + goto free_value; + + if (copy_to_user(uvalue, value, value_size) != 0) + goto free_value; + + err = 0; + +free_value: + kfree(value); +free_key: + kfree(key); +err_put: + fdput(f); + return err; +} + static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name) \ [_id] = & _name ## _prog_ops, @@ -2472,6 +2535,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_TASK_FD_QUERY: err = bpf_task_fd_query(&attr, uattr); break; + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: + err = map_lookup_and_delete_elem(&attr); + break; default: err = -EINVAL; break; -- cgit