From d8d7218ad842e18fc6976b87c08ed749e8d56313 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 6 Jul 2018 11:49:00 +0900 Subject: xdp: XDP_REDIRECT should check IFF_UP and MTU Otherwise we end up attempting to send packets from down devices or to send oversized packets, which may cause unexpected driver/device behaviour. Generic XDP has already done this check, so reuse the logic in native XDP. Fixes: 814abfabef3c ("xdp: add bpf_redirect helper function") Signed-off-by: Toshiaki Makita Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 642c97f6d1b8..d361fc1e3bf3 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, { struct net_device *dev = dst->dev; struct xdp_frame *xdpf; + int err; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; + err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); + if (unlikely(err)) + return err; + xdpf = convert_to_xdp_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, { int err; - err = __xdp_generic_ok_fwd_dev(skb, dst->dev); + err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; skb->dev = dst->dev; -- cgit From 1bf9116d0866a649104a5dfa008c302ad54d1e02 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 8 Aug 2018 23:00:45 +0200 Subject: xdp: fix bug in devmap teardown code path Like cpumap teardown, the devmap teardown code also flushes remaining xdp_frames via bq_xmit_all() in case a map entry is removed. The code can call xdp_return_frame_rx_napi() from the wrong context in case ndo_xdp_xmit() fails. 
Fixes: 389ab7f01af9 ("xdp: introduce xdp_return_frame_rx_napi") Fixes: 735fc4054b3a ("xdp: change ndo_xdp_xmit API to support bulking") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d361fc1e3bf3..750d45edae79 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit) } static int bq_xmit_all(struct bpf_dtab_netdev *obj, - struct xdp_bulk_queue *bq, u32 flags) + struct xdp_bulk_queue *bq, u32 flags, + bool in_napi_ctx) { struct net_device *dev = obj->dev; int sent = 0, drops = 0, err = 0; @@ -254,7 +255,10 @@ error: struct xdp_frame *xdpf = bq->q[i]; /* RX path under NAPI protection, can return frames faster */ - xdp_return_frame_rx_napi(xdpf); + if (likely(in_napi_ctx)) + xdp_return_frame_rx_napi(xdpf); + else + xdp_return_frame(xdpf); drops++; } goto out; @@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map) __clear_bit(bit, bitmap); bq = this_cpu_ptr(dev->bulkq); - bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); + bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true); } } @@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) - bq_xmit_all(obj, bq, 0); + bq_xmit_all(obj, bq, 0, true); /* Ingress dev_rx will be the same for all xdp_frame's in * bulk_queue, because bq stored per-CPU and must be flushed @@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) __clear_bit(dev->bit, bitmap); bq = per_cpu_ptr(dev->bulkq, cpu); - bq_xmit_all(dev, bq, XDP_XMIT_FLUSH); + bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false); } } } -- cgit From e8d2bec0457962e8f348a9a3627b398f7fe5c5fc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 12 Aug 2018 01:59:17 +0200 Subject: bpf: decouple btf from seq bpf fs dump and enable more maps Commit a26ca7c982cb ("bpf: btf: Add pretty print support to the basic arraymap") and 699c86d6ec21 ("bpf: btf: add pretty print for hash/lru_hash maps") enabled support for BTF and dumping via BPF fs for array and hash/lru map. However, both can be decoupled from each other such that regular BPF maps can be supported for attaching BTF key/value information, while not all maps necessarily need to dump via map_seq_show_elem() callback. The basic sanity check which is a prerequisite for all maps is that key/value size has to match in any case, and some maps can have extra checks via map_check_btf() callback, e.g. probing certain types or indicating no support in general. With that we can also enable retrieving BTF info for per-cpu map types and lpm. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Yonghong Song --- include/linux/bpf.h | 13 +++++++++---- kernel/bpf/arraymap.c | 26 ++++++++++++-------------- kernel/bpf/cpumap.c | 1 + kernel/bpf/devmap.c | 1 + kernel/bpf/hashtab.c | 20 +------------------- kernel/bpf/inode.c | 3 ++- kernel/bpf/local_storage.c | 1 + kernel/bpf/lpm_trie.c | 12 ++++++++++++ kernel/bpf/sockmap.c | 2 ++ kernel/bpf/stackmap.c | 1 + kernel/bpf/syscall.c | 36 ++++++++++++++++++++++++++++++++---- kernel/bpf/xskmap.c | 3 +-- 12 files changed, 75 insertions(+), 44 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index db11662faea6..523481a3471b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -23,7 +23,7 @@ struct bpf_prog; struct bpf_map; struct sock; struct seq_file; -struct btf; +struct btf_type; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { @@ -48,8 +48,9 @@ struct bpf_map_ops { u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, struct seq_file *m); - int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, - u32 key_type_id, u32 value_type_id); + int (*map_check_btf)(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type); }; struct bpf_map { @@ -118,9 +119,13 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map) static inline bool bpf_map_support_seq_show(const struct bpf_map *map) { - return map->ops->map_seq_show_elem && map->ops->map_check_btf; + return map->btf && map->ops->map_seq_show_elem; } +int map_check_no_btf(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type); + extern const struct bpf_map_ops bpf_map_offload_ops; /* function argument constraints */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f6ca3e712831..0c17aab3ce5f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -358,27 +358,20 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, - u32 btf_key_id, u32 btf_value_id) +static int array_map_check_btf(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type) { - const struct btf_type *key_type, *value_type; - u32 key_size, value_size; u32 int_data; - key_type = btf_type_id_size(btf, &btf_key_id, &key_size); - if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) return -EINVAL; int_data = *(u32 *)(key_type + 1); - /* bpf array can only take a u32 key. This check makes - * sure that the btf matches the attr used during map_create. + /* bpf array can only take a u32 key. This check makes sure + * that the btf matches the attr used during map_create. 
*/ - if (BTF_INT_BITS(int_data) != 32 || key_size != 4 || - BTF_INT_OFFSET(int_data)) - return -EINVAL; - - value_type = btf_type_id_size(btf, &btf_value_id, &value_size); - if (!value_type || value_size != map->value_size) + if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) return -EINVAL; return 0; @@ -405,6 +398,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_lookup_elem = percpu_array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_check_btf = array_map_check_btf, }; static int fd_array_map_alloc_check(union bpf_attr *attr) @@ -546,6 +540,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = bpf_fd_array_map_clear, + .map_check_btf = map_check_no_btf, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -634,6 +629,7 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_get_ptr = perf_event_fd_array_get_ptr, .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, + .map_check_btf = map_check_no_btf, }; #ifdef CONFIG_CGROUPS @@ -665,6 +661,7 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_delete_elem = fd_array_map_delete_elem, .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, + .map_check_btf = map_check_no_btf, }; #endif @@ -749,4 +746,5 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, + .map_check_btf = map_check_no_btf, }; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index e0918d180f08..3b494941a44a 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -555,6 +555,7 @@ const struct bpf_map_ops cpu_map_ops = { .map_update_elem = cpu_map_update_elem, .map_lookup_elem = cpu_map_lookup_elem, .map_get_next_key = cpu_map_get_next_key, + .map_check_btf = map_check_no_btf, }; static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu, diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d361fc1e3bf3..a7c6620d8389 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -484,6 +484,7 @@ const struct bpf_map_ops dev_map_ops = { .map_lookup_elem = dev_map_lookup_elem, .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, + .map_check_btf = map_check_no_btf, }; static int dev_map_notification(struct notifier_block *notifier, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index d6110042e0d9..04b8eda94e7d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1185,23 +1185,6 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int htab_map_check_btf(const struct bpf_map *map, const struct btf *btf, - u32 btf_key_id, u32 btf_value_id) -{ - const struct btf_type *key_type, *value_type; - u32 key_size, value_size; - - key_type = btf_type_id_size(btf, &btf_key_id, &key_size); - if (!key_type || key_size != map->key_size) - return -EINVAL; - - value_type = btf_type_id_size(btf, &btf_value_id, &value_size); - if (!value_type || value_size != map->value_size) - return -EINVAL; - - return 0; -} - const struct bpf_map_ops htab_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1212,7 +1195,6 @@ const struct bpf_map_ops htab_map_ops = { .map_delete_elem = htab_map_delete_elem, .map_gen_lookup = 
htab_map_gen_lookup, .map_seq_show_elem = htab_map_seq_show_elem, - .map_check_btf = htab_map_check_btf, }; const struct bpf_map_ops htab_lru_map_ops = { @@ -1225,7 +1207,6 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_delete_elem = htab_lru_map_delete_elem, .map_gen_lookup = htab_lru_map_gen_lookup, .map_seq_show_elem = htab_map_seq_show_elem, - .map_check_btf = htab_map_check_btf, }; /* Called from eBPF program */ @@ -1452,4 +1433,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, + .map_check_btf = map_check_no_btf, }; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fc5b103512e7..2ada5e21dfa6 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -334,7 +334,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) struct bpf_map *map = arg; return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, - map->btf ? &bpffs_map_fops : &bpffs_obj_fops); + bpf_map_support_seq_show(map) ? + &bpffs_map_fops : &bpffs_obj_fops); } static struct dentry * diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index fc4e37f68f2a..22ad967d1e5f 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -246,6 +246,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = { .map_lookup_elem = cgroup_storage_lookup_elem, .map_update_elem = cgroup_storage_update_elem, .map_delete_elem = cgroup_storage_delete_elem, + .map_check_btf = map_check_no_btf, }; int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1603492c9cc7..9058317ba9de 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -10,11 +10,13 @@ */ #include +#include #include #include #include #include #include +#include /* Intermediate node */ #define LPM_TREE_NODE_FLAG_IM BIT(0) @@ -686,6 +688,15 @@ free_stack: return err; } +static int trie_check_btf(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + /* Keys must have struct bpf_lpm_trie_key embedded. */ + return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? 
+ -EINVAL : 0; +} + const struct bpf_map_ops trie_map_ops = { .map_alloc = trie_alloc, .map_free = trie_free, @@ -693,4 +704,5 @@ const struct bpf_map_ops trie_map_ops = { .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, + .map_check_btf = trie_check_btf, }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 0b38be5a955c..e376ffffebce 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2495,6 +2495,7 @@ const struct bpf_map_ops sock_map_ops = { .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, .map_release_uref = sock_map_release, + .map_check_btf = map_check_no_btf, }; const struct bpf_map_ops sock_hash_ops = { @@ -2505,6 +2506,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_update_elem = sock_hash_update_elem, .map_delete_elem = sock_hash_delete_elem, .map_release_uref = sock_map_release, + .map_check_btf = map_check_no_btf, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index b675a3f3d141..8061a439ef18 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -607,6 +607,7 @@ const struct bpf_map_ops stack_map_ops = { .map_lookup_elem = stack_map_lookup_elem, .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, + .map_check_btf = map_check_no_btf, }; static int __init stack_map_init(void) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 57f4d076141b..43727ed0d94a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -103,6 +103,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr, const struct bpf_map_ops bpf_map_offload_ops = { .map_alloc = bpf_map_offload_map_alloc, .map_free = bpf_map_offload_map_free, + .map_check_btf = map_check_no_btf, }; static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) @@ -455,6 +456,34 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } +int map_check_no_btf(const struct bpf_map *map, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + return -ENOTSUPP; +} + +static int map_check_btf(const struct bpf_map *map, const struct btf *btf, + u32 btf_key_id, u32 btf_value_id) +{ + const struct btf_type *key_type, *value_type; + u32 key_size, value_size; + int ret = 0; + + key_type = btf_type_id_size(btf, &btf_key_id, &key_size); + if (!key_type || key_size != map->key_size) + return -EINVAL; + + value_type = btf_type_id_size(btf, &btf_value_id, &value_size); + if (!value_type || value_size != map->value_size) + return -EINVAL; + + if (map->ops->map_check_btf) + ret = map->ops->map_check_btf(map, key_type, value_type); + + return ret; +} + #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id /* called via syscall */ static int map_create(union bpf_attr *attr) @@ -489,8 +518,7 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); - if (bpf_map_support_seq_show(map) && - (attr->btf_key_type_id || attr->btf_value_type_id)) { + if (attr->btf_key_type_id || attr->btf_value_type_id) { struct btf *btf; if (!attr->btf_key_type_id || !attr->btf_value_type_id) { @@ -504,8 +532,8 @@ static int map_create(union bpf_attr *attr) goto free_map_nouncharge; } - err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id, - attr->btf_value_type_id); + err = map_check_btf(map, btf, attr->btf_key_type_id, + attr->btf_value_type_id); if (err) { btf_put(btf); goto free_map_nouncharge; diff --git 
a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index b3c557476a8d..4ddf61e158f6 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -227,6 +227,5 @@ const struct bpf_map_ops xsk_map_ops = { .map_lookup_elem = xsk_map_lookup_elem, .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, + .map_check_btf = map_check_no_btf, }; - - -- cgit From f6069b9aa9934ede26f41ac0781fce241279ad43 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Aug 2018 23:26:14 +0200 Subject: bpf: fix redirect to map under tail calls Commits 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") and 7c3001313396 ("bpf: fix ri->map_owner pointer on bpf_prog_realloc") tried to ensure that buggy programs using the bpf_redirect_map() helper do not leave stale maps behind. The idea was to add a map_owner cookie into the per-CPU struct redirect_info which was set to prog->aux by the prog making the helper call, as proof that the map is not stale since the prog is implicitly holding a reference to it. This owner cookie could later be compared with the program calling into BPF to check whether they match, and if so the redirect could proceed with processing the map safely. In (obvious) hindsight, this approach breaks down when tail calls are involved, since the original caller's prog->aux pointer does not have to match the one from the progs in the tail call chain, and therefore the xdp buffer will be dropped instead of redirected. The way around that is to fix the issue differently (which also allows removing related work from the fast path at the same time): once the lifetime of a redirect map has come to its end, we use its map free callback, where we need to wait on synchronize_rcu() for current outstanding xdp buffers and remove such a map pointer from the redirect info if found to be present. At that time no program is using this map anymore, so we simply invalidate the map pointers to NULL iff they previously pointed to that instance, while making sure that the redirect path only reads out the map once. 
Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine") Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") Reported-by: Sebastiano Miano Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +- include/trace/events/xdp.h | 5 ++-- kernel/bpf/cpumap.c | 2 ++ kernel/bpf/devmap.c | 1 + kernel/bpf/verifier.c | 21 -------------- kernel/bpf/xskmap.c | 1 + net/core/filter.c | 68 ++++++++++++++++++++-------------------------- 7 files changed, 38 insertions(+), 63 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/include/linux/filter.h b/include/linux/filter.h index 5d565c50bcb2..6791a0ac0139 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,7 +543,6 @@ struct bpf_redirect_info { u32 flags; struct bpf_map *map; struct bpf_map *map_to_flush; - unsigned long map_owner; u32 kern_flags; }; @@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_clear_redirect_map(struct bpf_map *map); + static inline bool xdp_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1ecf4c67fcf7..e95cb86b65cf 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -147,9 +147,8 @@ struct _bpf_dtab_netdev { #define devmap_ifindex(fwd, map) \ (!fwd ? 0 : \ - (!map ? 0 : \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ - ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))) + ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 620bc5024d7d..24aac0d0f412 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map) * It does __not__ ensure pending flush operations (if any) are * complete. */ + + bpf_clear_redirect_map(map); synchronize_rcu(); /* To ensure all pending flush operations have completed wait for flush diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index ac1df79f3788..141710b82a6c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map) list_del_rcu(&dtab->list); spin_unlock(&dev_map_lock); + bpf_clear_redirect_map(map); synchronize_rcu(); /* To ensure all pending flush operations have completed wait for flush diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ca90679a7fe5..92246117d2b0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) goto patch_call_imm; } - if (insn->imm == BPF_FUNC_redirect_map) { - /* Note, we cannot use prog directly as imm as subsequent - * rewrites would still change the prog pointer. The only - * stable address we can use is aux, which also works with - * prog clones during blinding. 
- */ - u64 addr = (unsigned long)prog->aux; - struct bpf_insn r4_ld[] = { - BPF_LD_IMM64(BPF_REG_4, addr), - *insn, - }; - cnt = ARRAY_SIZE(r4_ld); - - new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt); - if (!new_prog) - return -ENOMEM; - - delta += cnt - 1; - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - } patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 4ddf61e158f6..9f8463afda9c 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map) struct xsk_map *m = container_of(map, struct xsk_map, map); int i; + bpf_clear_redirect_map(map); synchronize_net(); for (i = 0; i < map->max_entries; i++) { diff --git a/net/core/filter.c b/net/core/filter.c index fd423ce3da34..c25eb36f1320 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) } } -static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, - unsigned long aux) +void bpf_clear_redirect_map(struct bpf_map *map) { - return (unsigned long)xdp_prog->aux != aux; + struct bpf_redirect_info *ri; + int cpu; + + for_each_possible_cpu(cpu) { + ri = per_cpu_ptr(&bpf_redirect_info, cpu); + /* Avoid polluting remote cacheline due to writes if + * not needed. Once we pass this test, we need the + * cmpxchg() to make sure it hasn't been changed in + * the meantime by remote CPU. + */ + if (unlikely(READ_ONCE(ri->map) == map)) + cmpxchg(&ri->map, map, NULL); + } } static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, struct bpf_map *map) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - unsigned long map_owner = ri->map_owner; - struct bpf_map *map = ri->map; u32 index = ri->ifindex; void *fwd = NULL; int err; ri->ifindex = 0; - ri->map = NULL; - ri->map_owner = 0; - - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } + WRITE_ONCE(ri->map, NULL); fwd = __xdp_map_lookup_elem(map, index); if (!fwd) { @@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); struct net_device *fwd; u32 index = ri->ifindex; int err; - if (ri->map) - return xdp_do_redirect_map(dev, xdp, xdp_prog); + if (map) + return xdp_do_redirect_map(dev, xdp, xdp_prog, map); fwd = dev_get_by_index_rcu(dev_net(dev), index); ri->ifindex = 0; @@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, + struct bpf_map *map) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - unsigned long map_owner = ri->map_owner; - struct bpf_map *map = ri->map; u32 index = ri->ifindex; void *fwd = NULL; int err = 0; ri->ifindex = 0; - ri->map = NULL; - ri->map_owner = 0; + WRITE_ONCE(ri->map, NULL); - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } fwd = __xdp_map_lookup_elem(map, index); if (unlikely(!fwd)) { err = -EINVAL; @@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct 
xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); u32 index = ri->ifindex; struct net_device *fwd; int err = 0; - if (ri->map) - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog); - + if (map) + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, + map); ri->ifindex = 0; fwd = dev_get_by_index_rcu(dev_net(dev), index); if (unlikely(!fwd)) { @@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; - ri->map = NULL; - ri->map_owner = 0; + WRITE_ONCE(ri->map, NULL); return XDP_REDIRECT; } @@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, - unsigned long, map_owner) +BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, + u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags ri->ifindex = ifindex; ri->flags = flags; - ri->map = map; - ri->map_owner = map_owner; + WRITE_ONCE(ri->map, map); return XDP_REDIRECT; } -/* Note, arg4 is hidden from users and populated by the verifier - * with the right pointer. - */ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { .func = bpf_xdp_redirect_map, .gpl_only = false, -- cgit
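For orientation (not part of the commits above), here is a minimal sketch of an XDP program that exercises the paths this series touches: it redirects packets through a BPF_MAP_TYPE_DEVMAP via bpf_redirect_map(), which after the last patch is back to three arguments (map, key, flags) with no hidden map_owner cookie, and whose target is now validated by xdp_ok_fwd_dev() (device up, frame within MTU) in dev_map_enqueue(). The map name, sizes and the samples/bpf-style bpf_map_def/SEC() helpers are illustrative assumptions, not something introduced by these patches.

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* SEC(), struct bpf_map_def, bpf_redirect_map() wrapper (samples/bpf style) */

/* Illustrative devmap: slot 0 holds the ifindex of the egress device,
 * populated from user space with bpf_map_update_elem().
 */
struct bpf_map_def SEC("maps") tx_port = {
	.type		= BPF_MAP_TYPE_DEVMAP,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u32),
	.max_entries	= 1,
};

SEC("xdp_redirect_map")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
	__u32 key = 0;

	/* Returns XDP_REDIRECT on success; the frame is only transmitted if
	 * the target device is up and the frame fits its MTU, as enforced by
	 * xdp_ok_fwd_dev() in dev_map_enqueue() after this series.
	 */
	return bpf_redirect_map(&tx_port, key, 0);
}

char _license[] SEC("license") = "GPL";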