aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2019-04-23 18:36:35 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2019-04-23 18:36:36 +0200
commit2aad32613c353b1e05da5994324bc5f20d0dd55a (patch)
treeda9f76ad09a765c5818de87d3b4e5401725ed49d
parent7e6e185c74dd8a8dc539300c079adc6bc27045d6 (diff)
parent02ee0658362d3713421851bb7487af77a4098bb5 (diff)
Merge branch 'bpf-eth-get-headlen'
Stanislav Fomichev says: ==================== Currently, when eth_get_headlen calls flow dissector, it doesn't pass any skb. Because we use passed skb to lookup associated networking namespace to find whether we have a BPF program attached or not, we always use C-based flow dissector in this case. The goal of this patch series is to add new networking namespace argument to the eth_get_headlen and make BPF flow dissector programs be able to work in the skb-less case. The series goes like this: * use new kernel context (struct bpf_flow_dissector) for flow dissector programs; this makes it easy to distinguish between skb and no-skb case and supports calling BPF flow dissector on a chunk of raw data * convert BPF_PROG_TEST_RUN to use raw data * plumb network namespace into __skb_flow_dissect from all callers * handle no-skb case in __skb_flow_dissect * update eth_get_headlen to include net namespace argument and convert all existing users * add selftest to make sure bpf_skb_load_bytes is not allowed in the no-skb mode * extend test_progs to exercise skb-less flow dissection as well * stop adjusting nhoff/thoff by ETH_HLEN in BPF_PROG_TEST_RUN v6: * more suggestions by Alexei: * eth_get_headlen now takes net dev, not net namespace * test skb-less case via tun eth_get_headlen * fix return errors in bpf_flow_load * don't adjust nhoff/thoff by ETH_HLEN v5: * API changes have been submitted via bpf/stable tree v4: * prohibit access to vlan fields as well (otherwise, inconsistent between skb/skb-less cases) * drop extra unneeded check for skb->vlan_present in bpf_flow.c v3: * new kernel xdp_buff-like context per Alexei suggestion * drop skb_net helper * properly clamp flow_keys->nhoff v2: * moved temporary skb from stack into percpu (avoids memset of ~200 bytes per packet) * tightened down access to __sk_buff fields from flow dissector programs to avoid touching shinfo (whitelist only relevant fields) * addressed suggestions from Willem ==================== Acked-by: Eric Dumazet <edumazet@google.com> Acked-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c2
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c3
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c2
-rw-r--r--drivers/net/tun.c3
-rw-r--r--include/linux/etherdevice.h2
-rw-r--r--include/linux/skbuff.h28
-rw-r--r--include/net/flow_dissector.h7
-rw-r--r--include/net/sch_generic.h11
-rw-r--r--net/bpf/test_run.c48
-rw-r--r--net/core/filter.c105
-rw-r--r--net/core/flow_dissector.c90
-rw-r--r--net/ethernet/eth.c8
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c2
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.h24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c113
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c48
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c79
27 files changed, 411 insertions, 186 deletions
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index c64e2fb5a4f1..350e385528fd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -354,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
hdr_len = buff->len;
if (hdr_len > AQ_CFG_RX_HDR_SIZE)
- hdr_len = eth_get_headlen(aq_buf_vaddr(&buff->rxdata),
+ hdr_len = eth_get_headlen(skb->dev,
+ aq_buf_vaddr(&buff->rxdata),
AQ_CFG_RX_HDR_SIZE);
memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6528a597367b..526f36dcb204 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -899,7 +899,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
DMA_ATTR_WEAK_ORDERING);
if (unlikely(!payload))
- payload = eth_get_headlen(data_ptr, len);
+ payload = eth_get_headlen(bp->dev, data_ptr, len);
skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
if (!skb) {
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 297b95c1b3c1..65b985acae38 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -598,7 +598,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
} else {
ring->stats.seg_pkt_cnt++;
- pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE);
+ pull_len = eth_get_headlen(ndev, va, HNS_RX_HEAD_SIZE);
memcpy(__skb_put(skb, pull_len), va,
ALIGN(pull_len, sizeof(long)));
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 176d4b965709..5f7b51c6ee91 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2580,7 +2580,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length,
ring->stats.seg_pkt_cnt++;
u64_stats_update_end(&ring->syncp);
- ring->pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE);
+ ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE);
__skb_put(skb, ring->pull_len);
hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len,
desc_cb);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 2325cee76211..b4d970e44163 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -280,7 +280,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
/* we need the header to contain the greater of either ETH_HLEN or
* 60 bytes if the skb->len is less than 60 for skb_pad.
*/
- pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);
+ pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1a95223c9f99..e1931701cd7e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2035,7 +2035,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > I40E_RX_HDR_SIZE)
- headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, xdp->data,
+ I40E_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index b64187753ad6..cf8be63a8a4f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1315,7 +1315,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IAVF_RX_HDR_SIZE)
- headlen = eth_get_headlen(va, IAVF_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 79043fec0187..259f118c7d8b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -699,7 +699,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
- headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index acbb5b4f333d..9b8a4bb25327 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8051,7 +8051,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IGB_RX_HDR_LEN)
- headlen = eth_get_headlen(va, IGB_RX_HDR_LEN);
+ headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index f79728381e8a..e58a6e0dc4d9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1199,7 +1199,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IGC_RX_HDR_LEN)
- headlen = eth_get_headlen(va, IGC_RX_HDR_LEN);
+ headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 60cec3540dd7..7b903206b534 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1800,7 +1800,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
* we need the header to contain the greater of either ETH_HLEN or
* 60 bytes if the skb->len is less than 60 for skb_pad.
*/
- pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE);
+ pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 49e23afa05a2..d189ed247665 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -895,7 +895,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
/* Determine available headroom for copy */
headlen = size;
if (headlen > IXGBEVF_RX_HDR_SIZE)
- headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
+ headlen = eth_get_headlen(skb->dev, xdp->data,
+ IXGBEVF_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 40f3f98aa279..7b61126fcec9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -163,7 +163,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
case MLX5_INLINE_MODE_NONE:
return 0;
case MLX5_INLINE_MODE_TCP_UDP:
- hlen = eth_get_headlen(skb->data, skb_headlen(skb));
+ hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
hlen += VLAN_HLEN;
break;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 24d0220b9ba0..9d72f8c76c15 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1965,7 +1965,8 @@ drop:
if (frags) {
/* Exercise flow dissector code path. */
- u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
+ u32 headlen = eth_get_headlen(tun->dev, skb->data,
+ skb_headlen(skb));
if (unlikely(headlen > skb_headlen(skb))) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index e2f3b21cd72a..c6c1930e28a0 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -33,7 +33,7 @@ struct device;
int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
unsigned char *arch_get_platform_mac_address(void);
int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-u32 eth_get_headlen(void *data, unsigned int max_len);
+u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
extern const struct header_ops eth_header_ops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f42942a443b..998256c2820b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1275,12 +1275,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
}
#endif
-struct bpf_flow_keys;
-bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
- const struct sk_buff *skb,
- struct flow_dissector *flow_dissector,
- struct bpf_flow_keys *flow_keys);
-bool __skb_flow_dissect(const struct sk_buff *skb,
+struct bpf_flow_dissector;
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen);
+
+bool __skb_flow_dissect(const struct net *net,
+ const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
void *data, __be16 proto, int nhoff, int hlen,
@@ -1290,8 +1290,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, unsigned int flags)
{
- return __skb_flow_dissect(skb, flow_dissector, target_container,
- NULL, 0, 0, 0, flags);
+ return __skb_flow_dissect(NULL, skb, flow_dissector,
+ target_container, NULL, 0, 0, 0, flags);
}
static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
@@ -1299,18 +1299,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
- return __skb_flow_dissect(skb, &flow_keys_dissector, flow,
- NULL, 0, 0, 0, flags);
+ return __skb_flow_dissect(NULL, skb, &flow_keys_dissector,
+ flow, NULL, 0, 0, 0, flags);
}
static inline bool
-skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+skb_flow_dissect_flow_keys_basic(const struct net *net,
+ const struct sk_buff *skb,
struct flow_keys_basic *flow, void *data,
__be16 proto, int nhoff, int hlen,
unsigned int flags)
{
memset(flow, 0, sizeof(*flow));
- return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
+ return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
data, proto, nhoff, hlen, flags);
}
@@ -2488,7 +2489,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb)
if (skb_transport_header_was_set(skb))
return;
- if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+ if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+ NULL, 0, 0, 0, 0))
skb_set_transport_header(skb, keys.control.thoff);
}
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2b26979efb48..7c5a8d9a8d2a 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
return ((char *)target_container) + flow_dissector->offset[key_id];
}
+struct bpf_flow_dissector {
+ struct bpf_flow_keys *flow_keys;
+ const struct sk_buff *skb;
+ void *data;
+ void *data_end;
+};
+
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e8f85cd2afce..21f434f3ac9e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -364,13 +364,10 @@ struct tcf_proto {
};
struct qdisc_skb_cb {
- union {
- struct {
- unsigned int pkt_len;
- u16 slave_dev_queue_mapping;
- u16 tc_classid;
- };
- struct bpf_flow_keys *flow_keys;
+ struct {
+ unsigned int pkt_len;
+ u16 slave_dev_queue_mapping;
+ u16 tc_classid;
};
#define QDISC_CB_PRIV_LEN 20
unsigned char data[QDISC_CB_PRIV_LEN];
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2221573dacdb..8606e5aef0b6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -379,13 +379,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
union bpf_attr __user *uattr)
{
u32 size = kattr->test.data_size_in;
+ struct bpf_flow_dissector ctx = {};
u32 repeat = kattr->test.repeat;
struct bpf_flow_keys flow_keys;
u64 time_start, time_spent = 0;
- struct bpf_skb_data_end *cb;
+ const struct ethhdr *eth;
u32 retval, duration;
- struct sk_buff *skb;
- struct sock *sk;
void *data;
int ret;
u32 i;
@@ -396,46 +395,28 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;
- data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ if (size < ETH_HLEN)
+ return -EINVAL;
+
+ data = bpf_test_init(kattr, size, 0, 0);
if (IS_ERR(data))
return PTR_ERR(data);
- sk = kzalloc(sizeof(*sk), GFP_USER);
- if (!sk) {
- kfree(data);
- return -ENOMEM;
- }
- sock_net_set(sk, current->nsproxy->net_ns);
- sock_init_data(NULL, sk);
-
- skb = build_skb(data, 0);
- if (!skb) {
- kfree(data);
- kfree(sk);
- return -ENOMEM;
- }
- skb->sk = sk;
-
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- __skb_put(skb, size);
- skb->protocol = eth_type_trans(skb,
- current->nsproxy->net_ns->loopback_dev);
- skb_reset_network_header(skb);
-
- cb = (struct bpf_skb_data_end *)skb->cb;
- cb->qdisc_cb.flow_keys = &flow_keys;
+ eth = (struct ethhdr *)data;
if (!repeat)
repeat = 1;
+ ctx.flow_keys = &flow_keys;
+ ctx.data = data;
+ ctx.data_end = (__u8 *)data + size;
+
rcu_read_lock();
preempt_disable();
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
- retval = __skb_flow_bpf_dissect(prog, skb,
- &flow_keys_dissector,
- &flow_keys);
+ retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
+ size);
if (signal_pending(current)) {
preempt_enable();
@@ -468,7 +449,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
retval, duration);
out:
- kfree_skb(skb);
- kfree(sk);
+ kfree(data);
return ret;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index fa8fb0548217..edb3a7c22f6c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+BPF_CALL_4(bpf_flow_dissector_load_bytes,
+ const struct bpf_flow_dissector *, ctx, u32, offset,
+ void *, to, u32, len)
+{
+ void *ptr;
+
+ if (unlikely(offset > 0xffff))
+ goto err_clear;
+
+ if (unlikely(!ctx->skb))
+ goto err_clear;
+
+ ptr = skb_header_pointer(ctx->skb, offset, len, to);
+ if (unlikely(!ptr))
+ goto err_clear;
+ if (ptr != to)
+ memcpy(to, ptr, len);
+
+ return 0;
+err_clear:
+ memset(to, 0, len);
+ return -EFAULT;
+}
+
+static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
+ .func = bpf_flow_dissector_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
@@ -6121,7 +6155,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
+ return &bpf_flow_dissector_load_bytes_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6248,9 +6282,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
- if (size != sizeof(__u64))
- return false;
- break;
+ return false;
case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64))
return false;
@@ -6285,7 +6317,6 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
@@ -6312,7 +6343,6 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
case bpf_ctx_range(struct __sk_buff, data):
@@ -6358,7 +6388,6 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
@@ -6601,7 +6630,6 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -6803,7 +6831,6 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
@@ -6877,24 +6904,65 @@ static bool flow_dissector_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct __sk_buff))
+ return false;
+
if (type == BPF_WRITE)
return false;
switch (off) {
case bpf_ctx_range(struct __sk_buff, data):
+ if (size != size_default)
+ return false;
info->reg_type = PTR_TO_PACKET;
- break;
+ return true;
case bpf_ctx_range(struct __sk_buff, data_end):
+ if (size != size_default)
+ return false;
info->reg_type = PTR_TO_PACKET_END;
- break;
+ return true;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ if (size != sizeof(__u64))
+ return false;
info->reg_type = PTR_TO_FLOW_KEYS;
- break;
+ return true;
default:
return false;
}
+}
- return bpf_skb_is_valid_access(off, size, type, prog, info);
+static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, data));
+ break;
+
+ case offsetof(struct __sk_buff, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, data_end));
+ break;
+
+ case offsetof(struct __sk_buff, flow_keys):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_flow_dissector, flow_keys));
+ break;
+ }
+
+ return insn - insn_buf;
}
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
@@ -7201,15 +7269,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
skc_num, 2, target_size));
break;
- case offsetof(struct __sk_buff, flow_keys):
- off = si->off;
- off -= offsetof(struct __sk_buff, flow_keys);
- off += offsetof(struct sk_buff, cb);
- off += offsetof(struct qdisc_skb_cb, flow_keys);
- *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
- si->src_reg, off);
- break;
-
case offsetof(struct __sk_buff, tstamp):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
@@ -8214,7 +8273,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = {
const struct bpf_verifier_ops flow_dissector_verifier_ops = {
.get_func_proto = flow_dissector_func_proto,
.is_valid_access = flow_dissector_is_valid_access,
- .convert_ctx_access = bpf_convert_ctx_access,
+ .convert_ctx_access = flow_dissector_convert_ctx_access,
};
const struct bpf_prog_ops flow_dissector_prog_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 795449713ba4..fac712cee9d5 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -683,50 +683,30 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
}
}
-bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
- const struct sk_buff *skb,
- struct flow_dissector *flow_dissector,
- struct bpf_flow_keys *flow_keys)
+bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
+ __be16 proto, int nhoff, int hlen)
{
- struct bpf_skb_data_end cb_saved;
- struct bpf_skb_data_end *cb;
+ struct bpf_flow_keys *flow_keys = ctx->flow_keys;
u32 result;
- /* Note that even though the const qualifier is discarded
- * throughout the execution of the BPF program, all changes(the
- * control block) are reverted after the BPF program returns.
- * Therefore, __skb_flow_dissect does not alter the skb.
- */
-
- cb = (struct bpf_skb_data_end *)skb->cb;
-
- /* Save Control Block */
- memcpy(&cb_saved, cb, sizeof(cb_saved));
- memset(cb, 0, sizeof(*cb));
-
/* Pass parameters to the BPF program */
memset(flow_keys, 0, sizeof(*flow_keys));
- cb->qdisc_cb.flow_keys = flow_keys;
- flow_keys->n_proto = skb->protocol;
- flow_keys->nhoff = skb_network_offset(skb);
+ flow_keys->n_proto = proto;
+ flow_keys->nhoff = nhoff;
flow_keys->thoff = flow_keys->nhoff;
- bpf_compute_data_pointers((struct sk_buff *)skb);
- result = BPF_PROG_RUN(prog, skb);
-
- /* Restore state */
- memcpy(cb, &cb_saved, sizeof(cb_saved));
+ result = BPF_PROG_RUN(prog, ctx);
- flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff,
- skb_network_offset(skb), skb->len);
+ flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
- flow_keys->nhoff, skb->len);
+ flow_keys->nhoff, hlen);
return result == BPF_OK;
}
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
+ * @net: associated network namespace, derived from @skb if NULL
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
* @flow_dissector: list of keys to dissect
* @target_container: target structure to put dissected values into
@@ -743,7 +723,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
*
* Caller must take care of zeroing target container memory.
*/
-bool __skb_flow_dissect(const struct sk_buff *skb,
+bool __skb_flow_dissect(const struct net *net,
+ const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
void *data, __be16 proto, int nhoff, int hlen,
@@ -756,6 +737,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
+ struct bpf_prog *attached = NULL;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
int num_hdrs = 0;
@@ -798,22 +780,39 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
target_container);
if (skb) {
- struct bpf_flow_keys flow_keys;
- struct bpf_prog *attached = NULL;
+ if (!net) {
+ if (skb->dev)
+ net = dev_net(skb->dev);
+ else if (skb->sk)
+ net = sock_net(skb->sk);
+ }
+ }
+ WARN_ON_ONCE(!net);
+ if (net) {
rcu_read_lock();
-
- if (skb->dev)
- attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
- else if (skb->sk)
- attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog);
- else
- WARN_ON_ONCE(1);
+ attached = rcu_dereference(net->flow_dissector_prog);
if (attached) {
- ret = __skb_flow_bpf_dissect(attached, skb,
- flow_dissector,
- &flow_keys);
+ struct bpf_flow_keys flow_keys;
+ struct bpf_flow_dissector ctx = {
+ .flow_keys = &flow_keys,
+ .data = data,
+ .data_end = data + hlen,
+ };
+ __be16 n_proto = proto;
+
+ if (skb) {
+ ctx.skb = skb;
+ /* we can't use 'proto' in the skb case
+ * because it might be set to skb->vlan_proto
+ * which has been pulled from the data
+ */
+ n_proto = skb->protocol;
+ }
+
+ ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
+ hlen);
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
target_container);
rcu_read_unlock();
@@ -1410,8 +1409,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
__flow_hash_secret_init();
memset(&keys, 0, sizeof(keys));
- __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
- NULL, 0, 0, 0,
+ __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
+ &keys, NULL, 0, 0, 0,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
return __flow_hash_from_keys(&keys, hashrnd);
@@ -1512,7 +1511,8 @@ u32 skb_get_poff(const struct sk_buff *skb)
{
struct flow_keys_basic keys;
- if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
+ if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+ NULL, 0, 0, 0, 0))
return 0;
return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f7a3d7a171c7..0f9863dc4d44 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -119,13 +119,14 @@ EXPORT_SYMBOL(eth_header);
/**
* eth_get_headlen - determine the length of header for an ethernet frame
+ * @dev: pointer to network device
* @data: pointer to start of frame
* @len: total length of frame
*
* Make a best effort attempt to pull the length for all of the headers for
* a given frame in a linear buffer.
*/
-u32 eth_get_headlen(void *data, unsigned int len)
+u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len)
{
const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
const struct ethhdr *eth = (const struct ethhdr *)data;
@@ -136,8 +137,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
return len;
/* parse any remaining L2/L3 headers, check for L4 */
- if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto,
- sizeof(*eth), len, flags))
+ if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data,
+ eth->h_proto, sizeof(*eth),
+ len, flags))
return max_t(u32, keys.control.thoff, sizeof(*eth));
/* parse for any L4 headers */
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 7136ab9ffa73..3fd83b9dc1bf 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -26,7 +26,7 @@ static void load_and_attach_program(void)
struct bpf_object *obj;
ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name,
- cfg_map_name, &prog_fd);
+ cfg_map_name, NULL, &prog_fd, NULL);
if (ret)
error(1, 0, "bpf_flow_load %s", cfg_path_name);
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index 41dd6959feb0..daeaeb518894 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -9,10 +9,12 @@ static inline int bpf_flow_load(struct bpf_object **obj,
const char *path,
const char *section_name,
const char *map_name,
- int *prog_fd)
+ const char *keys_map_name,
+ int *prog_fd,
+ int *keys_fd)
{
struct bpf_program *prog, *main_prog;
- struct bpf_map *prog_array;
+ struct bpf_map *prog_array, *keys;
int prog_array_fd;
int ret, fd, i;
@@ -23,19 +25,29 @@ static inline int bpf_flow_load(struct bpf_object **obj,
main_prog = bpf_object__find_program_by_title(*obj, section_name);
if (!main_prog)
- return ret;
+ return -1;
*prog_fd = bpf_program__fd(main_prog);
if (*prog_fd < 0)
- return ret;
+ return -1;
prog_array = bpf_object__find_map_by_name(*obj, map_name);
if (!prog_array)
- return ret;
+ return -1;
prog_array_fd = bpf_map__fd(prog_array);
if (prog_array_fd < 0)
- return ret;
+ return -1;
+
+ if (keys_map_name && keys_fd) {
+ keys = bpf_object__find_map_by_name(*obj, keys_map_name);
+ if (!keys)
+ return -1;
+
+ *keys_fd = bpf_map__fd(keys);
+ if (*keys_fd < 0)
+ return -1;
+ }
i = 0;
bpf_object__for_each_program(prog, *obj) {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 126319f9a97c..8b54adfd6264 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <error.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
#define CHECK_FLOW_KEYS(desc, got, expected) \
CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
@@ -79,8 +82,8 @@ struct test tests[] = {
.tcp.doff = 5,
},
.keys = {
- .nhoff = 0,
- .thoff = sizeof(struct iphdr),
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr),
.addr_proto = ETH_P_IP,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IP),
@@ -95,8 +98,8 @@ struct test tests[] = {
.tcp.doff = 5,
},
.keys = {
- .nhoff = 0,
- .thoff = sizeof(struct ipv6hdr),
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
.addr_proto = ETH_P_IPV6,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
@@ -113,8 +116,8 @@ struct test tests[] = {
.tcp.doff = 5,
},
.keys = {
- .nhoff = VLAN_HLEN,
- .thoff = VLAN_HLEN + sizeof(struct iphdr),
+ .nhoff = ETH_HLEN + VLAN_HLEN,
+ .thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr),
.addr_proto = ETH_P_IP,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IP),
@@ -131,8 +134,9 @@ struct test tests[] = {
.tcp.doff = 5,
},
.keys = {
- .nhoff = VLAN_HLEN * 2,
- .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
+ .nhoff = ETH_HLEN + VLAN_HLEN * 2,
+ .thoff = ETH_HLEN + VLAN_HLEN * 2 +
+ sizeof(struct ipv6hdr),
.addr_proto = ETH_P_IPV6,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
@@ -140,13 +144,73 @@ struct test tests[] = {
},
};
+static int create_tap(const char *ifname)
+{
+ struct ifreq ifr = {
+ .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS,
+ };
+ int fd, ret;
+
+ strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, TUNSETIFF, &ifr);
+ if (ret)
+ return -1;
+
+ return fd;
+}
+
+static int tx_tap(int fd, void *pkt, size_t len)
+{
+ struct iovec iov[] = {
+ {
+ .iov_len = len,
+ .iov_base = pkt,
+ },
+ };
+ return writev(fd, iov, ARRAY_SIZE(iov));
+}
+
+static int ifup(const char *ifname)
+{
+ struct ifreq ifr = {};
+ int sk, ret;
+
+ strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ sk = socket(PF_INET, SOCK_DGRAM, 0);
+ if (sk < 0)
+ return -1;
+
+ ret = ioctl(sk, SIOCGIFFLAGS, &ifr);
+ if (ret) {
+ close(sk);
+ return -1;
+ }
+
+ ifr.ifr_flags |= IFF_UP;
+ ret = ioctl(sk, SIOCSIFFLAGS, &ifr);
+ if (ret) {
+ close(sk);
+ return -1;
+ }
+
+ close(sk);
+ return 0;
+}
+
void test_flow_dissector(void)
{
+ int i, err, prog_fd, keys_fd = -1, tap_fd;
struct bpf_object *obj;
- int i, err, prog_fd;
+ __u32 duration = 0;
err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
- "jmp_table", &prog_fd);
+ "jmp_table", "last_dissection", &prog_fd, &keys_fd);
if (err) {
error_cnt++;
return;
@@ -171,5 +235,34 @@ void test_flow_dissector(void)
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
}
+ /* Do the same tests but for skb-less flow dissector.
+ * We use a known path in the net/tun driver that calls
+ * eth_get_headlen and we manually export bpf_flow_keys
+ * via BPF map in this case.
+ */
+
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno);
+
+ tap_fd = create_tap("tap0");
+ CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno);
+ err = ifup("tap0");
+ CHECK(err, "ifup", "err %d errno %d", err, errno);
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct bpf_flow_keys flow_keys = {};
+ struct bpf_prog_test_run_attr tattr = {};
+ __u32 key = 0;
+
+ err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+ CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno);
+
+ err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+ CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+ CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+ }
+
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
new file mode 100644
index 000000000000..dc5ef155ec28
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_flow_dissector_load_bytes(void)
+{
+ struct bpf_flow_keys flow_keys;
+ __u32 duration = 0, retval, size;
+ struct bpf_insn prog[] = {
+ // BPF_REG_1 - 1st argument: context
+ // BPF_REG_2 - 2nd argument: offset, start at first byte
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ // BPF_REG_3 - 3rd argument: destination, reserve byte on stack
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1),
+ // BPF_REG_4 - 4th argument: copy one byte
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ // bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1)
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ // if (ret == 0) return BPF_DROP (2)
+ BPF_MOV64_IMM(BPF_REG_0, BPF_DROP),
+ BPF_EXIT_INSN(),
+ // if (ret != 0) return BPF_OK (0)
+ BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
+ BPF_EXIT_INSN(),
+ };
+ int fd, err;
+
+ /* make sure bpf_skb_load_bytes is not allowed from skb-less context
+ */
+ fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ CHECK(fd < 0,
+ "flow_dissector-bpf_skb_load_bytes-load",
+ "fd %d errno %d\n",
+ fd, errno);
+
+ err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
+ &flow_keys, &size, &retval, &duration);
+ CHECK(size != sizeof(flow_keys) || err || retval != 1,
+ "flow_dissector-bpf_skb_load_bytes",
+ "err %d errno %d retval %d duration %d size %u/%zu\n",
+ err, errno, retval, duration, size, sizeof(flow_keys));
+
+ if (fd >= -1)
+ close(fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 75b17cada539..81ad9a0b29d0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -64,6 +64,25 @@ struct bpf_map_def SEC("maps") jmp_table = {
.max_entries = 8
};
+struct bpf_map_def SEC("maps") last_dissection = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_flow_keys),
+ .max_entries = 1,
+};
+
+static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
+ int ret)
+{
+ struct bpf_flow_keys *val;
+ __u32 key = 0;
+
+ val = bpf_map_lookup_elem(&last_dissection, &key);
+ if (val)
+ memcpy(val, keys, sizeof(*val));
+ return ret;
+}
+
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
__u16 hdr_size,
void *buffer)
@@ -109,10 +128,10 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
break;
default:
/* Protocol not supported */
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
}
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
}
SEC("flow_dissector")
@@ -139,8 +158,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
case IPPROTO_ICMP:
icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
if (!icmp)
- return BPF_DROP;
- return BPF_OK;
+ return export_flow_keys(keys, BPF_DROP);
+ return export_flow_keys(keys, BPF_OK);
case IPPROTO_IPIP:
keys->is_encap = true;
return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
@@ -150,11 +169,11 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
case IPPROTO_GRE:
gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
if (!gre)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
if (bpf_htons(gre->flags & GRE_VERSION))
/* Only inspect standard GRE packets with version 0 */
- return BPF_OK;
+ return export_flow_keys(keys, BPF_OK);
keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
if (GRE_IS_CSUM(gre->flags))
@@ -170,7 +189,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
&_eth);
if (!eth)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->thoff += sizeof(*eth);
@@ -181,31 +200,31 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
case IPPROTO_TCP:
tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
if (!tcp)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
if (tcp->doff < 5)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->sport = tcp->source;
keys->dport = tcp->dest;
- return BPF_OK;
+ return export_flow_keys(keys, BPF_OK);
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
if (!udp)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->sport = udp->source;
keys->dport = udp->dest;
- return BPF_OK;
+ return export_flow_keys(keys, BPF_OK);
default:
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
}
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
}
static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
@@ -225,7 +244,7 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
return parse_ip_proto(skb, nexthdr);
}
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
}
PROG(IP)(struct __sk_buff *skb)
@@ -238,11 +257,11 @@ PROG(IP)(struct __sk_buff *skb)
iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
if (!iph)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
/* IP header cannot be smaller than 20 bytes */
if (iph->ihl < 5)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->addr_proto = ETH_P_IP;
keys->ipv4_src = iph->saddr;
@@ -250,7 +269,7 @@ PROG(IP)(struct __sk_buff *skb)
keys->thoff += iph->ihl << 2;
if (data + keys->thoff > data_end)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
keys->is_frag = true;
@@ -264,7 +283,7 @@ PROG(IP)(struct __sk_buff *skb)
}
if (done)
- return BPF_OK;
+ return export_flow_keys(keys, BPF_OK);
return parse_ip_proto(skb, iph->protocol);
}
@@ -276,7 +295,7 @@ PROG(IPV6)(struct __sk_buff *skb)
ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
if (!ip6h)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->addr_proto = ETH_P_IPV6;
memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
@@ -288,11 +307,12 @@ PROG(IPV6)(struct __sk_buff *skb)
PROG(IPV6OP)(struct __sk_buff *skb)
{
+ struct bpf_flow_keys *keys = skb->flow_keys;
struct ipv6_opt_hdr *ip6h, _ip6h;
ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
if (!ip6h)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
/* hlen is in 8-octets and does not include the first 8 bytes
* of the header
@@ -309,7 +329,7 @@ PROG(IPV6FR)(struct __sk_buff *skb)
fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
if (!fragh)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->thoff += sizeof(*fragh);
keys->is_frag = true;
@@ -321,13 +341,14 @@ PROG(IPV6FR)(struct __sk_buff *skb)
PROG(MPLS)(struct __sk_buff *skb)
{
+ struct bpf_flow_keys *keys = skb->flow_keys;
struct mpls_label *mpls, _mpls;
mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
if (!mpls)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
- return BPF_OK;
+ return export_flow_keys(keys, BPF_OK);
}
PROG(VLAN)(struct __sk_buff *skb)
@@ -339,10 +360,10 @@ PROG(VLAN)(struct __sk_buff *skb)
if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
if (!vlan)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->nhoff += sizeof(*vlan);
keys->thoff += sizeof(*vlan);
@@ -350,14 +371,14 @@ PROG(VLAN)(struct __sk_buff *skb)
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
if (!vlan)
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->nhoff += sizeof(*vlan);
keys->thoff += sizeof(*vlan);
/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
- return BPF_DROP;
+ return export_flow_keys(keys, BPF_DROP);
keys->n_proto = vlan->h_vlan_encapsulated_proto;
return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);