diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2019-04-23 18:36:35 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-04-23 18:36:36 +0200 |
commit | 2aad32613c353b1e05da5994324bc5f20d0dd55a (patch) | |
tree | da9f76ad09a765c5818de87d3b4e5401725ed49d | |
parent | 7e6e185c74dd8a8dc539300c079adc6bc27045d6 (diff) | |
parent | 02ee0658362d3713421851bb7487af77a4098bb5 (diff) |
Merge branch 'bpf-eth-get-headlen'
Stanislav Fomichev says:
====================
Currently, when eth_get_headlen calls flow dissector, it doesn't pass any
skb. Because we use passed skb to lookup associated networking namespace
to find whether we have a BPF program attached or not, we always use
C-based flow dissector in this case.
The goal of this patch series is to add new networking namespace argument
to the eth_get_headlen and make BPF flow dissector programs be able to
work in the skb-less case.
The series goes like this:
* use new kernel context (struct bpf_flow_dissector) for flow dissector
programs; this makes it easy to distinguish between skb and no-skb
case and supports calling BPF flow dissector on a chunk of raw data
* convert BPF_PROG_TEST_RUN to use raw data
* plumb network namespace into __skb_flow_dissect from all callers
* handle no-skb case in __skb_flow_dissect
* update eth_get_headlen to include net namespace argument and
convert all existing users
* add selftest to make sure bpf_skb_load_bytes is not allowed in
the no-skb mode
* extend test_progs to exercise skb-less flow dissection as well
* stop adjusting nhoff/thoff by ETH_HLEN in BPF_PROG_TEST_RUN
v6:
* more suggestions by Alexei:
* eth_get_headlen now takes net dev, not net namespace
* test skb-less case via tun eth_get_headlen
* fix return errors in bpf_flow_load
* don't adjust nhoff/thoff by ETH_HLEN
v5:
* API changes have been submitted via bpf/stable tree
v4:
* prohibit access to vlan fields as well (otherwise, inconsistent
between skb/skb-less cases)
* drop extra unneeded check for skb->vlan_present in bpf_flow.c
v3:
* new kernel xdp_buff-like context per Alexei suggestion
* drop skb_net helper
* properly clamp flow_keys->nhoff
v2:
* moved temporary skb from stack into percpu (avoids memset of ~200 bytes
per packet)
* tightened down access to __sk_buff fields from flow dissector programs to
avoid touching shinfo (whitelist only relevant fields)
* addressed suggestions from Willem
====================
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
27 files changed, 411 insertions, 186 deletions
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index c64e2fb5a4f1..350e385528fd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -354,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, hdr_len = buff->len; if (hdr_len > AQ_CFG_RX_HDR_SIZE) - hdr_len = eth_get_headlen(aq_buf_vaddr(&buff->rxdata), + hdr_len = eth_get_headlen(skb->dev, + aq_buf_vaddr(&buff->rxdata), AQ_CFG_RX_HDR_SIZE); memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata), diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6528a597367b..526f36dcb204 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -899,7 +899,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); if (unlikely(!payload)) - payload = eth_get_headlen(data_ptr, len); + payload = eth_get_headlen(bp->dev, data_ptr, len); skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 297b95c1b3c1..65b985acae38 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -598,7 +598,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, } else { ring->stats.seg_pkt_cnt++; - pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE); + pull_len = eth_get_headlen(ndev, va, HNS_RX_HEAD_SIZE); memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 176d4b965709..5f7b51c6ee91 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2580,7 +2580,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length, ring->stats.seg_pkt_cnt++; u64_stats_update_end(&ring->syncp); - ring->pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE); + ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE); __skb_put(skb, ring->pull_len); hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len, desc_cb); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 2325cee76211..b4d970e44163 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -280,7 +280,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer, /* we need the header to contain the greater of either ETH_HLEN or * 60 bytes if the skb->len is less than 60 for skb_pad. */ - pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN); + pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1a95223c9f99..e1931701cd7e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2035,7 +2035,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > I40E_RX_HDR_SIZE) - headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, xdp->data, + I40E_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), xdp->data, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index b64187753ad6..cf8be63a8a4f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1315,7 +1315,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IAVF_RX_HDR_SIZE) - headlen = eth_get_headlen(va, IAVF_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 79043fec0187..259f118c7d8b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -699,7 +699,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, /* Determine available headroom for copy */ headlen = size; if (headlen > ICE_RX_HDR_SIZE) - headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index acbb5b4f333d..9b8a4bb25327 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8051,7 +8051,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IGB_RX_HDR_LEN) - headlen = eth_get_headlen(va, IGB_RX_HDR_LEN); + headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f79728381e8a..e58a6e0dc4d9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1199,7 +1199,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IGC_RX_HDR_LEN) - headlen = eth_get_headlen(va, IGC_RX_HDR_LEN); + headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 60cec3540dd7..7b903206b534 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1800,7 +1800,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, * we need the header to contain the greater of either ETH_HLEN or * 60 bytes if the skb->len is less than 60 for skb_pad. */ - pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE); + pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 49e23afa05a2..d189ed247665 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -895,7 +895,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, xdp->data, + IXGBEVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), xdp->data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 40f3f98aa279..7b61126fcec9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -163,7 +163,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, case MLX5_INLINE_MODE_NONE: return 0; case MLX5_INLINE_MODE_TCP_UDP: - hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) hlen += VLAN_HLEN; break; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 24d0220b9ba0..9d72f8c76c15 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1965,7 +1965,8 @@ drop: if (frags) { /* Exercise flow dissector code path. */ - u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); + u32 headlen = eth_get_headlen(tun->dev, skb->data, + skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { this_cpu_inc(tun->pcpu_stats->rx_dropped); diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index e2f3b21cd72a..c6c1930e28a0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -33,7 +33,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); -u32 eth_get_headlen(void *data, unsigned int max_len); +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f42942a443b..998256c2820b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1275,12 +1275,12 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif -struct bpf_flow_keys; -bool __skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys); -bool __skb_flow_dissect(const struct sk_buff *skb, +struct bpf_flow_dissector; +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen); + +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -1290,8 +1290,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { - return __skb_flow_dissect(skb, flow_dissector, target_container, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, flow_dissector, + target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, @@ -1299,18 +1299,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_dissector, flow, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, + flow, NULL, 0, 0, 0, flags); } static inline bool -skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb, +skb_flow_dissect_flow_keys_basic(const struct net *net, + const struct sk_buff *skb, struct flow_keys_basic *flow, void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow, + return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } @@ -2488,7 +2489,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb) if (skb_transport_header_was_set(skb)) return; - if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2b26979efb48..7c5a8d9a8d2a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +struct bpf_flow_dissector { + struct bpf_flow_keys *flow_keys; + const struct sk_buff *skb; + void *data; + void *data_end; +}; + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e8f85cd2afce..21f434f3ac9e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -364,13 +364,10 @@ struct tcf_proto { }; struct qdisc_skb_cb { - union { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; - struct bpf_flow_keys *flow_keys; + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2221573dacdb..8606e5aef0b6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -379,13 +379,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, union bpf_attr __user *uattr) { u32 size = kattr->test.data_size_in; + struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; - struct bpf_skb_data_end *cb; + const struct ethhdr *eth; u32 retval, duration; - struct sk_buff *skb; - struct sock *sk; void *data; int ret; u32 i; @@ -396,46 +395,28 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (size < ETH_HLEN) + return -EINVAL; + + data = bpf_test_init(kattr, size, 0, 0); if (IS_ERR(data)) return PTR_ERR(data); - sk = kzalloc(sizeof(*sk), GFP_USER); - if (!sk) { - kfree(data); - return -ENOMEM; - } - sock_net_set(sk, current->nsproxy->net_ns); - sock_init_data(NULL, sk); - - skb = build_skb(data, 0); - if (!skb) { - kfree(data); - kfree(sk); - return -ENOMEM; - } - skb->sk = sk; - - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - __skb_put(skb, size); - skb->protocol = eth_type_trans(skb, - current->nsproxy->net_ns->loopback_dev); - skb_reset_network_header(skb); - - cb = (struct bpf_skb_data_end *)skb->cb; - cb->qdisc_cb.flow_keys = &flow_keys; + eth = (struct ethhdr *)data; if (!repeat) repeat = 1; + ctx.flow_keys = &flow_keys; + ctx.data = data; + ctx.data_end = (__u8 *)data + size; + rcu_read_lock(); preempt_disable(); time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - retval = __skb_flow_bpf_dissect(prog, skb, - &flow_keys_dissector, - &flow_keys); + retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, + size); if (signal_pending(current)) { preempt_enable(); @@ -468,7 +449,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, retval, duration); out: - kfree_skb(skb); - kfree(sk); + kfree(data); return ret; } diff --git a/net/core/filter.c b/net/core/filter.c index fa8fb0548217..edb3a7c22f6c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +BPF_CALL_4(bpf_flow_dissector_load_bytes, + const struct bpf_flow_dissector *, ctx, u32, offset, + void *, to, u32, len) +{ + void *ptr; + + if (unlikely(offset > 0xffff)) + goto err_clear; + + if (unlikely(!ctx->skb)) + goto err_clear; + + ptr = skb_header_pointer(ctx->skb, offset, len, to); + if (unlikely(!ptr)) + goto err_clear; + if (ptr != to) + memcpy(to, ptr, len); + + return 0; +err_clear: + memset(to, 0, len); + return -EFAULT; +} + +static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = { + .func = bpf_flow_dissector_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { @@ -6121,7 +6155,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: - return &bpf_skb_load_bytes_proto; + return &bpf_flow_dissector_load_bytes_proto; default: return bpf_base_func_proto(func_id); } @@ -6248,9 +6282,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type return false; break; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): - if (size != sizeof(__u64)) - return false; - break; + return false; case bpf_ctx_range(struct __sk_buff, tstamp): if (size != sizeof(__u64)) return false; @@ -6285,7 +6317,6 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): @@ -6312,7 +6343,6 @@ static bool cg_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, wire_len): return false; case bpf_ctx_range(struct __sk_buff, data): @@ -6358,7 +6388,6 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): return false; @@ -6601,7 +6630,6 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -6803,7 +6831,6 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): return false; @@ -6877,24 +6904,65 @@ static bool flow_dissector_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + if (type == BPF_WRITE) return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): + if (size != size_default) + return false; info->reg_type = PTR_TO_PACKET; - break; + return true; case bpf_ctx_range(struct __sk_buff, data_end): + if (size != size_default) + return false; info->reg_type = PTR_TO_PACKET_END; - break; + return true; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + if (size != sizeof(__u64)) + return false; info->reg_type = PTR_TO_FLOW_KEYS; - break; + return true; default: return false; } +} - return bpf_skb_is_valid_access(off, size, type, prog, info); +static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) + +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct __sk_buff, data): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, data)); + break; + + case offsetof(struct __sk_buff, data_end): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, data_end)); + break; + + case offsetof(struct __sk_buff, flow_keys): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, flow_keys)); + break; + } + + return insn - insn_buf; } static u32 bpf_convert_ctx_access(enum bpf_access_type type, @@ -7201,15 +7269,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, skc_num, 2, target_size)); break; - case offsetof(struct __sk_buff, flow_keys): - off = si->off; - off -= offsetof(struct __sk_buff, flow_keys); - off += offsetof(struct sk_buff, cb); - off += offsetof(struct qdisc_skb_cb, flow_keys); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, - si->src_reg, off); - break; - case offsetof(struct __sk_buff, tstamp): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); @@ -8214,7 +8273,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = { const struct bpf_verifier_ops flow_dissector_verifier_ops = { .get_func_proto = flow_dissector_func_proto, .is_valid_access = flow_dissector_is_valid_access, - .convert_ctx_access = bpf_convert_ctx_access, + .convert_ctx_access = flow_dissector_convert_ctx_access, }; const struct bpf_prog_ops flow_dissector_prog_ops = { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 795449713ba4..fac712cee9d5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -683,50 +683,30 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } -bool __skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys) +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen) { - struct bpf_skb_data_end cb_saved; - struct bpf_skb_data_end *cb; + struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; - /* Note that even though the const qualifier is discarded - * throughout the execution of the BPF program, all changes(the - * control block) are reverted after the BPF program returns. - * Therefore, __skb_flow_dissect does not alter the skb. - */ - - cb = (struct bpf_skb_data_end *)skb->cb; - - /* Save Control Block */ - memcpy(&cb_saved, cb, sizeof(cb_saved)); - memset(cb, 0, sizeof(*cb)); - /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); - cb->qdisc_cb.flow_keys = flow_keys; - flow_keys->n_proto = skb->protocol; - flow_keys->nhoff = skb_network_offset(skb); + flow_keys->n_proto = proto; + flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; - bpf_compute_data_pointers((struct sk_buff *)skb); - result = BPF_PROG_RUN(prog, skb); - - /* Restore state */ - memcpy(cb, &cb_saved, sizeof(cb_saved)); + result = BPF_PROG_RUN(prog, ctx); - flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, - skb_network_offset(skb), skb->len); + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, - flow_keys->nhoff, skb->len); + flow_keys->nhoff, hlen); return result == BPF_OK; } /** * __skb_flow_dissect - extract the flow_keys struct and return it + * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified * @flow_dissector: list of keys to dissect * @target_container: target structure to put dissected values into @@ -743,7 +723,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, * * Caller must take care of zeroing target container memory. */ -bool __skb_flow_dissect(const struct sk_buff *skb, +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -756,6 +737,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; + struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; int num_hdrs = 0; @@ -798,22 +780,39 @@ bool __skb_flow_dissect(const struct sk_buff *skb, target_container); if (skb) { - struct bpf_flow_keys flow_keys; - struct bpf_prog *attached = NULL; + if (!net) { + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + } + } + WARN_ON_ONCE(!net); + if (net) { rcu_read_lock(); - - if (skb->dev) - attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog); - else if (skb->sk) - attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog); - else - WARN_ON_ONCE(1); + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { - ret = __skb_flow_bpf_dissect(attached, skb, - flow_dissector, - &flow_keys); + struct bpf_flow_keys flow_keys; + struct bpf_flow_dissector ctx = { + .flow_keys = &flow_keys, + .data = data, + .data_end = data + hlen, + }; + __be16 n_proto = proto; + + if (skb) { + ctx.skb = skb; + /* we can't use 'proto' in the skb case + * because it might be set to skb->vlan_proto + * which has been pulled from the data + */ + n_proto = skb->protocol; + } + + ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + hlen); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); @@ -1410,8 +1409,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); - __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, - NULL, 0, 0, 0, + __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, + &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); return __flow_hash_from_keys(&keys, hashrnd); @@ -1512,7 +1511,8 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys_basic keys; - if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f7a3d7a171c7..0f9863dc4d44 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -119,13 +119,14 @@ EXPORT_SYMBOL(eth_header); /** * eth_get_headlen - determine the length of header for an ethernet frame + * @dev: pointer to network device * @data: pointer to start of frame * @len: total length of frame * * Make a best effort attempt to pull the length for all of the headers for * a given frame in a linear buffer. */ -u32 eth_get_headlen(void *data, unsigned int len) +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len) { const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG; const struct ethhdr *eth = (const struct ethhdr *)data; @@ -136,8 +137,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto, - sizeof(*eth), len, flags)) + if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data, + eth->h_proto, sizeof(*eth), + len, flags)) return max_t(u32, keys.control.thoff, sizeof(*eth)); /* parse for any L4 headers */ diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 7136ab9ffa73..3fd83b9dc1bf 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -26,7 +26,7 @@ static void load_and_attach_program(void) struct bpf_object *obj; ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, - cfg_map_name, &prog_fd); + cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index 41dd6959feb0..daeaeb518894 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -9,10 +9,12 @@ static inline int bpf_flow_load(struct bpf_object **obj, const char *path, const char *section_name, const char *map_name, - int *prog_fd) + const char *keys_map_name, + int *prog_fd, + int *keys_fd) { struct bpf_program *prog, *main_prog; - struct bpf_map *prog_array; + struct bpf_map *prog_array, *keys; int prog_array_fd; int ret, fd, i; @@ -23,19 +25,29 @@ static inline int bpf_flow_load(struct bpf_object **obj, main_prog = bpf_object__find_program_by_title(*obj, section_name); if (!main_prog) - return ret; + return -1; *prog_fd = bpf_program__fd(main_prog); if (*prog_fd < 0) - return ret; + return -1; prog_array = bpf_object__find_map_by_name(*obj, map_name); if (!prog_array) - return ret; + return -1; prog_array_fd = bpf_map__fd(prog_array); if (prog_array_fd < 0) - return ret; + return -1; + + if (keys_map_name && keys_fd) { + keys = bpf_object__find_map_by_name(*obj, keys_map_name); + if (!keys) + return -1; + + *keys_fd = bpf_map__fd(keys); + if (*keys_fd < 0) + return -1; + } i = 0; bpf_object__for_each_program(prog, *obj) { diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 126319f9a97c..8b54adfd6264 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <error.h> +#include <linux/if.h> +#include <linux/if_tun.h> #define CHECK_FLOW_KEYS(desc, got, expected) \ CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ @@ -79,8 +82,8 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = 0, - .thoff = sizeof(struct iphdr), + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), @@ -95,8 +98,8 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = 0, - .thoff = sizeof(struct ipv6hdr), + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), @@ -113,8 +116,8 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = VLAN_HLEN, - .thoff = VLAN_HLEN + sizeof(struct iphdr), + .nhoff = ETH_HLEN + VLAN_HLEN, + .thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), @@ -131,8 +134,9 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = VLAN_HLEN * 2, - .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr), + .nhoff = ETH_HLEN + VLAN_HLEN * 2, + .thoff = ETH_HLEN + VLAN_HLEN * 2 + + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), @@ -140,13 +144,73 @@ struct test tests[] = { }, }; +static int create_tap(const char *ifname) +{ + struct ifreq ifr = { + .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS, + }; + int fd, ret; + + strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) + return -1; + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + return -1; + + return fd; +} + +static int tx_tap(int fd, void *pkt, size_t len) +{ + struct iovec iov[] = { + { + .iov_len = len, + .iov_base = pkt, + }, + }; + return writev(fd, iov, ARRAY_SIZE(iov)); +} + +static int ifup(const char *ifname) +{ + struct ifreq ifr = {}; + int sk, ret; + + strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + sk = socket(PF_INET, SOCK_DGRAM, 0); + if (sk < 0) + return -1; + + ret = ioctl(sk, SIOCGIFFLAGS, &ifr); + if (ret) { + close(sk); + return -1; + } + + ifr.ifr_flags |= IFF_UP; + ret = ioctl(sk, SIOCSIFFLAGS, &ifr); + if (ret) { + close(sk); + return -1; + } + + close(sk); + return 0; +} + void test_flow_dissector(void) { + int i, err, prog_fd, keys_fd = -1, tap_fd; struct bpf_object *obj; - int i, err, prog_fd; + __u32 duration = 0; err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", - "jmp_table", &prog_fd); + "jmp_table", "last_dissection", &prog_fd, &keys_fd); if (err) { error_cnt++; return; @@ -171,5 +235,34 @@ void test_flow_dissector(void) CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } + /* Do the same tests but for skb-less flow dissector. + * We use a known path in the net/tun driver that calls + * eth_get_headlen and we manually export bpf_flow_keys + * via BPF map in this case. + */ + + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno); + + tap_fd = create_tap("tap0"); + CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno); + err = ifup("tap0"); + CHECK(err, "ifup", "err %d errno %d", err, errno); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct bpf_flow_keys flow_keys = {}; + struct bpf_prog_test_run_attr tattr = {}; + __u32 key = 0; + + err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); + CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno); + + err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); + CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); + + CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); + CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + } + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c new file mode 100644 index 000000000000..dc5ef155ec28 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_flow_dissector_load_bytes(void) +{ + struct bpf_flow_keys flow_keys; + __u32 duration = 0, retval, size; + struct bpf_insn prog[] = { + // BPF_REG_1 - 1st argument: context + // BPF_REG_2 - 2nd argument: offset, start at first byte + BPF_MOV64_IMM(BPF_REG_2, 0), + // BPF_REG_3 - 3rd argument: destination, reserve byte on stack + BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1), + // BPF_REG_4 - 4th argument: copy one byte + BPF_MOV64_IMM(BPF_REG_4, 1), + // bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1) + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + // if (ret == 0) return BPF_DROP (2) + BPF_MOV64_IMM(BPF_REG_0, BPF_DROP), + BPF_EXIT_INSN(), + // if (ret != 0) return BPF_OK (0) + BPF_MOV64_IMM(BPF_REG_0, BPF_OK), + BPF_EXIT_INSN(), + }; + int fd, err; + + /* make sure bpf_skb_load_bytes is not allowed from skb-less context + */ + fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + CHECK(fd < 0, + "flow_dissector-bpf_skb_load_bytes-load", + "fd %d errno %d\n", + fd, errno); + + err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, + "flow_dissector-bpf_skb_load_bytes", + "err %d errno %d retval %d duration %d size %u/%zu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + + if (fd >= -1) + close(fd); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 75b17cada539..81ad9a0b29d0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -64,6 +64,25 @@ struct bpf_map_def SEC("maps") jmp_table = { .max_entries = 8 }; +struct bpf_map_def SEC("maps") last_dissection = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_flow_keys), + .max_entries = 1, +}; + +static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, + int ret) +{ + struct bpf_flow_keys *val; + __u32 key = 0; + + val = bpf_map_lookup_elem(&last_dissection, &key); + if (val) + memcpy(val, keys, sizeof(*val)); + return ret; +} + static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, __u16 hdr_size, void *buffer) @@ -109,10 +128,10 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) break; default: /* Protocol not supported */ - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } SEC("flow_dissector") @@ -139,8 +158,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_ICMP: icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); if (!icmp) - return BPF_DROP; - return BPF_OK; + return export_flow_keys(keys, BPF_DROP); + return export_flow_keys(keys, BPF_OK); case IPPROTO_IPIP: keys->is_encap = true; return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); @@ -150,11 +169,11 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_GRE: gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); if (!gre) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (bpf_htons(gre->flags & GRE_VERSION)) /* Only inspect standard GRE packets with version 0 */ - return BPF_OK; + return export_flow_keys(keys, BPF_OK); keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ if (GRE_IS_CSUM(gre->flags)) @@ -170,7 +189,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), &_eth); if (!eth) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->thoff += sizeof(*eth); @@ -181,31 +200,31 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_TCP: tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); if (!tcp) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (tcp->doff < 5) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if ((__u8 *)tcp + (tcp->doff << 2) > data_end) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->sport = tcp->source; keys->dport = tcp->dest; - return BPF_OK; + return export_flow_keys(keys, BPF_OK); case IPPROTO_UDP: case IPPROTO_UDPLITE: udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); if (!udp) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->sport = udp->source; keys->dport = udp->dest; - return BPF_OK; + return export_flow_keys(keys, BPF_OK); default: - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) @@ -225,7 +244,7 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) return parse_ip_proto(skb, nexthdr); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } PROG(IP)(struct __sk_buff *skb) @@ -238,11 +257,11 @@ PROG(IP)(struct __sk_buff *skb) iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); if (!iph) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); /* IP header cannot be smaller than 20 bytes */ if (iph->ihl < 5) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->addr_proto = ETH_P_IP; keys->ipv4_src = iph->saddr; @@ -250,7 +269,7 @@ PROG(IP)(struct __sk_buff *skb) keys->thoff += iph->ihl << 2; if (data + keys->thoff > data_end) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { keys->is_frag = true; @@ -264,7 +283,7 @@ PROG(IP)(struct __sk_buff *skb) } if (done) - return BPF_OK; + return export_flow_keys(keys, BPF_OK); return parse_ip_proto(skb, iph->protocol); } @@ -276,7 +295,7 @@ PROG(IPV6)(struct __sk_buff *skb) ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); if (!ip6h) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->addr_proto = ETH_P_IPV6; memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); @@ -288,11 +307,12 @@ PROG(IPV6)(struct __sk_buff *skb) PROG(IPV6OP)(struct __sk_buff *skb) { + struct bpf_flow_keys *keys = skb->flow_keys; struct ipv6_opt_hdr *ip6h, _ip6h; ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); if (!ip6h) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); /* hlen is in 8-octets and does not include the first 8 bytes * of the header @@ -309,7 +329,7 @@ PROG(IPV6FR)(struct __sk_buff *skb) fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); if (!fragh) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->thoff += sizeof(*fragh); keys->is_frag = true; @@ -321,13 +341,14 @@ PROG(IPV6FR)(struct __sk_buff *skb) PROG(MPLS)(struct __sk_buff *skb) { + struct bpf_flow_keys *keys = skb->flow_keys; struct mpls_label *mpls, _mpls; mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); if (!mpls) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); - return BPF_OK; + return export_flow_keys(keys, BPF_OK); } PROG(VLAN)(struct __sk_buff *skb) @@ -339,10 +360,10 @@ PROG(VLAN)(struct __sk_buff *skb) if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); @@ -350,14 +371,14 @@ PROG(VLAN)(struct __sk_buff *skb) vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->n_proto = vlan->h_vlan_encapsulated_proto; return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); |