diff options
| author | Alexei Starovoitov <[email protected]> | 2023-12-13 16:16:42 -0800 |
|---|---|---|
| committer | Alexei Starovoitov <[email protected]> | 2023-12-13 16:16:42 -0800 |
| commit | ec14325c7339bf1d40fc29bb8a0d2121cfe649aa (patch) | |
| tree | 69dbb3887b907d62888bd4cad1781053d55ddd3c /include | |
| parent | 733763285acfe8dffd6e39ad2ed3d1222b32a901 (diff) | |
| parent | 4c6612f6100c2d85212865dbd1a5d8a7e391d3cb (diff) | |
Merge branch 'xdp-metadata-via-kfuncs-for-ice-vlan-hint'
Larysa Zaremba says:
====================
XDP metadata via kfuncs for ice + VLAN hint
This series introduces XDP hints via kfuncs [0] to the ice driver.
Series brings the following existing hints to the ice driver:
- HW timestamp
- RX hash with type
Series also introduces VLAN tag with protocol XDP hint, it now be accessed by
XDP and userspace (AF_XDP) programs. They can also be checked with xdp_metadata
test and xdp_hw_metadata program.
Impact of these patches on ice performance:
ZC:
* Full hints implementation decreases pps in ZC mode by less than 3%
(64B, rxdrop)
skb (packets with invalid IP, dropped by stack):
* Overall, patchset improves peak performance in skb mode by about 0.5%
[0] https://patchwork.kernel.org/project/netdevbpf/cover/[email protected]/
v7:
https://lore.kernel.org/bpf/[email protected]/
v6:
https://lore.kernel.org/bpf/[email protected]/
Intermediate RFC v2:
https://lore.kernel.org/bpf/[email protected]/
Intermediate RFC v1:
https://lore.kernel.org/bpf/[email protected]/
v5:
https://lore.kernel.org/bpf/[email protected]/
v4:
https://lore.kernel.org/bpf/[email protected]/
v3:
https://lore.kernel.org/bpf/[email protected]/
v2:
https://lore.kernel.org/bpf/[email protected]/
v1:
https://lore.kernel.org/all/[email protected]/
Changes since v7:
* shorten timestamp assignment in ice
* change first argument of ice_fill_rx_descs back to xsk_buff_pool
* fix kernel-doc for ice_run_xdp_zc
* add missing XSK_CHECK_PRIV_TYPE() in ice
* resolved selftests merge conflicts with TX hints
* AF_INET patch adds new packet generation, not replaces AF_XDP one
* fix destination port in xdp_metadata
Changes since v6:
* add ability to fill cb of all xdp_buffs in xsk_buff_pool
* place just pointer to packet context in ice_xdp_buff
* add const qualifiers in veth implementation
* generate uapi for VLAN hint
Changes since v5:
* drop checksum hint from the patchset entirely
* Alex's patch that lifts the data_meta size limitation is no longer
required in this patchset, so will be sent separately
* new patch: hide some ice hints code behind a static key
* fix several bugs in ZC mode (ice)
* change argument order in VLAN hint kfunc (tci, proto -> proto, tci)
* cosmetic changes
* analyze performance impact
Changes since v4:
* Drop the concept of partial checksum from the hint design
* Drop the concept of checksum level from the hint design
Changes since v3:
* use XDP_CHECKSUM_VALID_LVL0 + csum_level instead of csum_level + 1
* fix spelling mistakes
* read XDP timestamp unconditionally
* add TO_STR() macro
Changes since v2:
* redesign checksum hint, so now it gives full status
* rename vlan_tag -> vlan_tci, where applicable
* use open_netns() and close_netns() in xdp_metadata
* improve VLAN hint documentation
* replace CFI with DEI
* use VLAN_VID_MASK in xdp_metadata
* make vlan_get_tag() return -ENODATA
* remove unused rx_ptype in ice_xsk.c
* fix ice timestamp code division between patches
Changes since v1:
* directly return RX hash, RX timestamp and RX checksum status
in skb-common functions
* use intermediate enum value for checksum status in ice
* get rid of ring structure dependency in ice kfunc implementation
* make variables const, when possible, in ice implementation
* use -ENODATA instead of -EOPNOTSUPP for driver implementation
* instead of having 2 separate functions for c-tag and s-tag,
use 1 function that outputs both VLAN tag and protocol ID
* improve documentation for introduced hints
* update xdp_metadata selftest to test new hints
* implement new hints in veth, so they can be tested in xdp_metadata
* parse VLAN tag in xdp_hw_metadata
====================
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/if_vlan.h | 4 | ||||
| -rw-r--r-- | include/linux/mlx5/device.h | 2 | ||||
| -rw-r--r-- | include/net/xdp.h | 9 | ||||
| -rw-r--r-- | include/net/xdp_sock_drv.h | 17 | ||||
| -rw-r--r-- | include/net/xsk_buff_pool.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/netdev.h | 3 |
6 files changed, 34 insertions, 3 deletions
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 3028af87716e..c1645c86eed9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -540,7 +540,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb); if (!eth_type_vlan(veth->h_vlan_proto)) - return -EINVAL; + return -ENODATA; *vlan_tci = ntohs(veth->h_vlan_TCI); return 0; @@ -561,7 +561,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, return 0; } else { *vlan_tci = 0; - return -EINVAL; + return -ENODATA; } } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 820bca965fb6..01275c6e8468 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -918,7 +918,7 @@ static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe) return (cqe->tls_outer_l3_tunneled >> 3) & 0x3; } -static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) +static inline bool cqe_has_vlan(const struct mlx5_cqe64 *cqe) { return cqe->l4_l3_hdr_type & 0x1; } diff --git a/include/net/xdp.h b/include/net/xdp.h index 5d3673afc037..8cd04a74dba5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -404,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, NETDEV_XDP_RX_METADATA_HASH, \ bpf_xdp_metadata_rx_hash, \ xmo_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + NETDEV_XDP_RX_METADATA_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag, \ + xmo_rx_vlan_tag) \ enum xdp_rx_metadata { #define XDP_METADATA_KFUNC(name, _, __, ___) name, @@ -432,6 +436,7 @@ enum xdp_rss_hash_type { XDP_RSS_L4_UDP = BIT(5), XDP_RSS_L4_SCTP = BIT(6), XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */ + XDP_RSS_L4_ICMP = BIT(8), /* Second part: RSS hash type combinations used for driver HW mapping */ XDP_RSS_TYPE_NONE = 0, @@ -447,11 +452,13 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP, XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR, XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR, @@ -462,6 +469,8 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto, + u16 *vlan_tci); }; #ifdef CONFIG_NET diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 81e02de3f453..b62bb8525a5f 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -14,6 +14,12 @@ #ifdef CONFIG_XDP_SOCKETS +struct xsk_cb_desc { + void *src; + u8 off; + u8 bytes; +}; + void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); @@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, xp_set_rxq_info(pool, rxq); } +static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, + struct xsk_cb_desc *desc) +{ + xp_fill_cb(pool, desc); +} + static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { #ifdef CONFIG_NET_RX_BUSY_POLL @@ -274,6 +286,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, { } +static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, + struct xsk_cb_desc *desc) +{ +} + static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { return 0; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 8d48d37ab7c0..99dd7376df6a 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -12,6 +12,7 @@ struct xsk_buff_pool; struct xdp_rxq_info; +struct xsk_cb_desc; struct xsk_queue; struct xdp_desc; struct xdp_umem; @@ -135,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p /* AF_XDP ZC drivers, via xdp_sock_buff.h */ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq); +void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc); int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages); void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs); diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** |