From b470985c76df6d53a9454670fb7551e1197f55e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 27 Jul 2023 13:38:04 -0400 Subject: net/handshake: Trace events for TLS Alert helpers Add observability for the new TLS Alert infrastructure. Reviewed-by: Hannes Reinecke Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/169047947409.5241.14548832149596892717.stgit@oracle-102.nfsv4bat.org Signed-off-by: Jakub Kicinski --- include/trace/events/handshake.h | 160 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/handshake.h b/include/trace/events/handshake.h index 8dadcab5f12a..bdd8a03cf5ba 100644 --- a/include/trace/events/handshake.h +++ b/include/trace/events/handshake.h @@ -6,7 +6,86 @@ #define _TRACE_HANDSHAKE_H #include +#include #include +#include + +#define TLS_RECORD_TYPE_LIST \ + record_type(CHANGE_CIPHER_SPEC) \ + record_type(ALERT) \ + record_type(HANDSHAKE) \ + record_type(DATA) \ + record_type(HEARTBEAT) \ + record_type(TLS12_CID) \ + record_type_end(ACK) + +#undef record_type +#undef record_type_end +#define record_type(x) TRACE_DEFINE_ENUM(TLS_RECORD_TYPE_##x); +#define record_type_end(x) TRACE_DEFINE_ENUM(TLS_RECORD_TYPE_##x); + +TLS_RECORD_TYPE_LIST + +#undef record_type +#undef record_type_end +#define record_type(x) { TLS_RECORD_TYPE_##x, #x }, +#define record_type_end(x) { TLS_RECORD_TYPE_##x, #x } + +#define show_tls_content_type(type) \ + __print_symbolic(type, TLS_RECORD_TYPE_LIST) + +TRACE_DEFINE_ENUM(TLS_ALERT_LEVEL_WARNING); +TRACE_DEFINE_ENUM(TLS_ALERT_LEVEL_FATAL); + +#define show_tls_alert_level(level) \ + __print_symbolic(level, \ + { TLS_ALERT_LEVEL_WARNING, "Warning" }, \ + { TLS_ALERT_LEVEL_FATAL, "Fatal" }) + +#define TLS_ALERT_DESCRIPTION_LIST \ + alert_description(CLOSE_NOTIFY) \ + alert_description(UNEXPECTED_MESSAGE) \ + alert_description(BAD_RECORD_MAC) \ + alert_description(RECORD_OVERFLOW) \ + alert_description(HANDSHAKE_FAILURE) \ + alert_description(BAD_CERTIFICATE) \ + alert_description(UNSUPPORTED_CERTIFICATE) \ + alert_description(CERTIFICATE_REVOKED) \ + alert_description(CERTIFICATE_EXPIRED) \ + alert_description(CERTIFICATE_UNKNOWN) \ + alert_description(ILLEGAL_PARAMETER) \ + alert_description(UNKNOWN_CA) \ + alert_description(ACCESS_DENIED) \ + alert_description(DECODE_ERROR) \ + alert_description(DECRYPT_ERROR) \ + alert_description(TOO_MANY_CIDS_REQUESTED) \ + alert_description(PROTOCOL_VERSION) \ + alert_description(INSUFFICIENT_SECURITY) \ + alert_description(INTERNAL_ERROR) \ + alert_description(INAPPROPRIATE_FALLBACK) \ + alert_description(USER_CANCELED) \ + alert_description(MISSING_EXTENSION) \ + alert_description(UNSUPPORTED_EXTENSION) \ + alert_description(UNRECOGNIZED_NAME) \ + alert_description(BAD_CERTIFICATE_STATUS_RESPONSE) \ + alert_description(UNKNOWN_PSK_IDENTITY) \ + alert_description(CERTIFICATE_REQUIRED) \ + alert_description_end(NO_APPLICATION_PROTOCOL) + +#undef alert_description +#undef alert_description_end +#define alert_description(x) TRACE_DEFINE_ENUM(TLS_ALERT_DESC_##x); +#define alert_description_end(x) TRACE_DEFINE_ENUM(TLS_ALERT_DESC_##x); + +TLS_ALERT_DESCRIPTION_LIST + +#undef alert_description +#undef alert_description_end +#define alert_description(x) { TLS_ALERT_DESC_##x, #x }, +#define alert_description_end(x) { TLS_ALERT_DESC_##x, #x } + +#define show_tls_alert_description(desc) \ + __print_symbolic(desc, TLS_ALERT_DESCRIPTION_LIST) DECLARE_EVENT_CLASS(handshake_event_class, TP_PROTO( @@ -106,6 +185,47 
@@ DECLARE_EVENT_CLASS(handshake_error_class, ), \ TP_ARGS(net, req, sk, err)) +DECLARE_EVENT_CLASS(handshake_alert_class, + TP_PROTO( + const struct sock *sk, + unsigned char level, + unsigned char description + ), + TP_ARGS(sk, level, description), + TP_STRUCT__entry( + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(unsigned int, netns_ino) + __field(unsigned long, level) + __field(unsigned long, description) + ), + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + __entry->netns_ino = sock_net(sk)->ns.inum; + __entry->level = level; + __entry->description = description; + ), + TP_printk("src=%pISpc dest=%pISpc %s: %s", + __entry->saddr, __entry->daddr, + show_tls_alert_level(__entry->level), + show_tls_alert_description(__entry->description) + ) +); +#define DEFINE_HANDSHAKE_ALERT(name) \ + DEFINE_EVENT(handshake_alert_class, name, \ + TP_PROTO( \ + const struct sock *sk, \ + unsigned char level, \ + unsigned char description \ + ), \ + TP_ARGS(sk, level, description)) + /* * Request lifetime events @@ -154,6 +274,46 @@ DEFINE_HANDSHAKE_ERROR(handshake_cmd_accept_err); DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_done); DEFINE_HANDSHAKE_ERROR(handshake_cmd_done_err); +/* + * TLS Record events + */ + +TRACE_EVENT(tls_contenttype, + TP_PROTO( + const struct sock *sk, + unsigned char type + ), + TP_ARGS(sk, type), + TP_STRUCT__entry( + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(unsigned int, netns_ino) + __field(unsigned long, type) + ), + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + __entry->netns_ino = sock_net(sk)->ns.inum; + __entry->type = type; + ), + TP_printk("src=%pISpc dest=%pISpc %s", + __entry->saddr, __entry->daddr, + show_tls_content_type(__entry->type) + ) +); + +/* + * TLS Alert events + */ + +DEFINE_HANDSHAKE_ALERT(tls_alert_send); +DEFINE_HANDSHAKE_ALERT(tls_alert_recv); + #endif /* _TRACE_HANDSHAKE_H */ #include -- cgit From bf4ea1d0b2cb2251f9e5619c81daa98591087c33 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 1 Aug 2023 22:26:20 +0800 Subject: bpf, xdp: Add tracepoint to xdp attaching failure When an error happens in dev_xdp_attach(), there should be a way to tell users the error message, as the netlink approach already does. To avoid breaking UAPI, adding a tracepoint in bpf_xdp_link_attach() is an appropriate way to expose the error message. BPF libraries can then retrieve the error message via this tracepoint and report it to users.
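As a rough illustration of such a consumer, the sketch below shows a BPF program that attaches to the new tracepoint and copies the message out for its loader to report. This is not part of the patch; the program name, buffer size, and global variable are hypothetical, and it assumes a libbpf-style build with vmlinux.h.

/* Hypothetical tracepoint consumer (sketch, not from this series). */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char attach_err[64];	/* loader reads this back after a failed attach */

SEC("tp_btf/bpf_xdp_link_attach_failed")
int BPF_PROG(xdp_attach_failed, const char *msg)
{
	/* msg is the extack string handed to trace_bpf_xdp_link_attach_failed() */
	bpf_probe_read_kernel_str(attach_err, sizeof(attach_err), msg);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";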
Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20230801142621.7925-2-hffilwlqm@gmail.com Signed-off-by: Alexei Starovoitov --- include/trace/events/xdp.h | 17 +++++++++++++++++ net/core/dev.c | 5 ++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/trace/events') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index c40fc97f9417..cd89f1d5ce7b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -404,6 +404,23 @@ TRACE_EVENT(mem_return_failed, ) ); +TRACE_EVENT(bpf_xdp_link_attach_failed, + + TP_PROTO(const char *msg), + + TP_ARGS(msg), + + TP_STRUCT__entry( + __string(msg, msg) + ), + + TP_fast_assign( + __assign_str(msg, msg); + ), + + TP_printk("errmsg=%s", __get_str(msg)) +); + #endif /* _TRACE_XDP_H */ #include diff --git a/net/core/dev.c b/net/core/dev.c index 8e7d0cb540cd..002fec07de73 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -133,6 +133,7 @@ #include #include #include +#include #include #include #include @@ -9470,6 +9471,7 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct net *net = current->nsproxy->net_ns; struct bpf_link_primer link_primer; + struct netlink_ext_ack extack = {}; struct bpf_xdp_link *link; struct net_device *dev; int err, fd; @@ -9497,12 +9499,13 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) goto unlock; } - err = dev_xdp_attach_link(dev, NULL, link); + err = dev_xdp_attach_link(dev, &extack, link); rtnl_unlock(); if (err) { link->dev = NULL; bpf_link_cleanup(&link_primer); + trace_bpf_xdp_link_attach_failed(extack._msg); goto out_put_dev; } -- cgit From 680ee0456a5712309db9ec2692e908ea1d6b1644 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Aug 2023 18:02:30 -0700 Subject: net: invert the netdevice.h vs xdp.h dependency xdp.h is far more specific and is included in only 67 other files vs netdevice.h's 1538 include sites. Make xdp.h include netdevice.h, instead of the other way around. This decreases the incremental allmodconfig build size when xdp.h is touched, from 5947 to 662 objects. Moving bpf_prog_run_xdp() to xdp.h seems appropriate, and filter.h is a mega-header in its own right, so it's nice to avoid xdp.h getting included there as well. The only unfortunate part is that the typedef for xdp_features_t has to move to netdevice.h, since it's embedded in struct net_device. Signed-off-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20230803010230.1755386-4-kuba@kernel.org Signed-off-by: Martin KaFai Lau --- include/linux/filter.h | 17 ----------------- include/linux/netdevice.h | 11 ++++------- include/net/busy_poll.h | 1 + include/net/xdp.h | 29 +++++++++++++++++++++++++---- include/trace/events/xdp.h | 1 + kernel/bpf/btf.c | 1 + kernel/bpf/offload.c | 1 + kernel/bpf/verifier.c | 1 + net/netfilter/nf_conntrack_bpf.c | 1 + 9 files changed, 35 insertions(+), 28 deletions(-) (limited to 'include/trace/events') diff --git a/include/linux/filter.h b/include/linux/filter.h index f5eabe3fa5e8..2d6fe30bad5f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -774,23 +774,6 @@ DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp); -static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, - struct xdp_buff *xdp) -{ - /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus - * under local_bh_disable(), which provides the needed RCU protection - * for accessing map entries.
- */ - u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); - - if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { - if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) - act = xdp_master_redirect(xdp); - } - - return act; -} - void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5563c8a210b5..d8ed85183fe4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include #endif #include -#include #include #include @@ -76,8 +75,12 @@ struct udp_tunnel_nic_info; struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +struct xdp_frame; +struct xdp_metadata_ops; struct xdp_md; +typedef u32 xdp_features_t; + void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1628,12 +1631,6 @@ struct net_device_ops { bool cycles); }; -struct xdp_metadata_ops { - int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); - int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, - enum xdp_rss_hash_type *rss_type); -}; - /** * enum netdev_priv_flags - &struct net_device priv_flags * diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index f90f0021f5f2..4dabeb6c76d3 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -16,6 +16,7 @@ #include #include #include +#include /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu diff --git a/include/net/xdp.h b/include/net/xdp.h index d1c5381fc95f..de08c8e0d134 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -6,9 +6,10 @@ #ifndef __LINUX_NET_XDP_H__ #define __LINUX_NET_XDP_H__ -#include /* skb_shared_info */ -#include #include +#include +#include +#include /* skb_shared_info */ /** * DOC: XDP RX-queue information @@ -45,8 +46,6 @@ enum xdp_mem_type { MEM_TYPE_MAX, }; -typedef u32 xdp_features_t; - /* XDP flags for ndo_xdp_xmit */ #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH @@ -443,6 +442,12 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR, }; +struct xdp_metadata_ops { + int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); + int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type); +}; + #ifdef CONFIG_NET u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); @@ -474,4 +479,20 @@ static inline void xdp_clear_features_flag(struct net_device *dev) xdp_set_features_flag(dev, 0); } +static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus + * under local_bh_disable(), which provides the needed RCU protection + * for accessing map entries. 
+ */ + u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + + if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { + if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) + act = xdp_master_redirect(xdp); + } + + return act; +} #endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index cd89f1d5ce7b..9adc2bdf2f94 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -9,6 +9,7 @@ #include #include #include +#include #define __XDP_ACT_MAP(FN) \ FN(ABORTED) \ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ef9581a580e2..249657c466dd 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -29,6 +29,7 @@ #include #include +#include #include "../tools/lib/bpf/relo_core.h" /* BTF (BPF Type Format) is the meta data format which describes diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8a26cd8814c1..3e4f2ec1af06 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -25,6 +25,7 @@ #include #include #include +#include /* Protects offdevs, members of bpf_offload_netdev and offload members * of all progs. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e7b1af016841..132f25dab931 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "disasm.h" diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 0d36d7285e3f..c7a6114091ae 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include -- cgit From a9ca9f9ceff382b58b488248f0c0da9e157f5d06 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 4 Aug 2023 20:05:24 +0200 Subject: page_pool: split types and declarations from page_pool.h Split types and pure function declarations from page_pool.h into page_pool/types.h, so that headers which only need the types can include page_pool/types.h, while C sources include the full set of helpers, as suggested by Jakub. Rename page_pool.h to page_pool/helpers.h to have both headers in one place.
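To make the intended include discipline concrete, a hypothetical driver split might look as follows (the foo names are illustrative, not from this patch):

/* foo.h -- a header that only embeds page_pool pointers needs the types */
#include <net/page_pool/types.h>

struct foo_rx_ring {
	struct page_pool *page_pool;	/* type comes from page_pool/types.h */
};

/* foo_main.c -- code calling the inline helpers includes helpers.h */
#include <net/page_pool/helpers.h>

static struct page *foo_alloc_rx_page(struct foo_rx_ring *ring)
{
	/* page_pool_dev_alloc_pages() is a static inline from helpers.h */
	return page_pool_dev_alloc_pages(ring->page_pool);
}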
Signed-off-by: Yunsheng Lin Suggested-by: Jakub Kicinski Signed-off-by: Alexander Lobakin Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230804180529.2483231-2-aleksander.lobakin@intel.com [Jakub: change microsoft/mana, fix kdoc paths in Documentation] Signed-off-by: Jakub Kicinski --- Documentation/networking/page_pool.rst | 6 +- MAINTAINERS | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- drivers/net/ethernet/engleder/tsnep_main.c | 1 + drivers/net/ethernet/freescale/fec_main.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 +- drivers/net/ethernet/marvell/mvneta.c | 2 +- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 2 +- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 + .../ethernet/marvell/octeontx2/nic/otx2_common.c | 1 + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 1 + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/params.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2 +- .../net/ethernet/microchip/lan966x/lan966x_fdma.c | 1 + .../net/ethernet/microchip/lan966x/lan966x_main.h | 2 +- drivers/net/ethernet/microsoft/mana/mana_en.c | 1 + drivers/net/ethernet/socionext/netsec.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + drivers/net/ethernet/ti/cpsw.c | 2 +- drivers/net/ethernet/ti/cpsw_new.c | 2 +- drivers/net/ethernet/ti/cpsw_priv.c | 2 +- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- drivers/net/veth.c | 2 +- drivers/net/wireless/mediatek/mt76/mac80211.c | 1 - drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/xen-netfront.c | 2 +- include/linux/skbuff.h | 2 +- include/net/page_pool.h | 470 --------------------- include/net/page_pool/helpers.h | 238 +++++++++++ include/net/page_pool/types.h | 238 +++++++++++ include/trace/events/page_pool.h | 2 +- net/bpf/test_run.c | 2 +- net/core/page_pool.c | 2 +- net/core/skbuff.c | 2 +- net/core/xdp.c | 2 +- 44 files changed, 517 insertions(+), 500 deletions(-) delete mode 100644 include/net/page_pool.h create mode 100644 include/net/page_pool/helpers.h create mode 100644 include/net/page_pool/types.h (limited to 'include/trace/events') diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index 53b5448cc0f1..68b82cea13e4 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -67,10 +67,10 @@ a page will cause no race conditions is enough. .. kernel-doc:: net/core/page_pool.c :identifiers: page_pool_create -.. kernel-doc:: include/net/page_pool.h +.. kernel-doc:: include/net/page_pool/types.h :identifiers: struct page_pool_params -.. kernel-doc:: include/net/page_pool.h +.. kernel-doc:: include/net/page_pool/helpers.h :identifiers: page_pool_put_page page_pool_put_full_page page_pool_recycle_direct page_pool_dev_alloc_pages page_pool_get_dma_addr page_pool_get_dma_dir @@ -122,7 +122,7 @@ page_pool_stats allocated by the caller. The API will fill in the provided struct page_pool_stats with statistics about the page_pool. -.. kernel-doc:: include/net/page_pool.h +.. 
kernel-doc:: include/net/page_pool/types.h :identifiers: struct page_pool_recycle_stats struct page_pool_alloc_stats struct page_pool_stats diff --git a/MAINTAINERS b/MAINTAINERS index 5e2bb1059ab6..08bcf3a7c482 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16020,7 +16020,7 @@ M: Ilias Apalodimas L: netdev@vger.kernel.org S: Supported F: Documentation/networking/page_pool.rst -F: include/net/page_pool.h +F: include/net/page_pool/ F: include/trace/events/page_pool.h F: net/core/page_pool.c diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6a643aae7802..eb168ca983b7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 2ce46d7affe4..96f5ca778c67 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_xdp.h" diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 079f9f6ae21a..f61bd89734c5 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 43f14cec91e9..3bd0bf03aedb 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9f6890059666..e5e37a33fd81 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 88af34bbee34..acd756b0c7c9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include "hnae3.h" diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index acf4f6ba73a6..d483b8c00ec0 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 11e603686a27..e809f91c08fb 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 9e1b596c8f08..eb74ccddb440 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 8cdd92dd9762..8336cea16aff 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 61f62a6ec662..70b9065f7d10 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "otx2_reg.h" #include "otx2_common.h" diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1b89f800f6df..fe05c9020269 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "mtk_eth_soc.h" #include "mtk_wed.h" diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 80d17729e557..4a2470fbad2c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include "mtk_ppe.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 5ce28ff7685f..e097f336e1c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,6 +6,7 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" +#include #include static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 201ac7dd338f..698647cc8c0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies */ -#include #include "en/txrx.h" #include "en/params.h" #include "en/trap.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 40589cebb773..12f56d0db0af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -35,6 +35,7 @@ #include "en/xdp.h" #include "en/params.h" #include +#include int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1c820119e438..c8ec6467d4d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include "eswitch.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f7bb5f4aaaca..3fd11b0761e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4d77055abd4b..07b84d668fcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -38,7 +38,7 @@ #include "en/port.h" #ifdef CONFIG_PAGE_POOL_STATS -#include +#include #endif static unsigned int stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index bd72fbc2220f..3960534ac2ad 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -2,6 +2,7 @@ #include #include +#include #include "lan966x_main.h" diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index aebc9154693a..caa9e0533c96 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index a08023c57e25..31e2f2c74e15 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 0dcd6a568b06..f358ea003193 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #define NETSEC_REG_SOFT_RST 0x104 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index a6d034968654..3401e888a9f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 99aa5360b3ff..fcab363d8dfa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include "stmmac_ptp.h" diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index f9cd566d1c9b..ca4d4548f85e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index c61e4e44a78f..0e4f526b1753 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index ae52cdbcf8cc..0ec85635dfd6 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 2c3f08be8c37..e04d4a5eed7b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -3,7 +3,7 @@ #include #include 
-#include +#include #include #include #include diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 614f3e3efab0..953f6d8f8db0 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #define DRV_NAME "veth" #define DRV_VERSION "1.0" diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index c0ff36a98bed..d158320bc15d 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -4,7 +4,6 @@ */ #include #include -#include #include "mt76.h" #define CHAN2G(_idx, _freq) { \ diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 878087257ea7..e8757865a3d0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "util.h" #include "testmode.h" diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 47d54d8ea59d..ad29f370034e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 16a49ba534e4..888e3d7e74c1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include #endif diff --git a/include/net/page_pool.h b/include/net/page_pool.h deleted file mode 100644 index 73d4f786418d..000000000000 --- a/include/net/page_pool.h +++ /dev/null @@ -1,470 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * page_pool.h - * Author: Jesper Dangaard Brouer - * Copyright (C) 2016 Red Hat, Inc. - */ - -/** - * DOC: page_pool allocator - * - * This page_pool allocator is optimized for the XDP mode that - * uses one-frame-per-page, but have fallbacks that act like the - * regular page allocator APIs. - * - * Basic use involve replacing alloc_pages() calls with the - * page_pool_alloc_pages() call. Drivers should likely use - * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). - * - * API keeps track of in-flight pages, in-order to let API user know - * when it is safe to dealloactor page_pool object. Thus, API users - * must call page_pool_put_page() where appropriate and only attach - * the page to a page_pool-aware objects, like skbs marked for recycling. - * - * API user must only call page_pool_put_page() once on a page, as it - * will either recycle the page, or in case of elevated refcnt, it - * will release the DMA mapping and in-flight state accounting. We - * hope to lift this requirement in the future. - */ -#ifndef _NET_PAGE_POOL_H -#define _NET_PAGE_POOL_H - -#include /* Needed by ptr_ring */ -#include -#include - -#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA - * map/unmap - */ -#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets - * from page_pool will be - * DMA-synced-for-device according to - * the length provided by the device - * driver. - * Please note DMA-sync-for-CPU is still - * device driver responsibility - */ -#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV |\ - PP_FLAG_PAGE_FRAG) - -/* - * Fast allocation side cache array/stack - * - * The cache size and refill watermark is related to the network - * use-case. 
The NAPI budget is 64 packets. After a NAPI poll the RX - * ring is usually refilled and the max consumed elements will be 64, - * thus a natural max size of objects needed in the cache. - * - * Keeping room for more objects, is due to XDP_DROP use-case. As - * XDP_DROP allows the opportunity to recycle objects directly into - * this array, as it shares the same softirq/NAPI protection. If - * cache is already full (or partly full) then the XDP_DROP recycles - * would have to take a slower code path. - */ -#define PP_ALLOC_CACHE_SIZE 128 -#define PP_ALLOC_CACHE_REFILL 64 -struct pp_alloc_cache { - u32 count; - struct page *cache[PP_ALLOC_CACHE_SIZE]; -}; - -/** - * struct page_pool_params - page pool parameters - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG - * @order: 2^order pages on allocation - * @pool_size: size of the ptr_ring - * @nid: NUMA node id to allocate from pages from - * @dev: device, for DMA pre-mapping purposes - * @napi: NAPI which is the sole consumer of pages, otherwise NULL - * @dma_dir: DMA mapping direction - * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV - * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV - */ -struct page_pool_params { - unsigned int flags; - unsigned int order; - unsigned int pool_size; - int nid; - struct device *dev; - struct napi_struct *napi; - enum dma_data_direction dma_dir; - unsigned int max_len; - unsigned int offset; -/* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); - void *init_arg; -}; - -#ifdef CONFIG_PAGE_POOL_STATS -/** - * struct page_pool_alloc_stats - allocation statistics - * @fast: successful fast path allocations - * @slow: slow path order-0 allocations - * @slow_high_order: slow path high order allocations - * @empty: ptr ring is empty, so a slow path allocation was forced - * @refill: an allocation which triggered a refill of the cache - * @waive: pages obtained from the ptr ring that cannot be added to - * the cache due to a NUMA mismatch - */ -struct page_pool_alloc_stats { - u64 fast; - u64 slow; - u64 slow_high_order; - u64 empty; - u64 refill; - u64 waive; -}; - -/** - * struct page_pool_recycle_stats - recycling (freeing) statistics - * @cached: recycling placed page in the page pool cache - * @cache_full: page pool cache was full - * @ring: page placed into the ptr ring - * @ring_full: page released from page pool because the ptr ring was full - * @released_refcnt: page released (and not recycled) because refcnt > 1 - */ -struct page_pool_recycle_stats { - u64 cached; - u64 cache_full; - u64 ring; - u64 ring_full; - u64 released_refcnt; -}; - -/** - * struct page_pool_stats - combined page pool use statistics - * @alloc_stats: see struct page_pool_alloc_stats - * @recycle_stats: see struct page_pool_recycle_stats - * - * Wrapper struct for combining page pool stats with different storage - * requirements. - */ -struct page_pool_stats { - struct page_pool_alloc_stats alloc_stats; - struct page_pool_recycle_stats recycle_stats; -}; - -int page_pool_ethtool_stats_get_count(void); -u8 *page_pool_ethtool_stats_get_strings(u8 *data); -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); - -/* - * Drivers that wish to harvest page pool stats and report them to users - * (perhaps via ethtool, debugfs, or another mechanism) can allocate a - * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. 
- */ -bool page_pool_get_stats(struct page_pool *pool, - struct page_pool_stats *stats); -#else - -static inline int page_pool_ethtool_stats_get_count(void) -{ - return 0; -} - -static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) -{ - return data; -} - -static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) -{ - return data; -} - -#endif - -struct page_pool { - struct page_pool_params p; - - struct delayed_work release_dw; - void (*disconnect)(void *); - unsigned long defer_start; - unsigned long defer_warn; - - u32 pages_state_hold_cnt; - unsigned int frag_offset; - struct page *frag_page; - long frag_users; - -#ifdef CONFIG_PAGE_POOL_STATS - /* these stats are incremented while in softirq context */ - struct page_pool_alloc_stats alloc_stats; -#endif - u32 xdp_mem_id; - - /* - * Data structure for allocation side - * - * Drivers allocation side usually already perform some kind - * of resource protection. Piggyback on this protection, and - * require driver to protect allocation side. - * - * For NIC drivers this means, allocate a page_pool per - * RX-queue. As the RX-queue is already protected by - * Softirq/BH scheduling and napi_schedule. NAPI schedule - * guarantee that a single napi_struct will only be scheduled - * on a single CPU (see napi_schedule). - */ - struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; - - /* Data structure for storing recycled pages. - * - * Returning/freeing pages is more complicated synchronization - * wise, because free's can happen on remote CPUs, with no - * association with allocation resource. - * - * Use ptr_ring, as it separates consumer and producer - * effeciently, it a way that doesn't bounce cache-lines. - * - * TODO: Implement bulk return pages into this structure. - */ - struct ptr_ring ring; - -#ifdef CONFIG_PAGE_POOL_STATS - /* recycle stats are per-cpu to avoid locking */ - struct page_pool_recycle_stats __percpu *recycle_stats; -#endif - atomic_t pages_state_release_cnt; - - /* A page_pool is strictly tied to a single RX-queue being - * protected by NAPI, due to above pp_alloc_cache. This - * refcnt serves purpose is to simplify drivers error handling. - */ - refcount_t user_cnt; - - u64 destroy_cnt; -}; - -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); - -/** - * page_pool_dev_alloc_pages() - allocate a page. - * @pool: pool from which to allocate - * - * Get a page from the page allocator or page_pool caches. - */ -static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) -{ - gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - - return page_pool_alloc_pages(pool, gfp); -} - -struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, - unsigned int size, gfp_t gfp); - -static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size) -{ - gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - - return page_pool_alloc_frag(pool, offset, size, gfp); -} - -/** - * page_pool_get_dma_dir() - Retrieve the stored DMA direction. - * @pool: pool from which page was allocated - * - * Get the stored dma direction. A driver might decide to store this locally - * and avoid the extra cache line from page_pool to determine the direction. 
- */ -static -inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) -{ - return pool->p.dma_dir; -} - -bool page_pool_return_skb_page(struct page *page, bool napi_safe); - -struct page_pool *page_pool_create(const struct page_pool_params *params); - -struct xdp_mem_info; - -#ifdef CONFIG_PAGE_POOL -void page_pool_unlink_napi(struct page_pool *pool); -void page_pool_destroy(struct page_pool *pool); -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), - struct xdp_mem_info *mem); -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count); -#else -static inline void page_pool_unlink_napi(struct page_pool *pool) -{ -} - -static inline void page_pool_destroy(struct page_pool *pool) -{ -} - -static inline void page_pool_use_xdp_mem(struct page_pool *pool, - void (*disconnect)(void *), - struct xdp_mem_info *mem) -{ -} - -static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) -{ -} -#endif - -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, - bool allow_direct); - -/* pp_frag_count represents the number of writers who can update the page - * either by updating skb->data or via DMA mappings for the device. - * We can't rely on the page refcnt for that as we don't know who might be - * holding page references and we can't reliably destroy or sync DMA mappings - * of the fragments. - * - * When pp_frag_count reaches 0 we can either recycle the page if the page - * refcnt is 1 or return it back to the memory allocator and destroy any - * mappings we have. - */ -static inline void page_pool_fragment_page(struct page *page, long nr) -{ - atomic_long_set(&page->pp_frag_count, nr); -} - -static inline long page_pool_defrag_page(struct page *page, long nr) -{ - long ret; - - /* If nr == pp_frag_count then we have cleared all remaining - * references to the page. No need to actually overwrite it, instead - * we can leave this to be overwritten by the calling function. - * - * The main advantage to doing this is that an atomic_read is - * generally a much cheaper operation than an atomic update, - * especially when dealing with a page that may be partitioned - * into only 2 or 3 pieces. - */ - if (atomic_long_read(&page->pp_frag_count) == nr) - return 0; - - ret = atomic_long_sub_return(nr, &page->pp_frag_count); - WARN_ON(ret < 0); - return ret; -} - -static inline bool page_pool_is_last_frag(struct page_pool *pool, - struct page *page) -{ - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || - (page_pool_defrag_page(page, 1) == 0); -} - -/** - * page_pool_put_page() - release a reference to a page pool page - * @pool: pool from which page was allocated - * @page: page to release a reference on - * @dma_sync_size: how much of the page may have been touched by the device - * @allow_direct: released by the consumer, allow lockless caching - * - * The outcome of this depends on the page refcnt. If the driver bumps - * the refcnt > 1 this will unmap the page. If the page refcnt is 1 - * the allocator owns the page and will try to recycle it in one of the pool - * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device - * using dma_sync_single_range_for_device(). 
- */ -static inline void page_pool_put_page(struct page_pool *pool, - struct page *page, - unsigned int dma_sync_size, - bool allow_direct) -{ - /* When page_pool isn't compiled-in, net/core/xdp.c doesn't - * allow registering MEM_TYPE_PAGE_POOL, but shield linker. - */ -#ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_frag(pool, page)) - return; - - page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); -#endif -} - -/** - * page_pool_put_full_page() - release a reference on a page pool page - * @pool: pool from which page was allocated - * @page: page to release a reference on - * @allow_direct: released by the consumer, allow lockless caching - * - * Similar to page_pool_put_page(), but will DMA sync the entire memory area - * as configured in &page_pool_params.max_len. - */ -static inline void page_pool_put_full_page(struct page_pool *pool, - struct page *page, bool allow_direct) -{ - page_pool_put_page(pool, page, -1, allow_direct); -} - -/** - * page_pool_recycle_direct() - release a reference on a page pool page - * @pool: pool from which page was allocated - * @page: page to release a reference on - * - * Similar to page_pool_put_full_page() but caller must guarantee safe context - * (e.g NAPI), since it will recycle the page directly into the pool fast cache. - */ -static inline void page_pool_recycle_direct(struct page_pool *pool, - struct page *page) -{ - page_pool_put_full_page(pool, page, true); -} - -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ - (sizeof(dma_addr_t) > sizeof(unsigned long)) - -/** - * page_pool_get_dma_addr() - Retrieve the stored DMA address. - * @page: page allocated from a page pool - * - * Fetch the DMA address of the page. The page pool to which the page belongs - * must had been created with PP_FLAG_DMA_MAP. - */ -static inline dma_addr_t page_pool_get_dma_addr(struct page *page) -{ - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; - - return ret; -} - -static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) -{ - page->dma_addr = addr; - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) - page->dma_addr_upper = upper_32_bits(addr); -} - -static inline bool is_page_pool_compiled_in(void) -{ -#ifdef CONFIG_PAGE_POOL - return true; -#else - return false; -#endif -} - -static inline bool page_pool_put(struct page_pool *pool) -{ - return refcount_dec_and_test(&pool->user_cnt); -} - -/* Caller must provide appropriate safe context, e.g. NAPI. */ -void page_pool_update_nid(struct page_pool *pool, int new_nid); -static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) -{ - if (unlikely(pool->p.nid != new_nid)) - page_pool_update_nid(pool, new_nid); -} - -#endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h new file mode 100644 index 000000000000..78df91804c87 --- /dev/null +++ b/include/net/page_pool/helpers.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * page_pool/helpers.h + * Author: Jesper Dangaard Brouer + * Copyright (C) 2016 Red Hat, Inc. + */ + +/** + * DOC: page_pool allocator + * + * This page_pool allocator is optimized for the XDP mode that + * uses one-frame-per-page, but have fallbacks that act like the + * regular page allocator APIs. + * + * Basic use involve replacing alloc_pages() calls with the + * page_pool_alloc_pages() call. Drivers should likely use + * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). 
+ * + * API keeps track of in-flight pages, in-order to let API user know + * when it is safe to dealloactor page_pool object. Thus, API users + * must call page_pool_put_page() where appropriate and only attach + * the page to a page_pool-aware objects, like skbs marked for recycling. + * + * API user must only call page_pool_put_page() once on a page, as it + * will either recycle the page, or in case of elevated refcnt, it + * will release the DMA mapping and in-flight state accounting. We + * hope to lift this requirement in the future. + */ +#ifndef _NET_PAGE_POOL_HELPERS_H +#define _NET_PAGE_POOL_HELPERS_H + +#include + +#ifdef CONFIG_PAGE_POOL_STATS +int page_pool_ethtool_stats_get_count(void); +u8 *page_pool_ethtool_stats_get_strings(u8 *data); +u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); + +/* + * Drivers that wish to harvest page pool stats and report them to users + * (perhaps via ethtool, debugfs, or another mechanism) can allocate a + * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. + */ +bool page_pool_get_stats(struct page_pool *pool, + struct page_pool_stats *stats); +#else +static inline int page_pool_ethtool_stats_get_count(void) +{ + return 0; +} + +static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) +{ + return data; +} + +static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +{ + return data; +} +#endif + +/** + * page_pool_dev_alloc_pages() - allocate a page. + * @pool: pool from which to allocate + * + * Get a page from the page allocator or page_pool caches. + */ +static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_pages(pool, gfp); +} + +static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_frag(pool, offset, size, gfp); +} + +/** + * page_pool_get_dma_dir() - Retrieve the stored DMA direction. + * @pool: pool from which page was allocated + * + * Get the stored dma direction. A driver might decide to store this locally + * and avoid the extra cache line from page_pool to determine the direction. + */ +static +inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) +{ + return pool->p.dma_dir; +} + +/* pp_frag_count represents the number of writers who can update the page + * either by updating skb->data or via DMA mappings for the device. + * We can't rely on the page refcnt for that as we don't know who might be + * holding page references and we can't reliably destroy or sync DMA mappings + * of the fragments. + * + * When pp_frag_count reaches 0 we can either recycle the page if the page + * refcnt is 1 or return it back to the memory allocator and destroy any + * mappings we have. + */ +static inline void page_pool_fragment_page(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_defrag_page(struct page *page, long nr) +{ + long ret; + + /* If nr == pp_frag_count then we have cleared all remaining + * references to the page. No need to actually overwrite it, instead + * we can leave this to be overwritten by the calling function. + * + * The main advantage to doing this is that an atomic_read is + * generally a much cheaper operation than an atomic update, + * especially when dealing with a page that may be partitioned + * into only 2 or 3 pieces. 
+ */ + if (atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; +} + +static inline bool page_pool_is_last_frag(struct page_pool *pool, + struct page *page) +{ + /* If fragments aren't enabled or count is 0 we were the last user */ + return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + (page_pool_defrag_page(page, 1) == 0); +} + +/** + * page_pool_put_page() - release a reference to a page pool page + * @pool: pool from which page was allocated + * @page: page to release a reference on + * @dma_sync_size: how much of the page may have been touched by the device + * @allow_direct: released by the consumer, allow lockless caching + * + * The outcome of this depends on the page refcnt. If the driver bumps + * the refcnt > 1 this will unmap the page. If the page refcnt is 1 + * the allocator owns the page and will try to recycle it in one of the pool + * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device + * using dma_sync_single_range_for_device(). + */ +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, + unsigned int dma_sync_size, + bool allow_direct) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + if (!page_pool_is_last_frag(pool, page)) + return; + + page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); +#endif +} + +/** + * page_pool_put_full_page() - release a reference on a page pool page + * @pool: pool from which page was allocated + * @page: page to release a reference on + * @allow_direct: released by the consumer, allow lockless caching + * + * Similar to page_pool_put_page(), but will DMA sync the entire memory area + * as configured in &page_pool_params.max_len. + */ +static inline void page_pool_put_full_page(struct page_pool *pool, + struct page *page, bool allow_direct) +{ + page_pool_put_page(pool, page, -1, allow_direct); +} + +/** + * page_pool_recycle_direct() - release a reference on a page pool page + * @pool: pool from which page was allocated + * @page: page to release a reference on + * + * Similar to page_pool_put_full_page() but caller must guarantee safe context + * (e.g NAPI), since it will recycle the page directly into the pool fast cache. + */ +static inline void page_pool_recycle_direct(struct page_pool *pool, + struct page *page) +{ + page_pool_put_full_page(pool, page, true); +} + +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + +/** + * page_pool_get_dma_addr() - Retrieve the stored DMA address. + * @page: page allocated from a page pool + * + * Fetch the DMA address of the page. The page pool to which the page belongs + * must had been created with PP_FLAG_DMA_MAP. 
+ */ +static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +{ + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + + return ret; +} + +static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + page->dma_addr = addr; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + page->dma_addr_upper = upper_32_bits(addr); +} + +static inline bool page_pool_put(struct page_pool *pool) +{ + return refcount_dec_and_test(&pool->user_cnt); +} + +static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) +{ + if (unlikely(pool->p.nid != new_nid)) + page_pool_update_nid(pool, new_nid); +} + +#endif /* _NET_PAGE_POOL_HELPERS_H */ diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h new file mode 100644 index 000000000000..9ac39191bed7 --- /dev/null +++ b/include/net/page_pool/types.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NET_PAGE_POOL_TYPES_H +#define _NET_PAGE_POOL_TYPES_H + +#include +#include + +#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA + * map/unmap + */ +#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets + * from page_pool will be + * DMA-synced-for-device according to + * the length provided by the device + * driver. + * Please note DMA-sync-for-CPU is still + * device driver responsibility + */ +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ + PP_FLAG_DMA_SYNC_DEV |\ + PP_FLAG_PAGE_FRAG) + +/* + * Fast allocation side cache array/stack + * + * The cache size and refill watermark is related to the network + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX + * ring is usually refilled and the max consumed elements will be 64, + * thus a natural max size of objects needed in the cache. + * + * Keeping room for more objects, is due to XDP_DROP use-case. As + * XDP_DROP allows the opportunity to recycle objects directly into + * this array, as it shares the same softirq/NAPI protection. If + * cache is already full (or partly full) then the XDP_DROP recycles + * would have to take a slower code path. 
+ */ +#define PP_ALLOC_CACHE_SIZE 128 +#define PP_ALLOC_CACHE_REFILL 64 +struct pp_alloc_cache { + u32 count; + struct page *cache[PP_ALLOC_CACHE_SIZE]; +}; + +/** + * struct page_pool_params - page pool parameters + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG + * @order: 2^order pages on allocation + * @pool_size: size of the ptr_ring + * @nid: NUMA node id to allocate from pages from + * @dev: device, for DMA pre-mapping purposes + * @napi: NAPI which is the sole consumer of pages, otherwise NULL + * @dma_dir: DMA mapping direction + * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV + * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV + */ +struct page_pool_params { + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; + struct device *dev; + struct napi_struct *napi; + enum dma_data_direction dma_dir; + unsigned int max_len; + unsigned int offset; +/* private: used by test code only */ + void (*init_callback)(struct page *page, void *arg); + void *init_arg; +}; + +#ifdef CONFIG_PAGE_POOL_STATS +/** + * struct page_pool_alloc_stats - allocation statistics + * @fast: successful fast path allocations + * @slow: slow path order-0 allocations + * @slow_high_order: slow path high order allocations + * @empty: ptr ring is empty, so a slow path allocation was forced + * @refill: an allocation which triggered a refill of the cache + * @waive: pages obtained from the ptr ring that cannot be added to + * the cache due to a NUMA mismatch + */ +struct page_pool_alloc_stats { + u64 fast; + u64 slow; + u64 slow_high_order; + u64 empty; + u64 refill; + u64 waive; +}; + +/** + * struct page_pool_recycle_stats - recycling (freeing) statistics + * @cached: recycling placed page in the page pool cache + * @cache_full: page pool cache was full + * @ring: page placed into the ptr ring + * @ring_full: page released from page pool because the ptr ring was full + * @released_refcnt: page released (and not recycled) because refcnt > 1 + */ +struct page_pool_recycle_stats { + u64 cached; + u64 cache_full; + u64 ring; + u64 ring_full; + u64 released_refcnt; +}; + +/** + * struct page_pool_stats - combined page pool use statistics + * @alloc_stats: see struct page_pool_alloc_stats + * @recycle_stats: see struct page_pool_recycle_stats + * + * Wrapper struct for combining page pool stats with different storage + * requirements. + */ +struct page_pool_stats { + struct page_pool_alloc_stats alloc_stats; + struct page_pool_recycle_stats recycle_stats; +}; +#endif + +struct page_pool { + struct page_pool_params p; + + struct delayed_work release_dw; + void (*disconnect)(void *pool); + unsigned long defer_start; + unsigned long defer_warn; + + u32 pages_state_hold_cnt; + unsigned int frag_offset; + struct page *frag_page; + long frag_users; + +#ifdef CONFIG_PAGE_POOL_STATS + /* these stats are incremented while in softirq context */ + struct page_pool_alloc_stats alloc_stats; +#endif + u32 xdp_mem_id; + + /* + * Data structure for allocation side + * + * Drivers allocation side usually already perform some kind + * of resource protection. Piggyback on this protection, and + * require driver to protect allocation side. + * + * For NIC drivers this means, allocate a page_pool per + * RX-queue. As the RX-queue is already protected by + * Softirq/BH scheduling and napi_schedule. NAPI schedule + * guarantee that a single napi_struct will only be scheduled + * on a single CPU (see napi_schedule). 
+ */ + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; + + /* Data structure for storing recycled pages. + * + * Returning/freeing pages is more complicated synchronization + * wise, because free's can happen on remote CPUs, with no + * association with allocation resource. + * + * Use ptr_ring, as it separates consumer and producer + * efficiently, it a way that doesn't bounce cache-lines. + * + * TODO: Implement bulk return pages into this structure. + */ + struct ptr_ring ring; + +#ifdef CONFIG_PAGE_POOL_STATS + /* recycle stats are per-cpu to avoid locking */ + struct page_pool_recycle_stats __percpu *recycle_stats; +#endif + atomic_t pages_state_release_cnt; + + /* A page_pool is strictly tied to a single RX-queue being + * protected by NAPI, due to above pp_alloc_cache. This + * refcnt serves purpose is to simplify drivers error handling. + */ + refcount_t user_cnt; + + u64 destroy_cnt; +}; + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp); +bool page_pool_return_skb_page(struct page *page, bool napi_safe); + +struct page_pool *page_pool_create(const struct page_pool_params *params); + +struct xdp_mem_info; + +#ifdef CONFIG_PAGE_POOL +void page_pool_unlink_napi(struct page_pool *pool); +void page_pool_destroy(struct page_pool *pool); +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem); +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count); +#else +static inline void page_pool_unlink_napi(struct page_pool *pool) +{ +} + +static inline void page_pool_destroy(struct page_pool *pool) +{ +} + +static inline void page_pool_use_xdp_mem(struct page_pool *pool, + void (*disconnect)(void *), + struct xdp_mem_info *mem) +{ +} + +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) +{ +} +#endif + +void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); + +static inline bool is_page_pool_compiled_in(void) +{ +#ifdef CONFIG_PAGE_POOL + return true; +#else + return false; +#endif +} + +/* Caller must provide appropriate safe context, e.g. NAPI. 
*/ +void page_pool_update_nid(struct page_pool *pool, int new_nid); + +#endif /* _NET_PAGE_POOL_H */ diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index ca534501158b..6834356b2d2a 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -9,7 +9,7 @@ #include #include -#include +#include TRACE_EVENT(page_pool_release, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 0aac76c13fd4..57a7a64b84ed 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5d615a169718..cd28c1f14002 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c6f98245582c..d3bed964123c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/core/xdp.c b/net/core/xdp.c index 8362130bf085..a70670fe9a2d 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include /* struct xdp_mem_allocator */ -- cgit
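Taken together, the split headers keep the usual page_pool lifecycle intact. Below is a hedged sketch of that lifecycle using only the APIs visible in the new headers; the surrounding foo_* driver, pool size, and flag choices are illustrative assumptions, not taken from the patch.

/* Sketch: create a pool, allocate a page, release it, tear down.
 * Hypothetical driver code; values are illustrative.
 */
#include <linux/err.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>

static int foo_rx_ring_init(struct device *dev, struct napi_struct *napi)
{
	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.order		= 0,			/* single pages */
		.pool_size	= 256,			/* ptr_ring size */
		.nid		= NUMA_NO_NODE,
		.dev		= dev,			/* for DMA pre-mapping */
		.napi		= napi,			/* sole page consumer */
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= PAGE_SIZE,		/* DMA-sync whole page */
		.offset		= 0,
	};
	struct page_pool *pool;
	struct page *page;

	pool = page_pool_create(&pp_params);	/* assumed to return ERR_PTR on failure */
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* Fast-path allocation; GFP_ATOMIC | __GFP_NOWARN under the hood */
	page = page_pool_dev_alloc_pages(pool);
	if (page)
		/* Drop our reference; DMA-syncs up to max_len for the device */
		page_pool_put_full_page(pool, page, false);

	page_pool_destroy(pool);
	return 0;
}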