author    Jakub Kicinski <[email protected]>    2024-07-12 22:27:25 -0700
committer Jakub Kicinski <[email protected]>    2024-07-12 22:27:26 -0700
commit    69cf87304dcb08d61e35bcfccdb1a5d52ce2969c (patch)
tree      e92a4029d531425533edb545c76dee5b1a441242 /include
parent    26f453176a66bb36bf9e3a8abad808b144a94f6a (diff)
parent    74d1412ac8f3719bc8dd08b5775276dd1cbc3997 (diff)
Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue
Tony Nguyen says:

====================
idpf: XDP chapter I: convert Rx to libeth

Alexander Lobakin says:

XDP for idpf is currently 5 chapters:
* convert Rx to libeth (this);
* convert Tx and stats to libeth;
* generic XDP and XSk code changes, libeth_xdp;
* actual XDP for idpf via libeth_xdp;
* XSk for idpf (^).

Part I does the following:
* splits &idpf_queue into 4 (RQ, SQ, FQ, CQ) and puts them on a diet;
* ensures optimal cacheline placement, strictly asserts CL sizes;
* moves currently unused/dead singleq mode out of line;
* reuses libeth's Rx ptype definitions and helpers;
* uses libeth's Rx buffer management for both header and payload;
* eliminates memcpy()s and coherent DMA uses on hotpath, uses
  napi_build_skb() instead of in-place short skb allocation.

Most idpf patches, except for the queue split, remove more lines than
they add. Expect far better memory utilization and +5-8% on Rx depending
on the case (+17% on skb XDP_DROP :>).

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  idpf: use libeth Rx buffer management for payload buffer
  idpf: convert header split mode to libeth + napi_build_skb()
  libeth: support different types of buffers for Rx
  idpf: remove legacy Page Pool Ethtool stats
  idpf: reuse libeth's definitions of parsed ptype structures
  idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ
  idpf: merge singleq and splitq &net_device_ops
  idpf: strictly assert cachelines of queue and queue vector structures
  idpf: avoid bloating &idpf_q_vector with big %NR_CPUS
  idpf: split &idpf_queue into 4 strictly-typed queue structures
  idpf: stop using macros for accessing queue descriptors
  libeth: add cacheline / struct layout assertion helpers
  page_pool: use __cacheline_group_{begin, end}_aligned()
  cache: add __cacheline_group_{begin, end}_aligned() (+ couple more)
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
Diffstat (limited to 'include')
-rw-r--r--  include/linux/cache.h          |  59
-rw-r--r--  include/net/libeth/cache.h     |  66
-rw-r--r--  include/net/libeth/rx.h        |  19
-rw-r--r--  include/net/page_pool/types.h  |  22
4 files changed, 156 insertions(+), 10 deletions(-)
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 0ecb17bb6883..ca2a05682a54 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -13,6 +13,32 @@
#define SMP_CACHE_BYTES L1_CACHE_BYTES
#endif
+/**
+ * SMP_CACHE_ALIGN - align a value to the L2 cacheline size
+ * @x: value to align
+ *
+ * On some architectures, the L2 ("SMP") CL size is bigger than L1, and
+ * sometimes this needs to be accounted for.
+ *
+ * Return: aligned value.
+ */
+#ifndef SMP_CACHE_ALIGN
+#define SMP_CACHE_ALIGN(x) ALIGN(x, SMP_CACHE_BYTES)
+#endif
+
+/*
+ * ``__aligned_largest`` aligns a field to the value optimal for the
+ * target architecture to perform memory operations. Expose the actual
+ * value so it can be used anywhere else.
+ */
+#ifndef __LARGEST_ALIGN
+#define __LARGEST_ALIGN sizeof(struct { long x; } __aligned_largest)
+#endif
+
+#ifndef LARGEST_ALIGN
+#define LARGEST_ALIGN(x) ALIGN(x, __LARGEST_ALIGN)
+#endif
+
/*
* __read_mostly is used to keep rarely changing variables out of frequently
* updated cachelines. Its use should be reserved for data that is used
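
As a quick sketch of how the two new size helpers behave (illustration
only, not part of the patch; the literal values assume SMP_CACHE_BYTES == 64
and __LARGEST_ALIGN == 8, typical for 64-bit builds):

#include <linux/cache.h>
#include <linux/types.h>

/*
 * Assuming SMP_CACHE_BYTES == 64 and __LARGEST_ALIGN == 8:
 *
 *	SMP_CACHE_ALIGN(40) == 64	(rounded up to the next SMP CL)
 *	SMP_CACHE_ALIGN(64) == 64	(already aligned, unchanged)
 *	LARGEST_ALIGN(42)   == 48	(rounded up to the widest access)
 */
static inline size_t example_ring_stride(size_t elem_size)
{
	/* pad each element to a full cacheline so neighbours never share one */
	return SMP_CACHE_ALIGN(elem_size);
}
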
@@ -95,6 +121,39 @@
__u8 __cacheline_group_end__##GROUP[0]
#endif
+/**
+ * __cacheline_group_begin_aligned - declare an aligned group start
+ * @GROUP: name of the group
+ * @...: optional group alignment
+ *
+ * The following block inside a struct:
+ *
+ * __cacheline_group_begin_aligned(grp);
+ * field a;
+ * field b;
+ * __cacheline_group_end_aligned(grp);
+ *
+ * will always be aligned to either the specified alignment or
+ * ``SMP_CACHE_BYTES``.
+ */
+#define __cacheline_group_begin_aligned(GROUP, ...) \
+ __cacheline_group_begin(GROUP) \
+ __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
+
+/**
+ * __cacheline_group_end_aligned - declare an aligned group end
+ * @GROUP: name of the group
+ * @...: optional alignment (same as was in __cacheline_group_begin_aligned())
+ *
+ * Note that the end marker is aligned to sizeof(long) to allow for more
+ * precise size assertions. It also declares trailing padding to keep the
+ * next field from falling into this group's cacheline.
+ */
+#define __cacheline_group_end_aligned(GROUP, ...) \
+ __cacheline_group_end(GROUP) __aligned(sizeof(long)); \
+ struct { } __cacheline_group_pad__##GROUP \
+ __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
+
#ifndef CACHELINE_ASSERT_GROUP_MEMBER
#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \
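
The aligned markers are meant to pair with the assertion macros above. A
hedged usage sketch (hypothetical struct, not from this patch; the 16-byte
group size holds on 64-bit, and the macro itself only checks an upper bound):

#include <linux/cache.h>
#include <linux/types.h>

struct example_queue {
	__cacheline_group_begin_aligned(hot);
	u32	next_to_use;
	u32	next_to_clean;
	void	*desc_ring;
	__cacheline_group_end_aligned(hot);

	/* the end-marker padding pushes these off the hot cacheline */
	u64	packets;
	u64	bytes;
};

/* BUILD_BUG_ON()-based, so wrap the checks in a function and call it once
 * from init code, as net/ipv4/tcp.c does with tcp_struct_check()
 */
static void example_queue_struct_check(void)
{
	CACHELINE_ASSERT_GROUP_MEMBER(struct example_queue, hot, next_to_use);
	CACHELINE_ASSERT_GROUP_MEMBER(struct example_queue, hot, desc_ring);
	CACHELINE_ASSERT_GROUP_SIZE(struct example_queue, hot, 16);
}
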
diff --git a/include/net/libeth/cache.h b/include/net/libeth/cache.h
new file mode 100644
index 000000000000..bdb0c043ce61
--- /dev/null
+++ b/include/net/libeth/cache.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_CACHE_H
+#define __LIBETH_CACHE_H
+
+#include <linux/cache.h>
+
+/**
+ * libeth_cacheline_group_assert - make sure cacheline group size is expected
+ * @type: type of the structure containing the group
+ * @grp: group name inside the struct
+ * @sz: expected group size
+ */
+#if defined(CONFIG_64BIT) && SMP_CACHE_BYTES == 64
+#define libeth_cacheline_group_assert(type, grp, sz) \
+ static_assert(offsetof(type, __cacheline_group_end__##grp) - \
+ offsetofend(type, __cacheline_group_begin__##grp) == \
+ (sz))
+#define __libeth_cacheline_struct_assert(type, sz) \
+ static_assert(sizeof(type) == (sz))
+#else /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
+#define libeth_cacheline_group_assert(type, grp, sz) \
+ static_assert(offsetof(type, __cacheline_group_end__##grp) - \
+ offsetofend(type, __cacheline_group_begin__##grp) <= \
+ (sz))
+#define __libeth_cacheline_struct_assert(type, sz) \
+ static_assert(sizeof(type) <= (sz))
+#endif /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
+
+#define __libeth_cls1(sz1) SMP_CACHE_ALIGN(sz1)
+#define __libeth_cls2(sz1, sz2) (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2))
+#define __libeth_cls3(sz1, sz2, sz3) \
+ (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2) + SMP_CACHE_ALIGN(sz3))
+#define __libeth_cls(...) \
+ CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+/**
+ * libeth_cacheline_struct_assert - make sure CL-based struct size is expected
+ * @type: type of the struct
+ * @...: from 1 to 3 CL group sizes (read-mostly, read-write, cold)
+ *
+ * When a struct contains several CL groups, it's difficult to predict its size
+ * on different architectures. The macro instead takes sizes of all of the
+ * groups the structure contains and generates the final struct size.
+ */
+#define libeth_cacheline_struct_assert(type, ...) \
+ __libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__)); \
+ static_assert(__alignof(type) >= SMP_CACHE_BYTES)
+
+/**
+ * libeth_cacheline_set_assert - make sure CL-based struct layout is expected
+ * @type: type of the struct
+ * @ro: expected size of the read-mostly group
+ * @rw: expected size of the read-write group
+ * @c: expected size of the cold group
+ *
+ * Check that each group size is expected and then do final struct size check.
+ */
+#define libeth_cacheline_set_assert(type, ro, rw, c) \
+ libeth_cacheline_group_assert(type, read_mostly, ro); \
+ libeth_cacheline_group_assert(type, read_write, rw); \
+ libeth_cacheline_group_assert(type, cold, c); \
+ libeth_cacheline_struct_assert(type, ro, rw, c)
+
+#endif /* __LIBETH_CACHE_H */
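
Putting the set together, a condensed sketch of the intended consumer-side
pattern (hypothetical structure, loosely modelled on the idpf queue split in
this series; the group sizes 16/8/8 are the 64-bit values, where the
assertions are exact rather than upper bounds):

#include <linux/types.h>
#include <net/libeth/cache.h>

struct example_rxq {
	__cacheline_group_begin_aligned(read_mostly);
	void	*desc_ring;		/* written once at queue init */
	u32	desc_count;
	__cacheline_group_end_aligned(read_mostly);

	__cacheline_group_begin_aligned(read_write);
	u32	next_to_use;		/* bounces on every NAPI poll */
	u32	next_to_clean;
	__cacheline_group_end_aligned(read_write);

	__cacheline_group_begin_aligned(cold);
	u32	q_id;			/* setup / teardown only */
	u32	flags;
	__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct example_rxq, 16, 8, 8);
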
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index f29ea3e34c6c..43574bd6612f 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -17,6 +17,8 @@
#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
+/* Maximum supported L2-L4 header length */
+#define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256))
/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER 0
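
For orientation (illustration, not in the patch): MAX_HEADER from
include/linux/netdevice.h is a config-dependent worst case, and on common
configurations it stays at or below 256 bytes, so the definition above
typically evaluates to 256. The lower bound always holds thanks to max():

#include <net/libeth/rx.h>

/* holds on any config: max(MAX_HEADER, 256) is never below 256 */
static_assert(LIBETH_MAX_HEAD >= 256);
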
@@ -44,12 +46,26 @@ struct libeth_fqe {
} __aligned_largest;
/**
+ * enum libeth_fqe_type - enum representing types of Rx buffers
+ * @LIBETH_FQE_MTU: buffer size is determined by MTU
+ * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
+ * @LIBETH_FQE_HDR: buffer is ``LIBETH_MAX_HEAD``-sized, for headers
+ */
+enum libeth_fqe_type {
+ LIBETH_FQE_MTU = 0U,
+ LIBETH_FQE_SHORT,
+ LIBETH_FQE_HDR,
+};
+
+/**
* struct libeth_fq - structure representing a buffer (fill) queue
* @fp: hotpath part of the structure
* @pp: &page_pool for buffer management
* @fqes: array of Rx buffers
* @truesize: size to allocate per buffer, w/overhead
* @count: number of descriptors/buffers the queue has
+ * @type: type of the buffers this queue has
+ * @hsplit: flag indicating whether header split is enabled
* @buf_len: HW-writeable length per each buffer
* @nid: ID of the closest NUMA node with memory
*/
@@ -63,6 +79,9 @@ struct libeth_fq {
);
/* Cold fields */
+ enum libeth_fqe_type type:2;
+ bool hsplit:1;
+
u32 buf_len;
int nid;
};
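
A hedged sketch of how a driver might populate the new fields (hypothetical
values; libeth_rx_fq_create() is the fill-queue allocator already declared
in this header, assumed here to derive the buffer length from ->type):

#include <linux/numa.h>
#include <net/libeth/rx.h>

static int example_create_hdr_fq(struct libeth_fq *fq,
				 struct napi_struct *napi)
{
	*fq = (struct libeth_fq){
		.count	= 512,			/* hypothetical ring size */
		.type	= LIBETH_FQE_HDR,	/* LIBETH_MAX_HEAD-sized bufs */
		.hsplit	= true,			/* header split is enabled */
		.nid	= NUMA_NO_NODE,		/* closest node with memory */
	};

	/* allocates ->pp and ->fqes, fills ->truesize and ->buf_len */
	return libeth_rx_fq_create(fq, napi);
}
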
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index b70bcc14ceda..50569fed7868 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -129,6 +129,16 @@ struct page_pool_stats {
};
#endif
+/* The whole frag API block must stay within one cacheline. On 32-bit systems,
+ * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
+ * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
+ * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
+ * one for simplicity.
+ * Having it aligned to a cacheline boundary would be excessive and
+ * doesn't bring any benefit.
+ */
+#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+
struct page_pool {
struct page_pool_params_fast p;
@@ -142,19 +152,11 @@ struct page_pool {
bool system:1; /* This is a global percpu pool */
#endif
- /* The following block must stay within one cacheline. On 32-bit
- * systems, sizeof(long) == sizeof(int), so that the block size is
- * ``3 * sizeof(long)``. On 64-bit systems, the actual size is
- * ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of
- * them is ``4 * sizeof(long)``, so just use that one for simplicity.
- * Having it aligned to a cacheline boundary may be excessive and
- * doesn't bring any good.
- */
- __cacheline_group_begin(frag) __aligned(4 * sizeof(long));
+ __cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
long frag_users;
netmem_ref frag_page;
unsigned int frag_offset;
- __cacheline_group_end(frag);
+ __cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
struct delayed_work release_dw;
void (*disconnect)(void *pool);
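
Finally, a hedged sketch (hypothetical check function, not part of the
patch) of what naming and aligning the group buys at compile time, in the
spirit of tcp_struct_check() in net/ipv4/tcp.c:

#include <linux/cache.h>
#include <net/page_pool/types.h>

static void page_pool_frag_group_check(void)
{
	/* each frag-API field must sit inside the "frag" group... */
	CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
	CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
	CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);

	/* ...and the whole group within its pow-2 slot, so it can never
	 * straddle a cacheline boundary
	 */
	CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag,
				    PAGE_POOL_FRAG_GROUP_ALIGN);
}
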