author     Alexei Starovoitov <[email protected]>   2019-04-17 19:09:25 -0700
committer  Alexei Starovoitov <[email protected]>   2019-04-17 19:09:26 -0700
commit     193d0002ef04d331466f4d211d008ff8257bfa6a (patch)
tree       ac3d55b7b5731f7e19fd2e27b242135b3fd453d2
parent     00967e84f742f87603e769529628e32076ade188 (diff)
parent     86d231459d6dc9094e70c35c3517f4ef860b2f1e (diff)
Merge branch 'bulk-cpumap-redirect'
Jesper Dangaard Brouer says:

====================
This patchset utilizes a number of different kernel bulk APIs to optimize
the performance of the XDP cpumap redirect feature.

Benchmark details are available here:
 https://github.com/xdp-project/xdp-project/blob/master/areas/cpumap/cpumap03-optimizations.org

Performance measurements can be considered micro benchmarks, as they
measure dropping packets at different stages in the network stack.
Summary based on above:

Baseline benchmarks
- baseline-redirect: UdpNoPorts: 3,180,074
- baseline-redirect: iptables-raw drop: 6,193,534

Patch1: bpf: cpumap use ptr_ring_consume_batched
- redirect: UdpNoPorts: 3,327,729
- redirect: iptables-raw drop: 6,321,540

Patch2: net: core: introduce build_skb_around
- redirect: UdpNoPorts: 3,221,303
- redirect: iptables-raw drop: 6,320,066

Patch3: bpf: cpumap do bulk allocation of SKBs
- redirect: UdpNoPorts: 3,290,563
- redirect: iptables-raw drop: 6,650,112

Patch4: bpf: cpumap memory prefetchw optimizations for struct page
- redirect: UdpNoPorts: 3,520,250
- redirect: iptables-raw drop: 7,649,604

In this V2 submission I have chosen to drop the SKB-list patch using
netif_receive_skb_list(), as it was not showing a performance improvement
for these micro benchmarks.
====================

Signed-off-by: Alexei Starovoitov <[email protected]>
-rw-r--r--  include/linux/skbuff.h |  2
-rw-r--r--  kernel/bpf/cpumap.c    | 53
-rw-r--r--  net/core/skbuff.c      | 71
3 files changed, 91 insertions(+), 35 deletions(-)
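For context, the cpumap kthread changes (patches 1, 3 and 4 of the series) combine into the bulk pattern sketched below. This is a simplified illustration written for this summary, not code from the patch itself; it assumes a ptr_ring filled with xdp_frame pointers and only uses the kernel APIs the patch itself calls (ptr_ring_consume_batched, prefetchw, kmem_cache_alloc_bulk).

#include <linux/mm.h>
#include <linux/prefetch.h>
#include <linux/ptr_ring.h>
#include <linux/skbuff.h>
#include <linux/slab.h>

#define CPUMAP_BATCH 8

/* Sketch: bulk-consume xdp_frame pointers and bulk-allocate sk_buff
 * heads for them, mirroring the cpu_map_kthread_run() changes below.
 */
static void bulk_consume_sketch(struct ptr_ring *queue)
{
	void *frames[CPUMAP_BATCH];
	void *skbs[CPUMAP_BATCH];
	gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
	int i, n, m;

	/* Patch 1: pull up to CPUMAP_BATCH frames in a single ptr_ring call */
	n = ptr_ring_consume_batched(queue, frames, CPUMAP_BATCH);

	/* Patch 4: bring each frame's struct page to this CPU before it is
	 * read (page_is_pfmemalloc) and later written (page_frag_free).
	 */
	for (i = 0; i < n; i++)
		prefetchw(virt_to_page(frames[i]));

	/* Patch 3: one slab call for all sk_buff heads; __GFP_ZERO satisfies
	 * build_skb_around()'s requirement for a memset-cleared head.
	 */
	m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
	if (unlikely(m == 0)) {
		/* No heads available: NULL entries make the per-frame loop
		 * fall back to xdp_return_frame().
		 */
		for (i = 0; i < n; i++)
			skbs[i] = NULL;
	}

	/* ... per-frame skb build and netif_receive_skb_core() as in the
	 * cpu_map_kthread_run() hunk below ...
	 */
}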
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a06275a618f0..e81f2b0e8a83 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1042,6 +1042,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
int node);
struct sk_buff *__build_skb(void *data, unsigned int frag_size);
struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size);
/**
* alloc_skb - allocate a network buffer
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 3c18260403dd..cf727d77c6c6 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work)
}
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
- struct xdp_frame *xdpf)
+ struct xdp_frame *xdpf,
+ struct sk_buff *skb)
{
unsigned int hard_start_headroom;
unsigned int frame_size;
void *pkt_data_start;
- struct sk_buff *skb;
/* Part of headroom was reserved to xdpf */
hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
@@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
pkt_data_start = xdpf->data - hard_start_headroom;
- skb = build_skb(pkt_data_start, frame_size);
- if (!skb)
+ skb = build_skb_around(skb, pkt_data_start, frame_size);
+ if (unlikely(!skb))
return NULL;
skb_reserve(skb, hard_start_headroom);
@@ -240,6 +240,8 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
+#define CPUMAP_BATCH 8
+
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
@@ -252,8 +254,11 @@ static int cpu_map_kthread_run(void *data)
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
- unsigned int processed = 0, drops = 0, sched = 0;
- struct xdp_frame *xdpf;
+ unsigned int drops = 0, sched = 0;
+ void *frames[CPUMAP_BATCH];
+ void *skbs[CPUMAP_BATCH];
+ gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+ int i, n, m;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@@ -269,18 +274,38 @@ static int cpu_map_kthread_run(void *data)
sched = cond_resched();
}
- /* Process packets in rcpu->queue */
- local_bh_disable();
/*
* The bpf_cpu_map_entry is single consumer, with this
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
- while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
- struct sk_buff *skb;
+ n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
+
+ for (i = 0; i < n; i++) {
+ void *f = frames[i];
+ struct page *page = virt_to_page(f);
+
+ /* Bring struct page memory area to curr CPU. Read by
+ * build_skb_around via page_is_pfmemalloc(), and when
+ * freed written by page_frag_free call.
+ */
+ prefetchw(page);
+ }
+
+ m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
+ if (unlikely(m == 0)) {
+ for (i = 0; i < n; i++)
+ skbs[i] = NULL; /* effect: xdp_return_frame */
+ drops = n;
+ }
+
+ local_bh_disable();
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct sk_buff *skb = skbs[i];
int ret;
- skb = cpu_map_build_skb(rcpu, xdpf);
+ skb = cpu_map_build_skb(rcpu, xdpf, skb);
if (!skb) {
xdp_return_frame(xdpf);
continue;
@@ -290,13 +315,9 @@ static int cpu_map_kthread_run(void *data)
ret = netif_receive_skb_core(skb);
if (ret == NET_RX_DROP)
drops++;
-
- /* Limit BH-disable period */
- if (++processed == 8)
- break;
}
/* Feedback loop via tracepoint */
- trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+ trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
local_bh_enable(); /* resched point, may call do_softirq() */
}
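The net/core/skbuff.c diff below splits the body of __build_skb() into a new __build_skb_around() helper and exports build_skb_around() for callers that already own an sk_buff head. A minimal usage sketch follows; the caller and function name are hypothetical, written for this summary rather than taken from the patch.

/* Hypothetical caller: attach an existing data buffer to a freshly
 * allocated sk_buff head via the new build_skb_around() helper.
 */
static struct sk_buff *attach_buf_sketch(void *data, unsigned int frag_size)
{
	struct sk_buff *skb;

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	/* build_skb_around() expects a memset-cleared head, the same
	 * clearing __build_skb() performs internally.
	 */
	memset(skb, 0, offsetof(struct sk_buff, tail));

	/* frag_size > 0 marks the head as a page fragment and propagates
	 * pfmemalloc from the underlying page; frag_size == 0 means the
	 * data buffer was kmalloced.
	 */
	return build_skb_around(skb, data, frag_size);
}

Unlike build_skb(), the new helper does not allocate the sk_buff head itself, which is what allows the cpumap kthread above to bulk-allocate all heads for a batch up front.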
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9901f5322852..087622298d77 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -258,6 +258,33 @@ nodata:
}
EXPORT_SYMBOL(__alloc_skb);
+/* Caller must provide SKB that is memset cleared */
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size)
+{
+ struct skb_shared_info *shinfo;
+ unsigned int size = frag_size ? : ksize(data);
+
+ size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ /* Assumes caller memset cleared SKB */
+ skb->truesize = SKB_TRUESIZE(size);
+ refcount_set(&skb->users, 1);
+ skb->head = data;
+ skb->data = data;
+ skb_reset_tail_pointer(skb);
+ skb->end = skb->tail + size;
+ skb->mac_header = (typeof(skb->mac_header))~0U;
+ skb->transport_header = (typeof(skb->transport_header))~0U;
+
+ /* make sure we initialize shinfo sequentially */
+ shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+ atomic_set(&shinfo->dataref, 1);
+
+ return skb;
+}
+
/**
* __build_skb - build a network buffer
* @data: data buffer provided by caller
@@ -279,32 +306,15 @@ EXPORT_SYMBOL(__alloc_skb);
*/
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
- struct skb_shared_info *shinfo;
struct sk_buff *skb;
- unsigned int size = frag_size ? : ksize(data);
skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
- if (!skb)
+ if (unlikely(!skb))
return NULL;
- size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->truesize = SKB_TRUESIZE(size);
- refcount_set(&skb->users, 1);
- skb->head = data;
- skb->data = data;
- skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
- skb->mac_header = (typeof(skb->mac_header))~0U;
- skb->transport_header = (typeof(skb->transport_header))~0U;
- /* make sure we initialize shinfo sequentially */
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
-
- return skb;
+ return __build_skb_around(skb, data, frag_size);
}
/* build_skb() is wrapper over __build_skb(), that specifically
@@ -325,6 +335,29 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(build_skb);
+/**
+ * build_skb_around - build a network buffer around provided skb
+ * @skb: sk_buff provide by caller, must be memset cleared
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ */
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+ void *data, unsigned int frag_size)
+{
+ if (unlikely(!skb))
+ return NULL;
+
+ skb = __build_skb_around(skb, data, frag_size);
+
+ if (skb && frag_size) {
+ skb->head_frag = 1;
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
+ skb->pfmemalloc = 1;
+ }
+ return skb;
+}
+EXPORT_SYMBOL(build_skb_around);
+
#define NAPI_SKB_CACHE_SIZE 64
struct napi_alloc_cache {