From 09df037017799b723d03a97c3f0de069bae9f625 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 27 Jan 2023 17:06:12 +0200 Subject: net/mlx5e: RX, Remove mlx5e_alloc_unit argument in page allocation Change internal page cache and page pool api to use a struct page ** instead of a mlx5e_alloc_unit *. This is the first change in a series which is meant to remove the mlx5e_alloc_unit altogether. Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 35 +++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3f7b63d6616b..36300118b6e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -292,7 +292,7 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page) return true; } -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, struct page **pagep) { struct mlx5e_page_cache *cache = &rq->page_cache; struct mlx5e_rq_stats *stats = rq->stats; @@ -308,35 +308,35 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_uni return false; } - au->page = cache->page_cache[cache->head]; + *pagep = cache->page_cache[cache->head]; cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); stats->cache_reuse++; - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(*pagep); /* Non-XSK always uses PAGE_SIZE. */ dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); return true; } -static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, struct page **pagep) { dma_addr_t addr; - if (mlx5e_rx_cache_get(rq, au)) + if (mlx5e_rx_cache_get(rq, pagep)) return 0; - au->page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!au->page)) + *pagep = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!*pagep)) return -ENOMEM; /* Non-XSK always uses PAGE_SIZE. */ - addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir); + addr = dma_map_page(rq->pdev, *pagep, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, addr))) { - page_pool_recycle_direct(rq->page_pool, au->page); - au->page = NULL; + page_pool_recycle_direct(rq->page_pool, *pagep); + *pagep = NULL; return -ENOMEM; } - page_pool_set_dma_addr(au->page, addr); + page_pool_set_dma_addr(*pagep, addr); return 0; } @@ -376,7 +376,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. 
*/ - err = mlx5e_page_alloc_pool(rq, frag->au); + err = mlx5e_page_alloc_pool(rq, &frag->au->page); return err; } @@ -605,13 +605,14 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { - union mlx5e_alloc_unit au; - err = mlx5e_page_alloc_pool(rq, &au); + err = mlx5e_page_alloc_pool(rq, &page); if (unlikely(err)) goto err_unmap; - page = dma_info->page = au.page; - addr = dma_info->addr = page_pool_get_dma_addr(au.page); + + addr = page_pool_get_dma_addr(page); + dma_info->addr = addr; + dma_info->page = page; } else { dma_info->addr = addr + header_offset; dma_info->page = page; @@ -715,7 +716,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) { dma_addr_t addr; - err = mlx5e_page_alloc_pool(rq, au); + err = mlx5e_page_alloc_pool(rq, &au->page); if (unlikely(err)) goto err_unmap; addr = page_pool_get_dma_addr(au->page); -- cgit From 8fb1814f58f691373c692df75b761668069e197a Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 27 Jan 2023 16:58:52 +0200 Subject: net/mlx5e: RX, Remove alloc unit layout constraint for legacy rq The mlx5e_alloc_unit union is conveniently used to store arrays of pointers to struct page or struct xdp_buff (for xsk). The union is currently expected to have the size of a pointer for xsk batch allocations to work. This is conveniet for the current state of the code but makes it impossible to add a structure of a different size to the alloc unit. A further patch in the series will add the mlx5e_frag_page struct for which the described size constraint will no longer hold. This change removes the usage of mlx5e_alloc_unit union for legacy rq: - A union of arrays is introduced (mlx5e_alloc_units) to replace the array of unions to allow structures of different sizes. - Each fragment has a pointer to a unit in the mlx5e_alloc_units array. 
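
For illustration, a minimal stand-alone sketch of the layout idea described above, using simplified user-space stand-ins rather than the driver's structs (the kernel declares the flexible arrays with DECLARE_FLEX_ARRAY(); zero-length arrays are used here only to keep the sketch compilable on its own):

#include <stdio.h>

struct page;			/* opaque stand-ins for the kernel types */
struct xdp_buff;

struct frag_page {		/* a future entry that is larger than a pointer */
	struct page *page;
	unsigned short frags;
};

/* Old scheme: one array of unions. Every element is as large as the
 * biggest member, and reinterpreting the array as "struct xdp_buff **"
 * for XSK batch allocation only works while all members stay
 * pointer-sized.
 */
union alloc_unit {
	struct page *page;
	struct xdp_buff *xsk;
};

/* New scheme: one union of arrays. The RQ picks a single layout when it
 * allocates the array, so each array is tightly packed for its own type
 * and a larger member can be added later without growing the others.
 */
union alloc_units {
	struct page *pages[0];
	struct xdp_buff *xsk_buffs[0];
	struct frag_page frag_pages[0];
};

int main(void)
{
	printf("array-of-unions element: %zu bytes\n", sizeof(union alloc_unit));
	printf("pages[] stride         : %zu bytes\n", sizeof(struct page *));
	printf("frag_pages[] stride    : %zu bytes\n", sizeof(struct frag_page));
	return 0;
}
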
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 +++- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 19 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 82 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 36 +++++----- 4 files changed, 87 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 02237e630d13..32036f23d962 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -606,11 +606,19 @@ struct mlx5e_icosq { } ____cacheline_aligned_in_smp; struct mlx5e_wqe_frag_info { - union mlx5e_alloc_unit *au; + union { + struct page **pagep; + struct xdp_buff **xskp; + }; u32 offset; bool last_in_page; }; +union mlx5e_alloc_units { + DECLARE_FLEX_ARRAY(struct page *, pages); + DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs); +}; + struct mlx5e_mpw_info { u16 consumed_strides; DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); @@ -702,7 +710,7 @@ struct mlx5e_rq { struct { struct mlx5_wq_cyc wq; struct mlx5e_wqe_frag_info *frags; - union mlx5e_alloc_unit *alloc_units; + union mlx5e_alloc_units *alloc_units; struct mlx5e_rq_frags_info info; mlx5e_fp_skb_from_cqe skb_from_cqe; } wqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index fab787600459..8a5ae80e6142 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -163,13 +163,10 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) u32 contig, alloc; int i; - /* mlx5e_init_frags_partition creates a 1:1 mapping between - * rq->wqe.frags and rq->wqe.alloc_units, which allows us to - * allocate XDP buffers straight into alloc_units. + /* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the + * rq->wqe.alloc_units->xsk_buffs array allocated here. */ - BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) != - sizeof(rq->wqe.alloc_units[0].xsk)); - buffs = (struct xdp_buff **)rq->wqe.alloc_units; + buffs = rq->wqe.alloc_units->xsk_buffs; contig = mlx5_wq_cyc_get_size(wq) - ix; if (wqe_bulk <= contig) { alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); @@ -189,7 +186,7 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) /* Assumes log_num_frags == 0. */ frag = &rq->wqe.frags[j]; - addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); } @@ -211,11 +208,11 @@ int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) /* Assumes log_num_frags == 0. 
*/ frag = &rq->wqe.frags[j]; - frag->au->xsk = xsk_buff_alloc(rq->xsk_pool); - if (unlikely(!frag->au->xsk)) + *frag->xskp = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!*frag->xskp)) return i; - addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); } @@ -306,7 +303,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp); struct bpf_prog *prog; /* wi->offset is not used in this function, because xdp->data and the diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 90eac1aa5f6b..917b98d1da2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -499,15 +499,9 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) struct mlx5e_wqe_frag_info *prev = NULL; int i; - if (rq->xsk_pool) { - /* Assumptions used by XSK batched allocator. */ - WARN_ON(rq->wqe.info.num_frags != 1); - WARN_ON(rq->wqe.info.log_num_frags != 0); - WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); - } - - next_frag.au = &rq->wqe.alloc_units[0]; + WARN_ON(rq->xsk_pool); + next_frag.pagep = &rq->wqe.alloc_units->pages[0]; for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *frag = @@ -516,7 +510,8 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { - next_frag.au++; + /* Pages are assigned at runtime. */ + next_frag.pagep++; next_frag.offset = 0; if (prev) prev->last_in_page = true; @@ -533,22 +528,59 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) prev->last_in_page = true; } -static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node) +static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) +{ + int i; + + /* Assumptions used by XSK batched allocator. */ + WARN_ON(rq->wqe.info.num_frags != 1); + WARN_ON(rq->wqe.info.log_num_frags != 0); + WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE); + + /* Considering the above assumptions a fragment maps to a single + * xsk_buff. 
+ */ + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) + rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; +} + +static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) { + int wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); int len = wq_sz << rq->wqe.info.log_num_frags; + struct mlx5e_wqe_frag_info *frags; + union mlx5e_alloc_units *aus; + int aus_sz; + + if (rq->xsk_pool) + aus_sz = sizeof(*aus->xsk_buffs); + else + aus_sz = sizeof(*aus->pages); + + aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); + if (!aus) + return -ENOMEM; - rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)), - GFP_KERNEL, node); - if (!rq->wqe.alloc_units) + frags = kvzalloc_node(array_size(len, sizeof(*frags)), GFP_KERNEL, node); + if (!frags) { + kvfree(aus); return -ENOMEM; + } + + rq->wqe.alloc_units = aus; + rq->wqe.frags = frags; - mlx5e_init_frags_partition(rq); + if (rq->xsk_pool) + mlx5e_init_xsk_buffs(rq); + else + mlx5e_init_frags_partition(rq); return 0; } -static void mlx5e_free_au_list(struct mlx5e_rq *rq) +static void mlx5e_free_wqe_alloc_info(struct mlx5e_rq *rq) { + kvfree(rq->wqe.frags); kvfree(rq->wqe.alloc_units); } @@ -778,18 +810,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->wqe.info = rqp->frags_info; rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; - rq->wqe.frags = - kvzalloc_node(array_size(sizeof(*rq->wqe.frags), - (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, node); - if (!rq->wqe.frags) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - - err = mlx5e_init_au_list(rq, wq_sz, node); + err = mlx5e_init_wqe_alloc_info(rq, node); if (err) - goto err_rq_frags; + goto err_rq_wq_destroy; } if (xsk) { @@ -888,9 +911,7 @@ err_rq_drop_page: mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - mlx5e_free_au_list(rq); -err_rq_frags: - kvfree(rq->wqe.frags); + mlx5e_free_wqe_alloc_info(rq); } err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); @@ -921,8 +942,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5e_rq_free_shampo(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - kvfree(rq->wqe.frags); - mlx5e_free_au_list(rq); + mlx5e_free_wqe_alloc_info(rq); } for (i = rq->page_cache.head; i != rq->page_cache.tail; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 36300118b6e4..74e7e00cf494 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -371,12 +371,12 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, int err = 0; if (!frag->offset) - /* On first frag (offset == 0), replenish page (alloc_unit actually). - * Other frags that point to the same alloc_unit (with a different + /* On first frag (offset == 0), replenish page. + * Other frags that point to the same page (with a different * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_pool(rq, &frag->au->page); + err = mlx5e_page_alloc_pool(rq, frag->pagep); return err; } @@ -386,7 +386,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, bool recycle) { if (frag->last_in_page) - mlx5e_page_release_dynamic(rq, frag->au->page, recycle); + mlx5e_page_release_dynamic(rq, *frag->pagep, recycle); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -410,7 +410,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, goto free_frags; headroom = i == 0 ? 
rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(frag->au->page); + addr = page_pool_get_dma_addr(*frag->pagep); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -434,7 +434,7 @@ static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, * put into the Reuse Ring, because there is no way to return * the page to the userspace when the interface goes down. */ - xsk_buff_free(wi->au->xsk); + xsk_buff_free(*wi->xskp); return; } @@ -1587,8 +1587,8 @@ static struct sk_buff * mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - union mlx5e_alloc_unit *au = wi->au; u16 rx_headroom = rq->buff.headroom; + struct page *page = *wi->pagep; struct bpf_prog *prog; struct sk_buff *skb; u32 metasize = 0; @@ -1596,11 +1596,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(au->page) + wi->offset; + va = page_address(page) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -1624,7 +1624,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(au->page); + page_ref_inc(page); return skb; } @@ -1635,8 +1635,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *head_wi = wi; - union mlx5e_alloc_unit *au = wi->au; u16 rx_headroom = rq->buff.headroom; + struct page *page = *wi->pagep; struct skb_shared_info *sinfo; struct mlx5e_xdp_buff mxbuf; u32 frag_consumed_bytes; @@ -1646,10 +1646,10 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi u32 truesize; void *va; - va = page_address(au->page) + wi->offset; + va = page_address(page) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ @@ -1666,11 +1666,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi while (cqe_bcnt) { skb_frag_t *frag; - au = wi->au; + page = *wi->pagep; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(page); dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, frag_consumed_bytes, rq->buff.map_dir); @@ -1684,11 +1684,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, au->page); + __skb_frag_set_page(frag, page); skb_frag_off_set(frag, wi->offset); skb_frag_size_set(frag, frag_consumed_bytes); - if (page_is_pfmemalloc(au->page)) + if (page_is_pfmemalloc(page)) xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); sinfo->xdp_frags_size += frag_consumed_bytes; @@ -1717,7 +1717,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi if (unlikely(!skb)) return NULL; - page_ref_inc(head_wi->au->page); + page_ref_inc(*head_wi->pagep); if (xdp_buff_has_frags(&mxbuf.xdp)) { int i; -- cgit From 
d39092caaedf89c67170c035cad87c79988d1527 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 30 Jan 2023 12:02:45 +0200 Subject: net/mlx5e: RX, Remove alloc unit layout constraint for striding rq This change removes the usage of mlx5e_alloc_unit union for striding rq. The change is more straightforward than legacy rq as the alloc units union is already in place. This patch only moves things around: instead of an array of unions make it a union of arrays. This has the effect that each mlx5e_mpw_info will allocate the largest possible size of the array member. For xsk this means that the array of xdp_buff pointers for the wqe will still be contiguous, but there will be some extra unused bytes at the end of the array. As further patch in the series will add the mlx5e_frag_page struct for which the described size constraint will no longer hold. Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 +-- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 29 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 61 ++++++++++++---------- 5 files changed, 51 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 32036f23d962..ad4ad14853bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -475,11 +475,6 @@ struct mlx5e_txqsq { cqe_ts_to_ns ptp_cyc2time; } ____cacheline_aligned_in_smp; -union mlx5e_alloc_unit { - struct page *page; - struct xdp_buff *xsk; -}; - /* XDP packets can be transmitted in different ways. On completion, we need to * distinguish between them to clean up things in a proper way. */ @@ -622,7 +617,7 @@ union mlx5e_alloc_units { struct mlx5e_mpw_info { u16 consumed_strides; DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); - union mlx5e_alloc_unit alloc_units[]; + union mlx5e_alloc_units alloc_units; }; #define MLX5E_MAX_RX_FRAGS 4 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 816ea83e6413..dab00a2c2eb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -489,7 +489,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) { - size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe); + size_t isz = struct_size(rq->mpwqe.info, alloc_units.pages, rq->mpwqe.pages_per_wqe); return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 8a5ae80e6142..b2c1af07c317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -22,6 +22,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5e_icosq *icosq = rq->icosq; struct mlx5_wq_cyc *wq = &icosq->wq; struct mlx5e_umr_wqe *umr_wqe; + struct xdp_buff **xsk_buffs; int batch, i; u32 offset; /* 17-bit value with MTT. 
*/ u16 pi; @@ -29,9 +30,9 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) goto err; - BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff); - batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, + xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs; + batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs, rq->mpwqe.pages_per_wqe); /* If batch < pages_per_wqe, either: @@ -41,8 +42,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) * the first error, which will mean there are no more valid descriptors. */ for (; batch < rq->mpwqe.pages_per_wqe; batch++) { - wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool); - if (unlikely(!wi->alloc_units[batch].xsk)) + xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!xsk_buffs[batch])) goto err_reuse_batch; } @@ -52,8 +53,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), @@ -62,8 +63,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { .key = rq->mkey_be, @@ -75,8 +76,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { .key = rq->mkey_be, @@ -102,8 +103,8 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); for (i = 0; i < batch; i++) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); - dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]); + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]); umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { .key = rq->mkey_be, @@ -149,7 +150,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_reuse_batch: while (--batch >= 0) - xsk_buff_free(wi->alloc_units[batch].xsk); + xsk_buff_free(xsk_buffs[batch]); err: rq->stats->buff_alloc_err++; @@ -248,7 +249,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, u32 head_offset, u32 page_idx) { - struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk); + struct mlx5e_xdp_buff *mxbuf = 
xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]); struct bpf_prog *prog; /* Check packet size. Note LRO doesn't use linear SKB */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 917b98d1da2d..0ed3f67f7dfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -286,7 +286,8 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); size_t alloc_size; - alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units, + alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, + alloc_units.pages, rq->mpwqe.pages_per_wqe)); rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 74e7e00cf494..d02f2f2af4ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -469,16 +469,16 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) static inline void mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, - union mlx5e_alloc_unit *au, u32 frag_offset, u32 len, + struct page *page, u32 frag_offset, u32 len, unsigned int truesize) { - dma_addr_t addr = page_pool_get_dma_addr(au->page); + dma_addr_t addr = page_pool_get_dma_addr(page); dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - page_ref_inc(au->page); + page_ref_inc(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - au->page, frag_offset, len, truesize); + page, frag_offset, len, truesize); } static inline void @@ -498,7 +498,6 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { - union mlx5e_alloc_unit *alloc_units = wi->alloc_units; bool no_xdp_xmit; int i; @@ -509,17 +508,21 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); if (rq->xsk_pool) { + struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs; + /* The `recycle` parameter is ignored, and the page is always * put into the Reuse Ring, because there is no way to return * the page to the userspace when the interface goes down. 
*/ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) - xsk_buff_free(alloc_units[i].xsk); + xsk_buff_free(xsk_buffs[i]); } else { + struct page **pages = wi->alloc_units.pages; + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) - mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle); + mlx5e_page_release_dynamic(rq, pages[i], recycle); } } @@ -694,7 +697,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); - union mlx5e_alloc_unit *au = &wi->alloc_units[0]; + struct page **pagep = &wi->alloc_units.pages[0]; struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; @@ -713,13 +716,13 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); - for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) { + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, pagep++) { dma_addr_t addr; - err = mlx5e_page_alloc_pool(rq, &au->page); + err = mlx5e_page_alloc_pool(rq, pagep); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(*pagep); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -760,8 +763,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_unmap: while (--i >= 0) { - au--; - mlx5e_page_release_dynamic(rq, au->page, true); + pagep--; + mlx5e_page_release_dynamic(rq, *pagep, true); } err: @@ -1914,7 +1917,7 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { static void mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, - union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset) + struct page **pagep, u32 data_bcnt, u32 data_offset) { net_prefetchw(skb->data); @@ -1928,12 +1931,12 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, else truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - mlx5e_add_skb_frag(rq, skb, au, data_offset, + mlx5e_add_skb_frag(rq, skb, *pagep, data_offset, pg_consumed_bytes, truesize); data_bcnt -= pg_consumed_bytes; data_offset = 0; - au++; + pagep++; } } @@ -1942,11 +1945,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; + struct page **pagep = &wi->alloc_units.pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); u32 frag_offset = head_offset + headlen; u32 byte_cnt = cqe_bcnt - headlen; - union mlx5e_alloc_unit *head_au = au; + struct page *head_page = *pagep; struct sk_buff *skb; dma_addr_t addr; @@ -1961,14 +1964,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. 
*/ if (unlikely(frag_offset >= PAGE_SIZE)) { - au++; + pagep++; frag_offset -= PAGE_SIZE; } - mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset); + mlx5e_fill_skb_data(skb, rq, pagep, byte_cnt, frag_offset); /* copy header */ - addr = page_pool_get_dma_addr(head_au->page); - mlx5e_copy_skb_header(rq, skb, head_au->page, addr, + addr = page_pool_get_dma_addr(head_page); + mlx5e_copy_skb_header(rq, skb, head_page, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -1982,7 +1985,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; + struct page *page = wi->alloc_units.pages[page_idx]; u16 rx_headroom = rq->buff.headroom; struct bpf_prog *prog; struct sk_buff *skb; @@ -1997,11 +2000,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(au->page) + head_offset; + va = page_address(page) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(au->page); + addr = page_pool_get_dma_addr(page); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -2028,7 +2031,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(au->page); + page_ref_inc(page); return skb; } @@ -2146,7 +2149,6 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq bool match = cqe->shampo.match; struct mlx5e_rq_stats *stats = rq->stats; struct mlx5e_rx_wqe_ll *wqe; - union mlx5e_alloc_unit *au; struct mlx5e_mpw_info *wi; struct mlx5_wq_ll *wq; @@ -2196,8 +2198,9 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (likely(head_size)) { - au = &wi->alloc_units[page_idx]; - mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset); + struct page **pagep = &wi->alloc_units.pages[page_idx]; + + mlx5e_fill_skb_data(*skb, rq, pagep, data_bcnt, data_offset); } mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); -- cgit From ca6ef9f031946134030e1b583e172c2e47c9a992 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 14 Mar 2023 21:05:56 +0200 Subject: net/mlx5e: RX, Store SHAMPO header pages in array Save allocated SHAMPO header pages to an array to which the mlx5e_dma_info page will point to. This change is a preparation for introducing mlx5e_frag_page structure in a downstream patch. There's no new functionality introduced. 
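
As a rough illustration of the indirection being introduced, here is a user-space sketch with simplified, made-up types (not the driver's structs): header descriptors now reference a slot in one shared page array instead of each caching its own page pointer.

#include <stdio.h>
#include <stdlib.h>

struct page;				/* opaque stand-in */

struct dma_info {
	unsigned long addr;
	struct page **pagep;		/* points at a slot in shampo_hd.pages[] */
};

struct shampo_hd {
	struct page **pages;		/* one slot per allocated header page */
	unsigned short curr_page_index;
	unsigned short hd_per_wq;	/* power of two, used for wrap-around */
};

int main(void)
{
	struct shampo_hd sh = { .hd_per_wq = 8 };
	struct dma_info di[4];
	int i;

	sh.pages = calloc(sh.hd_per_wq, sizeof(*sh.pages));
	if (!sh.pages)
		return 1;

	/* Headers that share a page all reference the same array slot;
	 * moving on to a new page only bumps the index, with wrap-around.
	 */
	for (i = 0; i < 4; i++) {
		di[i].pagep = &sh.pages[sh.curr_page_index];
		if (i == 1)	/* pretend the page filled up after two headers */
			sh.curr_page_index = (sh.curr_page_index + 1) & (sh.hd_per_wq - 1);
	}

	printf("headers 0,1 -> slot %td; headers 2,3 -> slot %td\n",
	       di[0].pagep - sh.pages, di[2].pagep - sh.pages);
	free(sh.pages);
	return 0;
}
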
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21 ++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 42 ++++++++++++++--------- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ad4ad14853bf..b38fbacbb4d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -670,13 +670,17 @@ struct mlx5e_rq_frags_info { struct mlx5e_dma_info { dma_addr_t addr; - struct page *page; + union { + struct page **pagep; + struct page *page; + }; }; struct mlx5e_shampo_hd { u32 mkey; struct mlx5e_dma_info *info; - struct page *last_page; + struct page **pages; + u16 curr_page_index; u16 hd_per_wq; u16 hd_per_wqe; unsigned long *bitmap; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0ed3f67f7dfc..77f81d74ff30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -262,23 +262,30 @@ static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, node); - if (!shampo->bitmap) - return -ENOMEM; - shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, sizeof(*shampo->info)), GFP_KERNEL, node); - if (!shampo->info) { - kvfree(shampo->bitmap); - return -ENOMEM; - } + shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, + sizeof(*shampo->pages)), + GFP_KERNEL, node); + if (!shampo->bitmap || !shampo->info || !shampo->pages) + goto err_nomem; + return 0; + +err_nomem: + kvfree(shampo->info); + kvfree(shampo->bitmap); + kvfree(shampo->pages); + + return -ENOMEM; } static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) { kvfree(rq->mpwqe.shampo->bitmap); kvfree(rq->mpwqe.shampo->info); + kvfree(rq->mpwqe.shampo->pages); } static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d02f2f2af4ec..7057db954f6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -586,10 +586,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 entries, pi, header_offset, err, wqe_bbs, new_entries; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; - struct page *page = shampo->last_page; + u16 page_index = shampo->curr_page_index; u64 addr = shampo->last_addr; struct mlx5e_dma_info *dma_info; struct mlx5e_umr_wqe *umr_wqe; + struct page **pagep; int headroom, i; headroom = rq->buff.headroom; @@ -600,6 +601,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + pagep = &shampo->pages[page_index]; + for (i = 0; i < entries; i++, index++) { dma_info = &shampo->info[index]; if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < @@ -608,17 +611,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { + page_index = (page_index + 1) & (shampo->hd_per_wq - 1); + 
pagep = &shampo->pages[page_index]; - err = mlx5e_page_alloc_pool(rq, &page); + err = mlx5e_page_alloc_pool(rq, pagep); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(page); + addr = page_pool_get_dma_addr(*pagep); + dma_info->addr = addr; - dma_info->page = page; + dma_info->pagep = pagep; } else { dma_info->addr = addr + header_offset; - dma_info->page = page; + dma_info->pagep = pagep; } update_klm: @@ -636,7 +642,7 @@ update_klm: }; shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); - shampo->last_page = page; + shampo->curr_page_index = page_index; shampo->last_addr = addr; sq->pc += wqe_bbs; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -648,7 +654,7 @@ err_unmap: dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); - mlx5e_page_release_dynamic(rq, dma_info->page, true); + mlx5e_page_release_dynamic(rq, *dma_info->pagep, true); } } rq->stats->buff_alloc_err++; @@ -783,7 +789,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; int hd_per_wq = shampo->hd_per_wq; - struct page *deleted_page = NULL; + struct page **deleted_page = NULL; struct mlx5e_dma_info *hd_info; int i, index = start; @@ -796,9 +802,9 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close hd_info = &shampo->info[index]; hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); - if (hd_info->page != deleted_page) { - deleted_page = hd_info->page; - mlx5e_page_release_dynamic(rq, hd_info->page, false); + if (hd_info->pagep != deleted_page) { + deleted_page = hd_info->pagep; + mlx5e_page_release_dynamic(rq, *hd_info->pagep, false); } } @@ -1137,7 +1143,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; - return page_address(last_head->page) + head_offset; + return page_address(*last_head->pagep) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -2048,7 +2054,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, void *hdr, *data; u32 frag_size; - hdr = page_address(head->page) + head_offset; + hdr = page_address(*head->pagep) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -2063,7 +2069,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(head->page); + page_ref_inc(*head->pagep); } else { /* allocate SKB and copy header for large header */ @@ -2076,7 +2082,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, head->page, head->addr, + mlx5e_copy_skb_header(rq, skb, *head->pagep, head->addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ @@ -2127,8 +2133,10 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) u64 addr = shampo->info[header_index].addr; if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { - shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE); - mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true); + struct mlx5e_dma_info *dma_info = &shampo->info[header_index]; + + 
dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE); + mlx5e_page_release_dynamic(rq, *dma_info->pagep, true); } bitmap_clear(shampo->bitmap, header_index, 1); } -- cgit From 08c9b61b071ca780ac2740b9de755d2ebac2a2e5 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 13 Dec 2022 14:37:07 +0200 Subject: net/mlx5e: RX, Remove internal page_cache This patch removes the internal rx page_cache and uses the generic page_pool api only. It used to be that the page_pool couldn't handle all the mlx5 driver usecases, but with the introduction of skb recycling and page fragmentaton in the page_pool full switch can now be made. Some benfits of this transition: * Better page recycling in the cases when the page_cache was suffering from head of queue blocking. The page_pool doesn't have this issue. * DMA mapping/unmapping can be managed by the page_pool. * mlx5e_rq size reduced by more than 50% due to the page_cache array being deleted. This patch only removes the page_cache. Downstream patches will enable the required page_pool features and will add further fine-tuning. Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ------ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 53 ----------------------- 3 files changed, 72 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b38fbacbb4d1..2684e7af5a7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -628,11 +628,6 @@ struct mlx5e_mpw_info { #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) #define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) -struct mlx5e_page_cache { - u32 head; - u32 tail; - struct page *page_cache[MLX5E_CACHE_SIZE]; -}; struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); @@ -745,7 +740,6 @@ struct mlx5e_rq { struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; struct mlx5e_cq_decomp cqd; - struct mlx5e_page_cache page_cache; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; struct mlx5e_icosq *icosq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 77f81d74ff30..b0322a20b71b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -900,9 +900,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } - rq->page_cache.head = 0; - rq->page_cache.tail = 0; - return 0; err_destroy_page_pool: @@ -933,7 +930,6 @@ err_rq_xdp_prog: static void mlx5e_free_rq(struct mlx5e_rq *rq) { struct bpf_prog *old_prog; - int i; if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { old_prog = rcu_dereference_protected(rq->xdp_prog, @@ -953,15 +949,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5e_free_wqe_alloc_info(rq); } - for (i = rq->page_cache.head; i != rq->page_cache.tail; - i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { - /* With AF_XDP, page_cache is not used, so this loop is not - * entered, and it's safe to call mlx5e_page_release_dynamic - * directly. 
- */ - mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false); - } - xdp_rxq_info_unreg(&rq->xdp_rxq); page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7057db954f6f..192f12a7d9a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -271,60 +271,10 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); } -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - struct mlx5e_rq_stats *stats = rq->stats; - - if (tail_next == cache->head) { - stats->cache_full++; - return false; - } - - if (!dev_page_is_reusable(page)) { - stats->cache_waive++; - return false; - } - - cache->page_cache[cache->tail] = page; - cache->tail = tail_next; - return true; -} - -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, struct page **pagep) -{ - struct mlx5e_page_cache *cache = &rq->page_cache; - struct mlx5e_rq_stats *stats = rq->stats; - dma_addr_t addr; - - if (unlikely(cache->head == cache->tail)) { - stats->cache_empty++; - return false; - } - - if (page_ref_count(cache->page_cache[cache->head]) != 1) { - stats->cache_busy++; - return false; - } - - *pagep = cache->page_cache[cache->head]; - cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); - stats->cache_reuse++; - - addr = page_pool_get_dma_addr(*pagep); - /* Non-XSK always uses PAGE_SIZE. */ - dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); - return true; -} - static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, struct page **pagep) { dma_addr_t addr; - if (mlx5e_rx_cache_get(rq, pagep)) - return 0; - *pagep = page_pool_dev_alloc_pages(rq->page_pool); if (unlikely(!*pagep)) return -ENOMEM; @@ -353,9 +303,6 @@ void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page) void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) { if (likely(recycle)) { - if (mlx5e_rx_cache_put(rq, page)) - return; - mlx5e_page_dma_unmap(rq, page); page_pool_recycle_direct(rq->page_pool, page); } else { -- cgit From 4a5c5e25008f374e525178f5ba2581cfa3303a0c Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 14 Dec 2022 15:44:33 +0200 Subject: net/mlx5e: RX, Enable dma map and sync from page_pool allocator Remove driver dma mapping and unmapping of pages. Let the page_pool api do it. 
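
As a hedged sketch of the pattern this enables (generic driver-style code, assuming the page_pool API of this kernel generation; create_rx_pool() and rx_alloc_page() are made-up helper names, not mlx5e functions):

#include <linux/errno.h>
#include <linux/dma-mapping.h>
#include <net/page_pool.h>

/* When the pool is created with PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 * pages come back from the allocator already mapped, and the pool also
 * syncs them for the device on recycle, up to max_len bytes.
 */
static struct page_pool *create_rx_pool(struct device *dev, int node,
					unsigned int pool_size,
					enum dma_data_direction dir)
{
	struct page_pool_params pp_params = {
		.order		= 0,
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.pool_size	= pool_size,
		.nid		= node,
		.dev		= dev,
		.dma_dir	= dir,
		.max_len	= PAGE_SIZE,
	};

	return page_pool_create(&pp_params);	/* ERR_PTR() on failure */
}

static int rx_alloc_page(struct page_pool *pool, dma_addr_t *addr)
{
	struct page *page = page_pool_dev_alloc_pages(pool);

	if (unlikely(!page))
		return -ENOMEM;

	/* No dma_map_page()/dma_mapping_error() here: the pool did the
	 * mapping, the driver only reads it back.
	 */
	*addr = page_pool_get_dma_addr(page);
	return 0;
}
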
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 22 ---------------------- 4 files changed, 4 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index dab00a2c2eb7..04419f56ac85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -65,7 +65,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); /* RX */ -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page); void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index c5dae48b7932..5e6ef602c748 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -209,8 +209,6 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); - if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL) - mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data)); rq->stats->xdp_redirect++; return true; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b0322a20b71b..2a73680021c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -733,7 +733,6 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_rq_param *rqp, int node, struct mlx5e_rq *rq) { - struct page_pool_params pp_params = { 0 }; struct mlx5_core_dev *mdev = rq->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); @@ -829,12 +828,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); } else { /* Create a page_pool and register it with rxq */ + struct page_pool_params pp_params = { 0 }; + pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; pp_params.pool_size = pool_size; pp_params.nid = node; pp_params.dev = rq->pdev; pp_params.dma_dir = rq->buff.map_dir; + pp_params.max_len = PAGE_SIZE; /* page_pool can be used even when there is no rq->xdp_prog, * given page_pool does not handle DMA mapping there is no diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 192f12a7d9a9..01c789b89cb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -273,40 +273,18 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, struct page **pagep) { - dma_addr_t addr; - *pagep = page_pool_dev_alloc_pages(rq->page_pool); if (unlikely(!*pagep)) return -ENOMEM; - /* Non-XSK always uses PAGE_SIZE. 
*/ - addr = dma_map_page(rq->pdev, *pagep, 0, PAGE_SIZE, rq->buff.map_dir); - if (unlikely(dma_mapping_error(rq->pdev, addr))) { - page_pool_recycle_direct(rq->page_pool, *pagep); - *pagep = NULL; - return -ENOMEM; - } - page_pool_set_dma_addr(*pagep, addr); - return 0; } -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page) -{ - dma_addr_t dma_addr = page_pool_get_dma_addr(page); - - dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir, - DMA_ATTR_SKIP_CPU_SYNC); - page_pool_set_dma_addr(page, 0); -} - void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) { if (likely(recycle)) { - mlx5e_page_dma_unmap(rq, page); page_pool_recycle_direct(rq->page_pool, page); } else { - mlx5e_page_dma_unmap(rq, page); page_pool_release_page(rq->page_pool, page); put_page(page); } -- cgit From 6f5742846053c7656992e70726aad26a5129cf19 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 18 Jan 2023 17:08:51 +0200 Subject: net/mlx5e: RX, Enable skb page recycling through the page_pool Start using the page_pool skb recycling api to recycle all pages back to the page pool and stop using atomic page reference counting. The mlx5e driver used to manage in-flight pages using page refcounting: for each fragment there were 2 atomic write operations happening (one for building the skb and one on skb release). The page_pool api introduced a method to track page fragments more optimally: * The page's pp_fragment_count is set to a large bias on page alloc (1 x atomic write operation). * The driver tracks the actual page fragments in a non atomic variable. * When the skb is recycled, pp_fragment_count is decremented (atomic write operation). * When page is released in the driver, the unused number of fragments (relative to the bias) is deducted from pp_fragment_count (atomic write operation). * Last page defragmentation will only be an atomic read. So in total there are `number of fragments + 1` atomic write ops. As opposed to previously: `2 * frags` atomic writes ops. Pages are wrapped in a mlx5e_frag_page structure which also contains the number of fragments. This makes it easy to count the fragments in the driver. This change brings performance improvements for the case when the old rx page_cache had low recycling rates due to head of queue blocking. For a iperf3 TCP test with a single stream, on a single core (iperf and receive queue running on same core), the following improvements can be noticed: * Striding rq: - before (net-next baseline): bitrate = 30.1 Gbits/sec - after : bitrate = 31.4 Gbits/sec (diff: 4.14 %) * Legacy rq: - before (net-next baseline): bitrate = 30.2 Gbits/sec - after : bitrate = 33.0 Gbits/sec (diff: 8.48 %) There are 2 temporary performance degradations introduced: 1) TCP streams that had a good recycling rate with the old page_cache have a degradation for both striding and linear rq. This is due to very low page pool cache recycling: the pages are released during skb recycle which will release pages to the page pool ring for safety. The following patches in this series will tackle this problem by deferring the page release in the driver to increase the chance of having pages recycled to the cache. 2) XDP performance is now lower (4-5 %) due to the higher number of atomic operations used for fragment management. But this opens the door for supporting multiple packets per page in XDP, which will bring a big gain. Otherwise, performance is similar to baseline. 
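
To make the accounting above concrete, here is a small user-space model of the bias scheme, with made-up names (model_page, and pp_frag_count as a plain C11 atomic) standing in for the page_pool machinery rather than using its real API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PAGECNT_BIAS_MAX (4096 / 64)

struct model_page {
	atomic_long pp_frag_count;	/* stands in for the page_pool counter */
};

struct frag_page {
	struct model_page *page;
	unsigned short frags;		/* plain, non-atomic driver-side counter */
};

/* alloc: a single atomic write charges the large bias */
static void model_alloc(struct frag_page *fp, struct model_page *pg)
{
	atomic_store(&pg->pp_frag_count, PAGECNT_BIAS_MAX);
	fp->page = pg;
	fp->frags = 0;
}

/* each fragment handed to the stack: non-atomic increment only */
static void model_take_frag(struct frag_page *fp)
{
	fp->frags++;
}

/* skb recycle path: one atomic decrement per fragment; returns true when
 * the counter reaches zero and the page can go back to the pool
 */
static bool model_skb_recycle(struct model_page *pg)
{
	return atomic_fetch_sub(&pg->pp_frag_count, 1) == 1;
}

/* driver release: deduct the unused part of the bias in one shot */
static bool model_release(struct frag_page *fp)
{
	long drain = PAGECNT_BIAS_MAX - fp->frags;

	return atomic_fetch_sub(&fp->page->pp_frag_count, drain) == drain;
}

int main(void)
{
	struct model_page pg;
	struct frag_page fp;
	bool last;

	model_alloc(&fp, &pg);
	model_take_frag(&fp);
	model_take_frag(&fp);			/* two fragments in flight */

	last = model_release(&fp);		/* driver is done with the page */
	printf("free after driver release? %s\n", last ? "yes" : "no");

	model_skb_recycle(&pg);			/* first skb freed */
	last = model_skb_recycle(&pg);		/* second skb freed, count hits 0 */
	printf("free after last skb?       %s\n", last ? "yes" : "no");
	return 0;
}
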
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 +- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 180 +++++++++++++--------- 5 files changed, 121 insertions(+), 88 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2684e7af5a7a..0862556105d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -600,9 +600,14 @@ struct mlx5e_icosq { struct work_struct recover_work; } ____cacheline_aligned_in_smp; +struct mlx5e_frag_page { + struct page *page; + u16 frags; +}; + struct mlx5e_wqe_frag_info { union { - struct page **pagep; + struct mlx5e_frag_page *frag_page; struct xdp_buff **xskp; }; u32 offset; @@ -610,6 +615,7 @@ struct mlx5e_wqe_frag_info { }; union mlx5e_alloc_units { + DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages); DECLARE_FLEX_ARRAY(struct page *, pages); DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs); }; @@ -666,7 +672,7 @@ struct mlx5e_rq_frags_info { struct mlx5e_dma_info { dma_addr_t addr; union { - struct page **pagep; + struct mlx5e_frag_page *frag_page; struct page *page; }; }; @@ -674,7 +680,7 @@ struct mlx5e_dma_info { struct mlx5e_shampo_hd { u32 mkey; struct mlx5e_dma_info *info; - struct page **pages; + struct mlx5e_frag_page *pages; u16 curr_page_index; u16 hd_per_wq; u16 hd_per_wqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 04419f56ac85..cd7779a9d046 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -65,7 +65,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); int mlx5e_poll_ico_cq(struct mlx5e_cq *cq); /* RX */ -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); @@ -488,7 +487,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) { - size_t isz = struct_size(rq->mpwqe.info, alloc_units.pages, rq->mpwqe.pages_per_wqe); + size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe); return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 5e6ef602c748..ca6ac9772d22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -523,7 +523,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, break; case MLX5E_XDP_XMIT_MODE_PAGE: /* XDP_TX from the regular RQ */ - mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle); + page_pool_put_defragged_page(xdpi.page.rq->page_pool, + xdpi.page.page, -1, recycle); break; case MLX5E_XDP_XMIT_MODE_XSK: /* AF_XDP send */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2a73680021c2..eca9a11454e5 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -294,7 +294,7 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) size_t alloc_size; alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, - alloc_units.pages, + alloc_units.frag_pages, rq->mpwqe.pages_per_wqe)); rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); @@ -509,7 +509,8 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) WARN_ON(rq->xsk_pool); - next_frag.pagep = &rq->wqe.alloc_units->pages[0]; + next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *frag = @@ -519,7 +520,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) { if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) { /* Pages are assigned at runtime. */ - next_frag.pagep++; + next_frag.frag_page++; next_frag.offset = 0; if (prev) prev->last_in_page = true; @@ -563,7 +564,7 @@ static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) if (rq->xsk_pool) aus_sz = sizeof(*aus->xsk_buffs); else - aus_sz = sizeof(*aus->pages); + aus_sz = sizeof(*aus->frag_pages); aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node); if (!aus) @@ -831,7 +832,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, struct page_pool_params pp_params = { 0 }; pp_params.order = 0; - pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG; pp_params.pool_size = pool_size; pp_params.nid = node; pp_params.dev = rq->pdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 01c789b89cb9..7724e30ec133 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -271,23 +271,36 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem); } -static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, struct page **pagep) +#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) + +static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page) { - *pagep = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!*pagep)) + struct page *page; + + page = page_pool_dev_alloc_pages(rq->page_pool); + if (unlikely(!page)) return -ENOMEM; + page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + + *frag_page = (struct mlx5e_frag_page) { + .page = page, + .frags = 0, + }; + return 0; } -void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle) +static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, + struct mlx5e_frag_page *frag_page, + bool recycle) { - if (likely(recycle)) { - page_pool_recycle_direct(rq->page_pool, page); - } else { - page_pool_release_page(rq->page_pool, page); - put_page(page); - } + u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; + struct page *page = frag_page->page; + + if (page_pool_defrag_page(page, drain_count) == 0) + page_pool_put_defragged_page(rq->page_pool, page, -1, recycle); } static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, @@ -301,7 +314,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. 
*/ - err = mlx5e_page_alloc_pool(rq, frag->pagep); + err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); return err; } @@ -311,7 +324,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, bool recycle) { if (frag->last_in_page) - mlx5e_page_release_dynamic(rq, *frag->pagep, recycle); + mlx5e_page_release_fragmented(rq, frag->frag_page, recycle); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -335,7 +348,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, goto free_frags; headroom = i == 0 ? rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(*frag->pagep); + addr = page_pool_get_dma_addr(frag->frag_page->page); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -401,7 +414,6 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - page_ref_inc(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, frag_offset, len, truesize); } @@ -443,11 +455,14 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) xsk_buff_free(xsk_buffs[i]); } else { - struct page **pages = wi->alloc_units.pages; + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) { + if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) { + struct mlx5e_frag_page *frag_page; - for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) - if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) - mlx5e_page_release_dynamic(rq, pages[i], recycle); + frag_page = &wi->alloc_units.frag_pages[i]; + mlx5e_page_release_fragmented(rq, frag_page, recycle); + } + } } } @@ -512,10 +527,10 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 entries, pi, header_offset, err, wqe_bbs, new_entries; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; u16 page_index = shampo->curr_page_index; + struct mlx5e_frag_page *frag_page; u64 addr = shampo->last_addr; struct mlx5e_dma_info *dma_info; struct mlx5e_umr_wqe *umr_wqe; - struct page **pagep; int headroom, i; headroom = rq->buff.headroom; @@ -526,7 +541,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); - pagep = &shampo->pages[page_index]; + frag_page = &shampo->pages[page_index]; for (i = 0; i < entries; i++, index++) { dma_info = &shampo->info[index]; @@ -537,19 +552,19 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { page_index = (page_index + 1) & (shampo->hd_per_wq - 1); - pagep = &shampo->pages[page_index]; + frag_page = &shampo->pages[page_index]; - err = mlx5e_page_alloc_pool(rq, pagep); + err = mlx5e_page_alloc_fragmented(rq, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(*pagep); + addr = page_pool_get_dma_addr(frag_page->page); dma_info->addr = addr; - dma_info->pagep = pagep; + dma_info->frag_page = frag_page; } else { dma_info->addr = addr + header_offset; - dma_info->pagep = pagep; + dma_info->frag_page = frag_page; } update_klm: @@ -579,7 +594,7 @@ err_unmap: dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); - mlx5e_page_release_dynamic(rq, *dma_info->pagep, true); + mlx5e_page_release_fragmented(rq, dma_info->frag_page, true); } } rq->stats->buff_alloc_err++; @@ -628,8 +643,8 @@ static 
int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); - struct page **pagep = &wi->alloc_units.pages[0]; struct mlx5e_icosq *sq = rq->icosq; + struct mlx5e_frag_page *frag_page; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; u32 offset; /* 17-bit value with MTT. */ @@ -647,13 +662,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); - for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, pagep++) { + frag_page = &wi->alloc_units.frag_pages[0]; + + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { dma_addr_t addr; - err = mlx5e_page_alloc_pool(rq, pagep); + err = mlx5e_page_alloc_fragmented(rq, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(*pagep); + addr = page_pool_get_dma_addr(frag_page->page); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -694,8 +711,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_unmap: while (--i >= 0) { - pagep--; - mlx5e_page_release_dynamic(rq, *pagep, true); + frag_page--; + mlx5e_page_release_fragmented(rq, frag_page, true); } err: @@ -713,8 +730,8 @@ err: void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + struct mlx5e_frag_page *deleted_page = NULL; int hd_per_wq = shampo->hd_per_wq; - struct page **deleted_page = NULL; struct mlx5e_dma_info *hd_info; int i, index = start; @@ -727,10 +744,12 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close hd_info = &shampo->info[index]; hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); - if (hd_info->pagep != deleted_page) { - deleted_page = hd_info->pagep; - mlx5e_page_release_dynamic(rq, *hd_info->pagep, false); + if (hd_info->frag_page && hd_info->frag_page != deleted_page) { + deleted_page = hd_info->frag_page; + mlx5e_page_release_fragmented(rq, hd_info->frag_page, false); } + + hd_info->frag_page = NULL; } if (start + len > hd_per_wq) { @@ -1068,7 +1087,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; - return page_address(*last_head->pagep) + head_offset; + return page_address(last_head->frag_page->page) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1521,8 +1540,8 @@ static struct sk_buff * mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { + struct mlx5e_frag_page *frag_page = wi->frag_page; u16 rx_headroom = rq->buff.headroom; - struct page *page = *wi->pagep; struct bpf_prog *prog; struct sk_buff *skb; u32 metasize = 0; @@ -1530,11 +1549,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(page) + wi->offset; + va = page_address(frag_page->page) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -1558,7 +1577,8 @@ 
mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(page); + skb_mark_for_recycle(skb); + frag_page->frags++; return skb; } @@ -1570,7 +1590,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *head_wi = wi; u16 rx_headroom = rq->buff.headroom; - struct page *page = *wi->pagep; + struct mlx5e_frag_page *frag_page; struct skb_shared_info *sinfo; struct mlx5e_xdp_buff mxbuf; u32 frag_consumed_bytes; @@ -1580,10 +1600,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi u32 truesize; void *va; - va = page_address(page) + wi->offset; + frag_page = wi->frag_page; + + va = page_address(frag_page->page) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ @@ -1600,11 +1622,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi while (cqe_bcnt) { skb_frag_t *frag; - page = *wi->pagep; + frag_page = wi->frag_page; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, frag_consumed_bytes, rq->buff.map_dir); @@ -1618,11 +1640,12 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, page); + + __skb_frag_set_page(frag, frag_page->page); skb_frag_off_set(frag, wi->offset); skb_frag_size_set(frag, frag_consumed_bytes); - if (page_is_pfmemalloc(page)) + if (page_is_pfmemalloc(frag_page->page)) xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); sinfo->xdp_frags_size += frag_consumed_bytes; @@ -1651,21 +1674,17 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi if (unlikely(!skb)) return NULL; - page_ref_inc(*head_wi->pagep); + skb_mark_for_recycle(skb); + head_wi->frag_page->frags++; if (xdp_buff_has_frags(&mxbuf.xdp)) { - int i; - /* sinfo->nr_frags is reset by build_skb, calculate again. 
*/ xdp_update_skb_shared_info(skb, wi - head_wi - 1, sinfo->xdp_frags_size, truesize, xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); - for (i = 0; i < sinfo->nr_frags; i++) { - skb_frag_t *frag = &sinfo->frags[i]; - - page_ref_inc(skb_frag_page(frag)); - } + for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) + pwi->frag_page->frags++; } return skb; @@ -1848,7 +1867,8 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { static void mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, - struct page **pagep, u32 data_bcnt, u32 data_offset) + struct mlx5e_frag_page *frag_page, + u32 data_bcnt, u32 data_offset) { net_prefetchw(skb->data); @@ -1862,12 +1882,13 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, else truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - mlx5e_add_skb_frag(rq, skb, *pagep, data_offset, + frag_page->frags++; + mlx5e_add_skb_frag(rq, skb, frag_page->page, data_offset, pg_consumed_bytes, truesize); data_bcnt -= pg_consumed_bytes; data_offset = 0; - pagep++; + frag_page++; } } @@ -1876,11 +1897,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - struct page **pagep = &wi->alloc_units.pages[page_idx]; + struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); + struct mlx5e_frag_page *head_page = frag_page; u32 frag_offset = head_offset + headlen; u32 byte_cnt = cqe_bcnt - headlen; - struct page *head_page = *pagep; struct sk_buff *skb; dma_addr_t addr; @@ -1895,14 +1916,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. 
*/ if (unlikely(frag_offset >= PAGE_SIZE)) { - pagep++; + frag_page++; frag_offset -= PAGE_SIZE; } - mlx5e_fill_skb_data(skb, rq, pagep, byte_cnt, frag_offset); + skb_mark_for_recycle(skb); + mlx5e_fill_skb_data(skb, rq, frag_page, byte_cnt, frag_offset); /* copy header */ - addr = page_pool_get_dma_addr(head_page); - mlx5e_copy_skb_header(rq, skb, head_page, addr, + addr = page_pool_get_dma_addr(head_page->page); + mlx5e_copy_skb_header(rq, skb, head_page->page, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -1916,7 +1938,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - struct page *page = wi->alloc_units.pages[page_idx]; + struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 rx_headroom = rq->buff.headroom; struct bpf_prog *prog; struct sk_buff *skb; @@ -1931,11 +1953,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(page) + head_offset; + va = page_address(frag_page->page) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(page); + addr = page_pool_get_dma_addr(frag_page->page); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -1962,7 +1984,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; /* queue up for recycling/reuse */ - page_ref_inc(page); + skb_mark_for_recycle(skb); + frag_page->frags++; return skb; } @@ -1979,7 +2002,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, void *hdr, *data; u32 frag_size; - hdr = page_address(*head->pagep) + head_offset; + hdr = page_address(head->frag_page->page) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -1993,9 +2016,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, if (unlikely(!skb)) return NULL; - /* queue up for recycling/reuse */ - page_ref_inc(*head->pagep); - + head->frag_page->frags++; } else { /* allocate SKB and copy header for large header */ rq->stats->gro_large_hds++; @@ -2007,13 +2028,17 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, *head->pagep, head->addr, + mlx5e_copy_skb_header(rq, skb, head->frag_page->page, head->addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += head_size; skb->len += head_size; } + + /* queue up for recycling/reuse */ + skb_mark_for_recycle(skb); + return skb; } @@ -2061,7 +2086,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_dma_info *dma_info = &shampo->info[header_index]; dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE); - mlx5e_page_release_dynamic(rq, *dma_info->pagep, true); + mlx5e_page_release_fragmented(rq, dma_info->frag_page, true); } bitmap_clear(shampo->bitmap, header_index, 1); } @@ -2131,9 +2156,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (likely(head_size)) { - struct page **pagep = &wi->alloc_units.pages[page_idx]; + struct mlx5e_frag_page *frag_page; - mlx5e_fill_skb_data(*skb, rq, pagep, data_bcnt, data_offset); + frag_page = 
&wi->alloc_units.frag_pages[page_idx]; + mlx5e_fill_skb_data(*skb, rq, frag_page, data_bcnt, data_offset); } mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); -- cgit From 38a36efccd902939cc415f3757fb0d9a0f0618f9 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 17 Feb 2023 12:56:30 +0200 Subject: net/mlx5e: RX, Rename xdp_xmit_bitmap to a more generic name The xdp_xmit_bitmap currently serves only one purpose: to avoid releasing pages that are still in use due to XDP TX. A following patch will use this bitmap in a slightly different context but for the same purpose. So rename the bitmap to a more generic name that reflects the purpose not the context. Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0862556105d5..566ddf7a7aa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -622,7 +622,7 @@ union mlx5e_alloc_units { struct mlx5e_mpw_info { u16 consumed_strides; - DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); + DECLARE_BITMAP(skip_release_bitmap, MLX5_MPWRQ_MAX_PAGES_PER_WQE); union mlx5e_alloc_units alloc_units; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index b2c1af07c317..3cde264cbe4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -120,7 +120,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } } - bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; umr_wqe->ctrl.opmod_idx_opcode = @@ -289,7 +289,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, prog = rcu_dereference(rq->xdp_prog); if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) - __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7724e30ec133..eab8cba33ce4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -439,10 +439,10 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle int i; /* A common case for AF_XDP. */ - if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe)) + if (bitmap_full(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe)) return; - no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + no_xdp_xmit = bitmap_empty(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); if (rq->xsk_pool) { struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs; @@ -452,11 +452,11 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle * the page to the userspace when the interface goes down. 
*/ for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) - if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) + if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) xsk_buff_free(xsk_buffs[i]); } else { for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) { - if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) { + if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap)) { struct mlx5e_frag_page *frag_page; frag_page = &wi->alloc_units.frag_pages[i]; @@ -687,7 +687,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) sizeof(*umr_wqe->inline_mtts) * pad); } - bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; umr_wqe->ctrl.opmod_idx_opcode = @@ -1970,7 +1970,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ } -- cgit From 4c2a1323680709078736a2886c754004e9fe42e6 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 14 Feb 2023 12:01:40 +0200 Subject: net/mlx5e: RX, Defer page release in striding rq for better recycling Currently, for striding RQ, fragmented pages from the page pool can get released in two ways: 1) In the mlx5e driver when trimming off the unused fragments AND the associated skb fragments have been released. This path allows recycling of pages to the page pool cache (allow_direct == true). 2) On the skb release path (last fragment release), which will always release pages to the page pool ring (allow_direct == false). Whichever is releasing the last fragment will be decisive on where the page gets released: the cache or the ring. So we obviously want to maximize for doing the release from 1. This patch does that by deferring the release of page fragments right before requesting new ones from the page pool. Extra care needs to be taken for the corner cases: * On first call, make sure that release is not called. The skip_release_bitmap is used for this purpose. * On rq shutdown, make sure that all wqes that were not in the linked list are released. For a single ring, single core, default MTU (1500) TCP stream test the number of pages allocated from the cache directly (rx_pp_recycle_cached) increases from 31 % to 98 %: +----------------------------------------------+ | Page Pool stats (/sec) | Before | After | +-------------------------+---------+----------+ |rx_pp_alloc_fast | 2137754 | 2261033 | |rx_pp_alloc_slow | 47 | 9 | |rx_pp_alloc_empty | 47 | 9 | |rx_pp_alloc_refill | 23230 | 819 | |rx_pp_alloc_waive | 0 | 0 | |rx_pp_recycle_cached | 672182 | 2209015 | |rx_pp_recycle_cache_full | 1789 | 0 | |rx_pp_recycle_ring | 1485848 | 52259 | |rx_pp_recycle_ring_full | 3003 | 584 | +----------------------------------------------+ With this patch, the performance in striding rq for the above test is back to baseline. 
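A minimal standalone sketch of the deferred-release scheme follows; the names are hypothetical (struct mpw_slot stands in for mlx5e_mpw_info, malloc/free for the page pool calls), so it illustrates the idea rather than the driver code:

#include <stdbool.h>
#include <stdlib.h>

#define PAGES_PER_WQE 4
#define WQ_SIZE       8

struct mpw_slot {
        void *pages[PAGES_PER_WQE];
        bool skip_release[PAGES_PER_WQE]; /* set => do not free on next refill */
};

static void release_slot(struct mpw_slot *s)
{
        for (int i = 0; i < PAGES_PER_WQE; i++) {
                if (s->skip_release[i])
                        continue;
                free(s->pages[i]);      /* stands in for the page pool release */
                s->pages[i] = NULL;
        }
}

static void refill_slot(struct mpw_slot *s)
{
        /* Deferred free: drop the slot's old pages only now, right before
         * allocating new ones, so the skb path had a chance to release its
         * references first and recycling can hit the page pool cache.
         */
        release_slot(s);
        for (int i = 0; i < PAGES_PER_WQE; i++) {
                s->pages[i] = malloc(4096);  /* stands in for page pool alloc */
                s->skip_release[i] = false;  /* release allowed from now on */
        }
}

int main(void)
{
        struct mpw_slot wq[WQ_SIZE];

        /* Corner case 1: pre-fill the skip flags so the first refill round
         * does not free pages that were never allocated.
         */
        for (int i = 0; i < WQ_SIZE; i++)
                for (int j = 0; j < PAGES_PER_WQE; j++)
                        wq[i].skip_release[j] = true;

        for (int round = 0; round < 3; round++)
                for (int i = 0; i < WQ_SIZE; i++)
                        refill_slot(&wq[i]);

        /* Corner case 2: on shutdown, slots that completed but were never
         * refilled again still hold pages and must be walked explicitly.
         */
        for (int i = 0; i < WQ_SIZE; i++)
                release_slot(&wq[i]);
        return 0;
}

The two corner cases called out above show up here as the pre-filled skip flags and the explicit shutdown walk.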
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21 +++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++++--- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index b621f735cdc3..a047a2a4ddac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -121,9 +121,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) mlx5e_reset_icosq_cc_pc(icosq); - mlx5e_free_rx_in_progress_descs(rq); + mlx5e_free_rx_missing_descs(rq); if (xskrq) - mlx5e_free_rx_in_progress_descs(xskrq); + mlx5e_free_rx_missing_descs(xskrq); clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index cd7779a9d046..651be7aaf7d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -69,7 +69,7 @@ INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)); INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_rx_descs(struct mlx5e_rq *rq); -void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); +void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq); static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index eca9a11454e5..53eef689f225 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -301,6 +301,15 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) if (!rq->mpwqe.info) return -ENOMEM; + /* For deferred page release (release right before alloc), make sure + * that on first round release is not called. + */ + for (int i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, i); + + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + } + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); return 0; @@ -1112,7 +1121,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) return -ETIMEDOUT; } -void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) +void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq; u16 head; @@ -1124,8 +1133,12 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) wq = &rq->mpwqe.wq; head = wq->head; - /* Outstanding UMR WQEs (in progress) start at wq->head */ - for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { + /* Release WQEs that are in missing state: they have been + * popped from the list after completion but were not freed + * due to deferred release. + * Also free the linked-list reserved entry, hence the "+ 1". 
+ */ + for (i = 0; i < mlx5_wq_ll_missing(wq) + 1; i++) { rq->dealloc_wqe(rq, head); head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } @@ -1152,7 +1165,7 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; - mlx5e_free_rx_in_progress_descs(rq); + mlx5e_free_rx_missing_descs(rq); while (!mlx5_wq_ll_is_empty(wq)) { struct mlx5e_rx_wqe_ll *wqe; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index eab8cba33ce4..73bc373bf27d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -983,6 +983,11 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head); + + /* Deferred free for better page pool cache usage. */ + mlx5e_free_rx_mpwqe(rq, wi, true); + alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) : mlx5e_alloc_rx_mpwqe(rq, head); @@ -1855,7 +1860,6 @@ mpwrq_cqe_out: wq = &rq->mpwqe.wq; wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); - mlx5e_free_rx_mpwqe(rq, wi, true); mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); } @@ -2173,7 +2177,6 @@ mpwrq_cqe_out: wq = &rq->mpwqe.wq; wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); - mlx5e_free_rx_mpwqe(rq, wi, true); mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); } @@ -2233,7 +2236,6 @@ mpwrq_cqe_out: wq = &rq->mpwqe.wq; wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); - mlx5e_free_rx_mpwqe(rq, wi, true); mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); } -- cgit From 625dff29df3957f8a864c05d21a327231ebeff94 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 21 Feb 2023 17:24:09 +0200 Subject: net/mlx5e: RX, Change wqe last_in_page field from bool to bit flags Change the bool flag to a bitfield as we'll use it in a downstream patch in the series to add signaling about skipping a fragment release. 
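As a tiny illustration, and not the driver's actual definitions, the bool collapses into one bit of a flags byte, leaving room for the skip-release bit added by the following patch:

#include <stdint.h>

#define BIT(n) (1u << (n))

enum frag_flag {
        FRAG_LAST_IN_PAGE,      /* was: bool last_in_page */
        FRAG_SKIP_RELEASE,      /* added by the following patch */
};

struct frag_info {
        uint8_t flags;
};

static inline int frag_is_last_in_page(const struct frag_info *fi)
{
        return fi->flags & BIT(FRAG_LAST_IN_PAGE);
}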
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 566ddf7a7aa9..9ef4b7163e5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -605,13 +605,17 @@ struct mlx5e_frag_page { u16 frags; }; +enum mlx5e_wqe_frag_flag { + MLX5E_WQE_FRAG_LAST_IN_PAGE, +}; + struct mlx5e_wqe_frag_info { union { struct mlx5e_frag_page *frag_page; struct xdp_buff **xskp; }; u32 offset; - bool last_in_page; + u8 flags; }; union mlx5e_alloc_units { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 53eef689f225..bb1cbf008876 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -532,7 +532,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) next_frag.frag_page++; next_frag.offset = 0; if (prev) - prev->last_in_page = true; + prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); } *frag = next_frag; @@ -543,7 +543,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) } if (prev) - prev->last_in_page = true; + prev->flags |= BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE); } static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 73bc373bf27d..f98212596c1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -323,7 +323,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag, bool recycle) { - if (frag->last_in_page) + if (frag->flags & BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE)) mlx5e_page_release_fragmented(rq, frag->frag_page, recycle); } -- cgit From 3f93f82988bc6b4104314bf9aa485d47fbb7e241 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 21 Feb 2023 20:25:15 +0200 Subject: net/mlx5e: RX, Defer page release in legacy rq for better recycling Currently, fragmented pages from the page pool can be released in two ways: 1) In the mlx5e driver when trimming off the unused fragments AND the associated skb fragments have been released. This path allows recycling of pages to the page pool cache (allow_direct == true). 2) On the skb release path (last fragment release), which will always release pages to the page pool ring (allow_direct == false). Whichever is releasing the last fragment will be decisive on where the page gets released: the cache or the ring. So we obviously want to maximize for doing the release from 1. This patch does that by deferring the release of page fragments right before requesting new ones from the page pool. A flag is added to make sure that there's no release before first alloc and that XDP_TX fragments are not released prematurely. This is a preparation patch that doesn't unlock the performance improvements yet. A followup patch will do that. 
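A simplified sketch of the flag lifecycle, with hypothetical helpers (the real code operates on struct mlx5e_wqe_frag_info and the page pool):

#include <stdbool.h>
#include <stdint.h>

#define BIT(n) (1u << (n))

enum { FRAG_LAST_IN_PAGE, FRAG_SKIP_RELEASE };

struct frag {
        uint8_t flags;
};

/* Set at ring init so the first deferred release is a no-op. */
static void frag_init(struct frag *f)
{
        f->flags = BIT(FRAG_SKIP_RELEASE);
}

/* Cleared once a page has actually been allocated for this fragment. */
static void frag_mark_allocated(struct frag *f)
{
        f->flags &= ~BIT(FRAG_SKIP_RELEASE);
}

/* Set again when XDP_TX takes ownership, so the page is not freed early. */
static void frag_mark_xdp_tx(struct frag *f)
{
        f->flags |= BIT(FRAG_SKIP_RELEASE);
}

/* Release only the last fragment in a page, and only if not skipped;
 * a single mask-and-compare keeps the check cheap.
 */
static bool frag_can_release(const struct frag *f)
{
        uint8_t mask = BIT(FRAG_LAST_IN_PAGE) | BIT(FRAG_SKIP_RELEASE);

        return (f->flags & mask) == BIT(FRAG_LAST_IN_PAGE);
}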
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 2 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 20 ++++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 66 ++++++++++++++-------- 4 files changed, 66 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9ef4b7163e5a..613a7daf9595 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -607,6 +607,7 @@ struct mlx5e_frag_page { enum mlx5e_wqe_frag_flag { MLX5E_WQE_FRAG_LAST_IN_PAGE, + MLX5E_WQE_FRAG_SKIP_RELEASE, }; struct mlx5e_wqe_frag_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 3cde264cbe4e..d97e6df66f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -189,6 +189,7 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } return alloc; @@ -215,6 +216,7 @@ int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) addr = xsk_buff_xdp_get_frame_dma(*frag->xskp); wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } return wqe_bulk; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bb1cbf008876..3cf7d2b59037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -520,6 +520,9 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0]; + /* Skip first release due to deferred release. */ + next_frag.flags = BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *frag = @@ -558,8 +561,14 @@ static void mlx5e_init_xsk_buffs(struct mlx5e_rq *rq) /* Considering the above assumptions a fragment maps to a single * xsk_buff. */ - for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) + for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) { rq->wqe.frags[i].xskp = &rq->wqe.alloc_units->xsk_buffs[i]; + + /* Skip first release due to deferred release as WQES are + * not allocated yet. + */ + rq->wqe.frags[i].flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } } static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node) @@ -1183,12 +1192,21 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) 0, true); } else { struct mlx5_wq_cyc *wq = &rq->wqe.wq; + u16 missing = mlx5_wq_cyc_missing(wq); + u16 head = mlx5_wq_cyc_get_head(wq); while (!mlx5_wq_cyc_is_empty(wq)) { wqe_ix = mlx5_wq_cyc_get_tail(wq); rq->dealloc_wqe(rq, wqe_ix); mlx5_wq_cyc_pop(wq); } + /* Missing slots might also contain unreleased pages due to + * deferred release. 
+ */ + while (missing--) { + wqe_ix = mlx5_wq_cyc_ctr2ix(wq, head++); + rq->dealloc_wqe(rq, wqe_ix); + } } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f98212596c1e..0675ffa5f8df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -319,11 +319,21 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, return err; } +static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag) +{ +#define CAN_RELEASE_MASK \ + (BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) | BIT(MLX5E_WQE_FRAG_SKIP_RELEASE)) + +#define CAN_RELEASE_VALUE BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE) + + return (frag->flags & CAN_RELEASE_MASK) == CAN_RELEASE_VALUE; +} + static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag, bool recycle) { - if (frag->flags & BIT(MLX5E_WQE_FRAG_LAST_IN_PAGE)) + if (mlx5e_frag_can_release(frag)) mlx5e_page_release_fragmented(rq, frag->frag_page, recycle); } @@ -347,6 +357,8 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, if (unlikely(err)) goto free_frags; + frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + headroom = i == 0 ? rq->buff.headroom : 0; addr = page_pool_get_dma_addr(frag->frag_page->page); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); @@ -367,7 +379,7 @@ static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, { int i; - if (rq->xsk_pool) { + if (rq->xsk_pool && !(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))) { /* The `recycle` parameter is ignored, and the page is always * put into the Reuse Ring, because there is no way to return * the page to the userspace when the interface goes down. @@ -387,6 +399,20 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_wqe(rq, wi, false); } +static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *wi; + + wi = get_frag(rq, j); + mlx5e_free_rx_wqe(rq, wi, true); + } +} + static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -792,6 +818,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) */ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; + mlx5e_free_rx_wqes(rq, head, wqe_bulk); + if (!rq->xsk_pool) count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); else if (likely(!rq->xsk_pool->dma_need_sync)) @@ -1727,7 +1755,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { mlx5e_handle_rx_err_cqe(rq, cqe); - goto free_wqe; + goto wq_cyc_pop; } skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe, @@ -1741,9 +1769,9 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) /* do not return page to cache, * it will be returned on XDP_TX completion. 
*/ - goto wq_cyc_pop; + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } - goto free_wqe; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -1751,13 +1779,11 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (mlx5e_cqe_regb_chain(cqe)) if (!mlx5e_tc_update_skb_nic(cqe, skb)) { dev_kfree_skb_any(skb); - goto free_wqe; + goto wq_cyc_pop; } napi_gro_receive(rq->cq.napi, skb); -free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } @@ -1781,7 +1807,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { mlx5e_handle_rx_err_cqe(rq, cqe); - goto free_wqe; + goto wq_cyc_pop; } skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, @@ -1794,9 +1820,9 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) /* do not return page to cache, * it will be returned on XDP_TX completion. */ - goto wq_cyc_pop; + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); } - goto free_wqe; + goto wq_cyc_pop; } mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); @@ -1806,8 +1832,6 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_rep_tc_receive(cqe, rq, skb); -free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); wq_cyc_pop: mlx5_wq_cyc_pop(wq); } @@ -2454,7 +2478,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { rq->stats->wqe_err++; - goto wq_free_wqe; + goto wq_cyc_pop; } skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, @@ -2462,17 +2486,16 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_skb_from_cqe_nonlinear, rq, wi, cqe, cqe_bcnt); if (!skb) - goto wq_free_wqe; + goto wq_cyc_pop; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (unlikely(!skb->dev)) { dev_kfree_skb_any(skb); - goto wq_free_wqe; + goto wq_cyc_pop; } napi_gro_receive(rq->cq.napi, skb); -wq_free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); +wq_cyc_pop: mlx5_wq_cyc_pop(wq); } @@ -2547,12 +2570,12 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { rq->stats->wqe_err++; - goto free_wqe; + goto wq_cyc_pop; } skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt); if (!skb) - goto free_wqe; + goto wq_cyc_pop; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN); @@ -2561,8 +2584,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe rq->netdev->devlink_port); dev_kfree_skb_any(skb); -free_wqe: - mlx5e_free_rx_wqe(rq, wi, false); +wq_cyc_pop: mlx5_wq_cyc_pop(wq); } -- cgit From 76238d0fbd21948dd9423e53f57e7354898967d3 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 21 Feb 2023 20:31:39 +0200 Subject: net/mlx5e: RX, Split off release path for xsk buffers for legacy rq Don't mix xsk buffer releases with page releases anymore. This is needed for handling of deferred page release. Add a new bulk free function for xsk buffers from wqe frags. 
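The shape of the new bulk helper, sketched with made-up names (the driver version walks struct mlx5e_wqe_frag_info entries and calls xsk_buff_free()); the points of interest are the masked ring index and the per-fragment skip check:

#include <stdbool.h>
#include <stdint.h>

#define WQ_LOG_SIZE 6
#define WQ_SIZE     (1u << WQ_LOG_SIZE)

struct xsk_frag {
        void *xsk_buff;
        bool skip_release;      /* e.g. taken over by XDP_TX */
};

/* The RX work queue is a power-of-two ring, so an absolute counter is
 * masked back into a valid ring index.
 */
static unsigned int wq_ctr2ix(unsigned int ctr)
{
        return ctr & (WQ_SIZE - 1);
}

static void xsk_free_rx_wqes(struct xsk_frag *frags, unsigned int ix, int bulk)
{
        for (int i = 0; i < bulk; i++) {
                struct xsk_frag *f = &frags[wq_ctr2ix(ix + i)];

                if (f->skip_release)
                        continue;
                /* In the driver this is xsk_buff_free(), which always returns
                 * the buffer to the XSK reuse ring; page releases never mix
                 * into this path.
                 */
                f->xsk_buff = NULL;
        }
}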
Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 50 +++++++++++++++++-------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0675ffa5f8df..9c5270eb9dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -379,24 +379,42 @@ static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq, { int i; - if (rq->xsk_pool && !(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))) { - /* The `recycle` parameter is ignored, and the page is always - * put into the Reuse Ring, because there is no way to return - * the page to the userspace when the interface goes down. - */ - xsk_buff_free(*wi->xskp); - return; - } - for (i = 0; i < rq->wqe.info.num_frags; i++, wi++) mlx5e_put_rx_frag(rq, wi, recycle); } +static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi) +{ + if (!(wi->flags & BIT(MLX5E_WQE_FRAG_SKIP_RELEASE))) + xsk_buff_free(*wi->xskp); +} + static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); - mlx5e_free_rx_wqe(rq, wi, false); + if (rq->xsk_pool) + mlx5e_xsk_free_rx_wqe(wi); + else + mlx5e_free_rx_wqe(rq, wi, false); +} + +static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *wi; + + wi = get_frag(rq, j); + /* The page is always put into the Reuse Ring, because there + * is no way to return the page to the userspace when the + * interface goes down. + */ + mlx5e_xsk_free_rx_wqe(wi); + } } static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) @@ -818,19 +836,21 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) */ wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; - mlx5e_free_rx_wqes(rq, head, wqe_bulk); - - if (!rq->xsk_pool) + if (!rq->xsk_pool) { + mlx5e_free_rx_wqes(rq, head, wqe_bulk); count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); - else if (likely(!rq->xsk_pool->dma_need_sync)) + } else if (likely(!rq->xsk_pool->dma_need_sync)) { + mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); - else + } else { + mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); /* If dma_need_sync is true, it's more efficient to call * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch, * because the latter does the same check and returns only one * frame. */ count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk); + } mlx5_wq_cyc_push_n(wq, count); if (unlikely(count != wqe_bulk)) { -- cgit From 4ba2b4988c98ce9b56b77a1610c3a7b70ee30b57 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 21 Feb 2023 20:42:48 +0200 Subject: net/mlx5e: RX, Increase WQE bulk size for legacy rq Deferred page release was added to legacy rq but its desired effect (driver releases last fragment to page pool cache) is not yet visible due to the WQE bulks being too small. This patch increases the WQE bulk size to span 512 KB and clip it to one quarter of the rx queue size. 
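A back-of-the-envelope version of the sizing rule, assuming the non-XDP 512 KB cap; compute_wqe_bulk() is a hypothetical simplification of the mlx5e_rx_compute_wqe_bulk_params() helper this patch introduces:

#include <stdio.h>

#define MAX_WQE_BULK_BYTES      (512u * 1024)  /* non-XDP cap */

static unsigned int compute_wqe_bulk(unsigned int rq_size,
                                     unsigned int bytes_per_wqe,
                                     unsigned int wqe_index_mask)
{
        unsigned int quarter_rq_bytes = (rq_size / 4) * bytes_per_wqe;
        unsigned int bulk_bytes = quarter_rq_bytes < MAX_WQE_BULK_BYTES ?
                                  quarter_rq_bytes : MAX_WQE_BULK_BYTES;
        /* round up to whole WQEs */
        unsigned int bulk = (bulk_bytes + bytes_per_wqe - 1) / bytes_per_wqe;

        /* never start allocating while an older WQE still uses the page */
        return bulk > wqe_index_mask + 1 ? bulk : wqe_index_mask + 1;
}

int main(void)
{
        /* Example: 1024-entry RQ at MTU 1500 -> 2 KB per WQE, two WQEs share
         * a page (wqe_index_mask = 1): min(512 KB, 256 * 2 KB) = 512 KB,
         * i.e. a bulk of 256 WQEs.
         */
        printf("wqe_bulk = %u\n", compute_wqe_bulk(1024, 2048, 1));
        return 0;
}

For the configuration in the example this prints wqe_bulk = 256, compared with the previous fixed bulk of 8.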
Signed-off-by: Dragos Tatulea Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/params.c | 47 ++++++++++++++++++++-- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 613a7daf9595..a087c433366b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -670,7 +670,7 @@ struct mlx5e_rq_frags_info { struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS]; u8 num_frags; u8 log_num_frags; - u8 wqe_bulk; + u16 wqe_bulk; u8 wqe_index_mask; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 561da78d3b5c..40218d77ef34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -667,6 +667,43 @@ static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp) return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; } +static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, + struct mlx5e_rq_frags_info *info) +{ + u16 bulk_bound_rq_size = (1 << params->log_rq_mtu_frames) / 4; + u32 bulk_bound_rq_size_in_bytes; + u32 sum_frag_strides = 0; + u32 wqe_bulk_in_bytes; + u32 wqe_bulk; + int i; + + for (i = 0; i < info->num_frags; i++) + sum_frag_strides += info->arr[i].frag_stride; + + /* For MTUs larger than PAGE_SIZE, align to PAGE_SIZE to reflect + * amount of consumed pages per wqe in bytes. + */ + if (sum_frag_strides > PAGE_SIZE) + sum_frag_strides = ALIGN(sum_frag_strides, PAGE_SIZE); + + bulk_bound_rq_size_in_bytes = bulk_bound_rq_size * sum_frag_strides; + +#define MAX_WQE_BULK_BYTES(xdp) ((xdp ? 256 : 512) * 1024) + + /* A WQE bulk should not exceed min(512KB, 1/4 of rq size). For XDP + * keep bulk size smaller to avoid filling the page_pool cache on + * every bulk refill. + */ + wqe_bulk_in_bytes = min_t(u32, MAX_WQE_BULK_BYTES(params->xdp_prog), + bulk_bound_rq_size_in_bytes); + wqe_bulk = DIV_ROUND_UP(wqe_bulk_in_bytes, sum_frag_strides); + + /* Make sure that allocations don't start when the page is still used + * by older WQEs. + */ + info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk); +} + #define DEFAULT_FRAG_SIZE (2048) static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, @@ -774,11 +811,13 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, } out: - /* Bulking optimization to skip allocation until at least 8 WQEs can be - * allocated in a row. At the same time, never start allocation when - * the page is still used by older WQEs. + /* Bulking optimization to skip allocation until a large enough number + * of WQEs can be allocated in a row. Bulking also influences how well + * deferred page release works. */ - info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8); + mlx5e_rx_compute_wqe_bulk_params(params, info); + + mlx5_core_dbg(mdev, "%s: wqe_bulk = %u\n", __func__, info->wqe_bulk); info->log_num_frags = order_base_2(info->num_frags); -- cgit From cd640b050368d5be6bccf1edb51b1e4c553555e6 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 21 Feb 2023 21:05:07 +0200 Subject: net/mlx5e: RX, Break the wqe bulk refill in smaller chunks To avoid overflowing the page pool's cache, don't release the whole bulk which is usually larger than the cache refill size. 
Group release+alloc instead into cache refill units that allow releasing to the cache and then allocating from the cache. A refill_unit variable is added as a iteration unit over the wqe_bulk when doing release+alloc. For a single ring, single core, default MTU (1500) TCP stream test the number of pages allocated from the cache directly (rx_pp_recycle_cached) increases from 0% to 52%: +---------------------------------------------+ | Page Pool stats (/sec) | Before | After | +-------------------------+---------+---------+ |rx_pp_alloc_fast | 2145422 | 2193802 | |rx_pp_alloc_slow | 2 | 0 | |rx_pp_alloc_empty | 2 | 0 | |rx_pp_alloc_refill | 34059 | 16634 | |rx_pp_alloc_waive | 0 | 0 | |rx_pp_recycle_cached | 0 | 1145818 | |rx_pp_recycle_cache_full | 0 | 0 | |rx_pp_recycle_ring | 2179361 | 1064616 | |rx_pp_recycle_ring_full | 121 | 0 | +---------------------------------------------+ With this patch, the performance for legacy rq for the above test is back to baseline. Signed-off-by: Dragos Tatulea Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/params.c | 8 ++++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 28 ++++++++++++++++++++-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a087c433366b..ba615b74bb8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -671,6 +671,7 @@ struct mlx5e_rq_frags_info { u8 num_frags; u8 log_num_frags; u16 wqe_bulk; + u16 refill_unit; u8 wqe_index_mask; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 40218d77ef34..31f3c6e51d9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -674,6 +674,7 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, u32 bulk_bound_rq_size_in_bytes; u32 sum_frag_strides = 0; u32 wqe_bulk_in_bytes; + u16 split_factor; u32 wqe_bulk; int i; @@ -702,6 +703,10 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, * by older WQEs. */ info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk); + + split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog), + PP_ALLOC_CACHE_REFILL * PAGE_SIZE); + info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor); } #define DEFAULT_FRAG_SIZE (2048) @@ -817,7 +822,8 @@ out: */ mlx5e_rx_compute_wqe_bulk_params(params, info); - mlx5_core_dbg(mdev, "%s: wqe_bulk = %u\n", __func__, info->wqe_bulk); + mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n", + __func__, info->wqe_bulk, info->refill_unit); info->log_num_frags = order_base_2(info->num_frags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 9c5270eb9dc6..df5dbef9e5ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -449,6 +449,31 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) return i; } +static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + int remaining = wqe_bulk; + int i = 0; + + /* The WQE bulk is split into smaller bulks that are sized + * according to the page pool cache refill size to avoid overflowing + * the page pool cache due to too many page releases at once. 
+ */ + do { + int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); + int alloc_count; + + mlx5e_free_rx_wqes(rq, ix + i, refill); + alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); + i += alloc_count; + if (unlikely(alloc_count != refill)) + break; + + remaining -= refill; + } while (remaining); + + return i; +} + static inline void mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, struct page *page, u32 frag_offset, u32 len, @@ -837,8 +862,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask; if (!rq->xsk_pool) { - mlx5e_free_rx_wqes(rq, head, wqe_bulk); - count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk); + count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk); } else if (likely(!rq->xsk_pool->dma_need_sync)) { mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); -- cgit From 3905f8d64ccc2c640d8c1179f4452f2bf8f1df56 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 22 Feb 2023 11:27:38 +0200 Subject: net/mlx5e: RX, Remove unnecessary recycle parameter and page_cache stats The recycle parameter used during page release is no longer necessary: the page pool can detect when the page cannot be recycled to the cache or ring without any outside hint. The page pool will also take care of cleaning up after itself once all the inflight pages have been released. So no need to explicitly release pages to the system. Remove the internal page_cache stats as the mlx5e_page_cache struct no longer exists. Delete the documentation entries along with the stats. Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/counters.rst | 26 ------------ drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 7 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 47 ++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 20 --------- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 10 ----- 5 files changed, 25 insertions(+), 85 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst index 4cd8e869762b..6b2d1fe74ecf 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst @@ -346,32 +346,6 @@ the software port. - The number of receive packets with CQE compression on ring i [#accel]_. - Acceleration - * - `rx[i]_cache_reuse` - - The number of events of successful reuse of a page from a driver's - internal page cache. - - Acceleration - - * - `rx[i]_cache_full` - - The number of events of full internal page cache where driver can't put a - page back to the cache for recycling (page will be freed). - - Acceleration - - * - `rx[i]_cache_empty` - - The number of events where cache was empty - no page to give. Driver - shall allocate new page. - - Acceleration - - * - `rx[i]_cache_busy` - - The number of events where cache head was busy and cannot be recycled. - Driver allocated new page. - - Acceleration - - * - `rx[i]_cache_waive` - - The number of cache evacuation. This can occur due to page move to - another NUMA node or page was pfmemalloc-ed and should be freed as soon - as possible. - - Acceleration - * - `rx[i]_arfs_err` - Number of flow rules that failed to be added to the flow table. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ca6ac9772d22..15d15d0e5ef9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -505,7 +505,6 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 				  struct mlx5e_xdp_wqe_info *wi,
 				  u32 *xsk_frames,
-				  bool recycle,
 				  struct xdp_frame_bulk *bq)
 {
 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -524,7 +523,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 		case MLX5E_XDP_XMIT_MODE_PAGE:
 			/* XDP_TX from the regular RQ */
 			page_pool_put_defragged_page(xdpi.page.rq->page_pool,
-						     xdpi.page.page, -1, recycle);
+						     xdpi.page.page, -1, true);
 			break;
 		case MLX5E_XDP_XMIT_MODE_XSK:
 			/* AF_XDP send */
@@ -578,7 +577,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 			sqcc += wi->num_wqebbs;
 
-			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
+			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
 		} while (!last_wqe);
 
 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
@@ -625,7 +624,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 
 		sq->cc += wi->num_wqebbs;
 
-		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
 	}
 
 	xdp_flush_frame_bulk(&bq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index df5dbef9e5ec..1049805571c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -293,14 +293,13 @@ static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
 }
 
 static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq,
-					   struct mlx5e_frag_page *frag_page,
-					   bool recycle)
+					   struct mlx5e_frag_page *frag_page)
 {
 	u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
 	struct page *page = frag_page->page;
 
 	if (page_pool_defrag_page(page, drain_count) == 0)
-		page_pool_put_defragged_page(rq->page_pool, page, -1, recycle);
+		page_pool_put_defragged_page(rq->page_pool, page, -1, true);
 }
 
 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
@@ -330,11 +329,10 @@ static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag)
 }
 
 static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
-				     struct mlx5e_wqe_frag_info *frag,
-				     bool recycle)
+				     struct mlx5e_wqe_frag_info *frag)
 {
 	if (mlx5e_frag_can_release(frag))
-		mlx5e_page_release_fragmented(rq, frag->frag_page, recycle);
+		mlx5e_page_release_fragmented(rq, frag->frag_page);
 }
 
 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -368,19 +366,18 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
 
 free_frags:
 	while (--i >= 0)
-		mlx5e_put_rx_frag(rq, --frag, true);
+		mlx5e_put_rx_frag(rq, --frag);
 
 	return err;
 }
 
 static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
-				     struct mlx5e_wqe_frag_info *wi,
-				     bool recycle)
+				     struct mlx5e_wqe_frag_info *wi)
 {
 	int i;
 
 	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
-		mlx5e_put_rx_frag(rq, wi, recycle);
+		mlx5e_put_rx_frag(rq, wi);
 }
 
 static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi)
@@ -396,7 +393,7 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 	if (rq->xsk_pool)
 		mlx5e_xsk_free_rx_wqe(wi);
 	else
-		mlx5e_free_rx_wqe(rq, wi, false);
+		mlx5e_free_rx_wqe(rq, wi);
 }
 
 static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
@@ -427,7 +424,7 @@ static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
 		struct mlx5e_wqe_frag_info *wi;
 
 		wi = get_frag(rq, j);
-		mlx5e_free_rx_wqe(rq, wi, true);
+		mlx5e_free_rx_wqe(rq, wi);
 	}
 }
 
@@ -502,7 +499,7 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
 }
 
 static void
-mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
 	bool no_xdp_xmit;
 	int i;
@@ -516,9 +513,9 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle
 	if (rq->xsk_pool) {
 		struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs;
 
-		/* The `recycle` parameter is ignored, and the page is always
-		 * put into the Reuse Ring, because there is no way to return
-		 * the page to the userspace when the interface goes down.
+		/* The page is always put into the Reuse Ring, because there
+		 * is no way to return the page to userspace when the interface
+		 * goes down.
 		 */
 		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
 			if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap))
@@ -529,7 +526,7 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle
 				struct mlx5e_frag_page *frag_page;
 
 				frag_page = &wi->alloc_units.frag_pages[i];
-				mlx5e_page_release_fragmented(rq, frag_page, recycle);
+				mlx5e_page_release_fragmented(rq, frag_page);
 			}
 		}
 	}
@@ -663,7 +660,7 @@ err_unmap:
 		dma_info = &shampo->info[--index];
 		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
 			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
-			mlx5e_page_release_fragmented(rq, dma_info->frag_page, true);
+			mlx5e_page_release_fragmented(rq, dma_info->frag_page);
 		}
 	}
 	rq->stats->buff_alloc_err++;
@@ -781,7 +778,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 err_unmap:
 	while (--i >= 0) {
 		frag_page--;
-		mlx5e_page_release_fragmented(rq, frag_page, true);
+		mlx5e_page_release_fragmented(rq, frag_page);
 	}
 
 err:
@@ -815,7 +812,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
 		hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
 		if (hd_info->frag_page && hd_info->frag_page != deleted_page) {
 			deleted_page = hd_info->frag_page;
-			mlx5e_page_release_fragmented(rq, hd_info->frag_page, false);
+			mlx5e_page_release_fragmented(rq, hd_info->frag_page);
 		}
 
 		hd_info->frag_page = NULL;
@@ -833,8 +830,8 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
 static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
-	/* Don't recycle, this function is called on rq/netdev close */
-	mlx5e_free_rx_mpwqe(rq, wi, false);
+	/* This function is called on rq/netdev close. */
+	mlx5e_free_rx_mpwqe(rq, wi);
 }
 
 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
@@ -1058,7 +1055,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 		struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head);
 
 		/* Deferred free for better page pool cache usage. */
-		mlx5e_free_rx_mpwqe(rq, wi, true);
+		mlx5e_free_rx_mpwqe(rq, wi);
 
 		alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
 					   mlx5e_alloc_rx_mpwqe(rq, head);
@@ -1739,7 +1736,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 			int i;
 
 			for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
-				mlx5e_put_rx_frag(rq, &head_wi[i], true);
+				mlx5e_put_rx_frag(rq, &head_wi[i]);
 		}
 		return NULL; /* page/packet was consumed by XDP */
 	}
@@ -2158,7 +2155,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
 		struct mlx5e_dma_info *dma_info = &shampo->info[header_index];
 
 		dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE);
-		mlx5e_page_release_fragmented(rq, dma_info->frag_page, true);
+		mlx5e_page_release_fragmented(rq, dma_info->frag_page);
 	}
 	bitmap_clear(shampo->bitmap, header_index, 1);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4478223c1720..f1d9596905c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -179,11 +179,6 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
@@ -358,11 +353,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
 	s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
 	s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
 	s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
-	s->rx_cache_reuse += rq_stats->cache_reuse;
-	s->rx_cache_full += rq_stats->cache_full;
-	s->rx_cache_empty += rq_stats->cache_empty;
-	s->rx_cache_busy += rq_stats->cache_busy;
-	s->rx_cache_waive += rq_stats->cache_waive;
 	s->rx_congst_umr += rq_stats->congst_umr;
 	s->rx_arfs_err += rq_stats->arfs_err;
 	s->rx_recover += rq_stats->recover;
@@ -1978,11 +1968,6 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
@@ -2163,11 +2148,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = {
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index b77100b60b50..1ff8a06027dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -193,11 +193,6 @@ struct mlx5e_sw_stats {
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
-	u64 rx_cache_reuse;
-	u64 rx_cache_full;
-	u64 rx_cache_empty;
-	u64 rx_cache_busy;
-	u64 rx_cache_waive;
 	u64 rx_congst_umr;
 	u64 rx_arfs_err;
 	u64 rx_recover;
@@ -362,11 +357,6 @@ struct mlx5e_rq_stats {
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
-	u64 cache_reuse;
-	u64 cache_full;
-	u64 cache_empty;
-	u64 cache_busy;
-	u64 cache_waive;
 	u64 congst_umr;
 	u64 arfs_err;
 	u64 recover;
-- cgit
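
In every call site touched above, the removed recycle argument is effectively replaced with true: callers always allow recycling and leave the decision about what actually happens to a returned page to the page pool. The standalone C sketch below models that calling-convention change with toy types; toy_pool, toy_page and toy_pool_put are hypothetical stand-ins for illustration only, not mlx5e or page_pool symbols.

/*
 * Standalone sketch (not kernel code): a toy model of dropping a caller-side
 * "recycle" hint and letting the pool itself decide what to do with a page
 * that is handed back. All names are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_page {
	int refcount;			/* a still-referenced page must not be recycled */
};

struct toy_pool {
	struct toy_page *cache[8];	/* tiny recycle cache */
	int cached;
};

/*
 * Old shape: toy_pool_put(pool, page, bool recycle), where the caller had to
 * know the context. New shape: the pool inspects the page and either recycles
 * it or returns it to the system.
 */
static void toy_pool_put(struct toy_pool *pool, struct toy_page *page)
{
	if (page->refcount == 0 && pool->cached < 8) {
		pool->cache[pool->cached++] = page;	/* recycle */
		return;
	}
	free(page);				/* give back to the system */
}

int main(void)
{
	struct toy_pool pool = { .cached = 0 };
	struct toy_page *page = calloc(1, sizeof(*page));

	toy_pool_put(&pool, page);	/* caller no longer passes a hint */
	printf("pages held in the cache: %d\n", pool.cached);

	while (pool.cached > 0)		/* drain the toy cache before exit */
		free(pool.cache[--pool.cached]);
	return 0;
}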