Diffstat (limited to 'net/core/page_pool.c')
-rw-r--r-- | net/core/page_pool.c | 231 |
1 file changed, 168 insertions(+), 63 deletions(-)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5e409b98aba0..8bcc7014a61a 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2016 Red Hat, Inc.
  */

+#include <linux/error-injection.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -23,12 +24,16 @@
 #include <trace/events/page_pool.h>

+#include "page_pool_priv.h"
+
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)

-#define BIAS_MAX	LONG_MAX
+#define BIAS_MAX	(LONG_MAX >> 1)

 #ifdef CONFIG_PAGE_POOL_STATS
+static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats);
+
 /* alloc_stat_inc is intended to be used in softirq context */
 #define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
 /* recycle_stat_inc is safe to use when preemption is possible. */
@@ -69,7 +74,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
  * is passed to this API which is filled in. The caller can then report
  * those stats to the user (perhaps via ethtool, debugfs, etc.).
  */
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
			 struct page_pool_stats *stats)
 {
	int cpu = 0;
@@ -119,9 +124,9 @@ int page_pool_ethtool_stats_get_count(void)
 }
 EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);

-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
 {
-	struct page_pool_stats *pool_stats = stats;
+	const struct page_pool_stats *pool_stats = stats;

	*data++ = pool_stats->alloc_stats.fast;
	*data++ = pool_stats->alloc_stats.slow;
@@ -169,11 +174,15 @@ static void page_pool_producer_unlock(struct page_pool *pool,
 }

 static int page_pool_init(struct page_pool *pool,
-			  const struct page_pool_params *params)
+			  const struct page_pool_params *params,
+			  int cpuid)
 {
	unsigned int ring_qsize = 1024; /* Default */

-	memcpy(&pool->p, params, sizeof(pool->p));
+	memcpy(&pool->p, &params->fast, sizeof(pool->p));
+	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
+
+	pool->cpuid = cpuid;

	/* Validate only known flags were used */
	if (pool->p.flags & ~(PP_FLAG_ALL))
@@ -211,14 +220,29 @@ static int page_pool_init(struct page_pool *pool,
		 */
	}

+	pool->has_init_callback = !!pool->slow.init_callback;
+
 #ifdef CONFIG_PAGE_POOL_STATS
-	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
-	if (!pool->recycle_stats)
-		return -ENOMEM;
+	if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) {
+		pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
+		if (!pool->recycle_stats)
+			return -ENOMEM;
+	} else {
+		/* For system page pool instance we use a singular stats object
+		 * instead of allocating a separate percpu variable for each
+		 * (also percpu) page pool instance.
+		 */
+		pool->recycle_stats = &pp_system_recycle_stats;
+	}
 #endif

-	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
+#ifdef CONFIG_PAGE_POOL_STATS
+		if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+			free_percpu(pool->recycle_stats);
+#endif
		return -ENOMEM;
+	}

	atomic_set(&pool->pages_state_release_cnt, 0);
@@ -231,11 +255,26 @@ static int page_pool_init(struct page_pool *pool,
	return 0;
 }

+static void page_pool_uninit(struct page_pool *pool)
+{
+	ptr_ring_cleanup(&pool->ring, NULL);
+
+	if (pool->p.flags & PP_FLAG_DMA_MAP)
+		put_device(pool->p.dev);
+
+#ifdef CONFIG_PAGE_POOL_STATS
+	if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL))
+		free_percpu(pool->recycle_stats);
+#endif
+}
+
 /**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
  * @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
  */
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
 {
	struct page_pool *pool;
	int err;
@@ -244,14 +283,32 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
	if (!pool)
		return ERR_PTR(-ENOMEM);

-	err = page_pool_init(pool, params);
-	if (err < 0) {
-		pr_warn("%s() gave up with errno %d\n", __func__, err);
-		kfree(pool);
-		return ERR_PTR(err);
-	}
+	err = page_pool_init(pool, params, cpuid);
+	if (err < 0)
+		goto err_free;
+
+	err = page_pool_list(pool);
+	if (err)
+		goto err_uninit;

	return pool;
+
+err_uninit:
+	page_pool_uninit(pool);
+err_free:
+	pr_warn("%s() gave up with errno %d\n", __func__, err);
+	kfree(pool);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+	return page_pool_create_percpu(params, -1);
 }
 EXPORT_SYMBOL(page_pool_create);
@@ -327,8 +384,8 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
	return page;
 }

-static void page_pool_dma_sync_for_device(struct page_pool *pool,
-					   struct page *page,
+static void page_pool_dma_sync_for_device(const struct page_pool *pool,
+					   const struct page *page,
					   unsigned int dma_sync_size)
 {
	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
@@ -384,8 +441,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
	 * the overhead is negligible.
	 */
	page_pool_fragment_page(page, 1);
-	if (pool->p.init_callback)
-		pool->p.init_callback(page, pool->p.init_arg);
+	if (pool->has_init_callback)
+		pool->slow.init_callback(page, pool->slow.init_arg);
 }

 static void page_pool_clear_pp_info(struct page *page)
@@ -494,13 +551,14 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
	return page;
 }
 EXPORT_SYMBOL(page_pool_alloc_pages);
+ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);

 /* Calculate distance between two u32 values, valid if distance is below 2^(31)
  * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
  */
 #define _distance(a, b)	(s32)((a) - (b))

-static s32 page_pool_inflight(struct page_pool *pool)
+s32 page_pool_inflight(const struct page_pool *pool, bool strict)
 {
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
@@ -508,27 +566,27 @@ static s32 page_pool_inflight(struct page_pool *pool)

	inflight = _distance(hold_cnt, release_cnt);

-	trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
-	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+	if (strict) {
+		trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
+		WARN(inflight < 0, "Negative(%d) inflight packet-pages",
+		     inflight);
+	} else {
+		inflight = max(0, inflight);
+	}

	return inflight;
 }

-/* Disconnects a page (from a page_pool). API users can have a need
- * to disconnect a page (from a page_pool), to allow it to be used as
- * a regular page (that will eventually be returned to the normal
- * page-allocator via put_page).
- */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
+static __always_inline
+void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
 {
	dma_addr_t dma;
-	int count;

	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
-		goto skip_dma_unmap;
+		return;

	dma = page_pool_get_dma_addr(page);
@@ -537,7 +595,19 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	page_pool_set_dma_addr(page, 0);
-skip_dma_unmap:
+}
+
+/* Disconnects a page (from a page_pool). API users can have a need
+ * to disconnect a page (from a page_pool), to allow it to be used as
+ * a regular page (that will eventually be returned to the normal
+ * page-allocator via put_page).
+ */
+void page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+	int count;
+
+	__page_pool_release_page_dma(pool, page);
+
	page_pool_clear_pp_info(page);

	/* This may be the last page returned, releasing the pool, so
@@ -589,6 +659,11 @@ static bool page_pool_recycle_in_cache(struct page *page,
	return true;
 }

+static bool __page_pool_page_can_be_recycled(const struct page *page)
+{
+	return page_ref_count(page) == 1 && !page_is_pfmemalloc(page);
+}
+
 /* If the page refcnt == 1, this will try to recycle the page.
  * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
  * the configured size min(dma_sync_size, pool->max_len).
@@ -610,15 +685,14 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
	 * page is NOT reusable when allocated when system is under
	 * some pressure. (page_is_pfmemalloc)
	 */
-	if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
+	if (likely(__page_pool_page_can_be_recycled(page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page,
						      dma_sync_size);

-		if (allow_direct && in_softirq() &&
-		    page_pool_recycle_in_cache(page, pool))
+		if (allow_direct && page_pool_recycle_in_cache(page, pool))
			return NULL;

		/* Page found as candidate for recycling */
@@ -643,9 +717,35 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
	return NULL;
 }

-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
-				  unsigned int dma_sync_size, bool allow_direct)
+static bool page_pool_napi_local(const struct page_pool *pool)
+{
+	const struct napi_struct *napi;
+	u32 cpuid;
+
+	if (unlikely(!in_softirq()))
+		return false;
+
+	/* Allow direct recycle if we have reasons to believe that we are
+	 * in the same context as the consumer would run, so there's
+	 * no possible race.
+	 * __page_pool_put_page() makes sure we're not in hardirq context
+	 * and interrupts are enabled prior to accessing the cache.
+	 */
+	cpuid = smp_processor_id();
+	if (READ_ONCE(pool->cpuid) == cpuid)
+		return true;
+
+	napi = READ_ONCE(pool->p.napi);
+
+	return napi && READ_ONCE(napi->list_owner) == cpuid;
+}
+
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+				unsigned int dma_sync_size, bool allow_direct)
 {
+	if (!allow_direct)
+		allow_direct = page_pool_napi_local(pool);
+
	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
	if (page && !page_pool_recycle_in_ring(pool, page)) {
		/* Cache full, fallback to free pages */
@@ -653,7 +753,7 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
		page_pool_return_page(pool, page);
	}
 }
-EXPORT_SYMBOL(page_pool_put_defragged_page);
+EXPORT_SYMBOL(page_pool_put_unrefed_page);

 /**
  * page_pool_put_page_bulk() - release references on multiple pages
@@ -674,22 +774,25 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count)
 {
	int i, bulk_len = 0;
+	bool allow_direct;
	bool in_softirq;

+	allow_direct = page_pool_napi_local(pool);
+
	for (i = 0; i < count; i++) {
		struct page *page = virt_to_head_page(data[i]);

		/* It is not the last user for the page frag case */
-		if (!page_pool_is_last_frag(page))
+		if (!page_pool_is_last_ref(page))
			continue;

-		page = __page_pool_put_page(pool, page, -1, false);
+		page = __page_pool_put_page(pool, page, -1, allow_direct);
		/* Approved for bulk recycling in ptr_ring cache */
		if (page)
			data[bulk_len++] = page;
	}

-	if (unlikely(!bulk_len))
+	if (!bulk_len)
		return;

	/* Bulk producer into ptr_ring page_pool cache */
@@ -722,10 +825,10 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
	long drain_count = BIAS_MAX - pool->frag_users;

	/* Some user is still using the page frag */
-	if (likely(page_pool_defrag_page(page, drain_count)))
+	if (likely(page_pool_unref_page(page, drain_count)))
		return NULL;

-	if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+	if (__page_pool_page_can_be_recycled(page)) {
		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
			page_pool_dma_sync_for_device(pool, page, -1);
@@ -743,7 +846,7 @@ static void page_pool_free_frag(struct page_pool *pool)

	pool->frag_page = NULL;

-	if (!page || page_pool_defrag_page(page, drain_count))
+	if (!page || page_pool_unref_page(page, drain_count))
		return;

	page_pool_return_page(pool, page);
@@ -814,14 +917,8 @@ static void __page_pool_destroy(struct page_pool *pool)
 {
	if (pool->disconnect)
		pool->disconnect(pool);

-	ptr_ring_cleanup(&pool->ring, NULL);
-
-	if (pool->p.flags & PP_FLAG_DMA_MAP)
-		put_device(pool->p.dev);
-
-#ifdef CONFIG_PAGE_POOL_STATS
-	free_percpu(pool->recycle_stats);
-#endif
+	page_pool_unlist(pool);
+	page_pool_uninit(pool);
	kfree(pool);
 }
@@ -858,7 +955,7 @@ static int page_pool_release(struct page_pool *pool)
	int inflight;

	page_pool_scrub(pool);
-	inflight = page_pool_inflight(pool);
+	inflight = page_pool_inflight(pool, true);
	if (!inflight)
		__page_pool_destroy(pool);
@@ -869,18 +966,21 @@ static void page_pool_release_retry(struct work_struct *wq)
 {
	struct delayed_work *dwq = to_delayed_work(wq);
	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
+	void *netdev;
	int inflight;

	inflight = page_pool_release(pool);
	if (!inflight)
		return;

-	/* Periodic warning */
-	if (time_after_eq(jiffies, pool->defer_warn)) {
+	/* Periodic warning for page pools the user can't see */
+	netdev = READ_ONCE(pool->slow.netdev);
+	if (time_after_eq(jiffies, pool->defer_warn) &&
+	    (!netdev || netdev == NET_PTR_POISON)) {
		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;

-		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
-			__func__, inflight, sec);
+		pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n",
+			__func__, pool->user.id, inflight, sec);
		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}
@@ -889,15 +989,20 @@ static void page_pool_release_retry(struct work_struct *wq)
 }

 void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
-			   struct xdp_mem_info *mem)
+			   const struct xdp_mem_info *mem)
 {
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
	pool->xdp_mem_id = mem->id;
 }

-void page_pool_unlink_napi(struct page_pool *pool)
+static void page_pool_disable_direct_recycling(struct page_pool *pool)
 {
+	/* Disable direct recycling based on pool->cpuid.
+	 * Paired with READ_ONCE() in page_pool_napi_local().
+	 */
+	WRITE_ONCE(pool->cpuid, -1);
+
	if (!pool->p.napi)
		return;
@@ -909,7 +1014,6 @@ void page_pool_unlink_napi(struct page_pool *pool)

	WRITE_ONCE(pool->p.napi, NULL);
 }
-EXPORT_SYMBOL(page_pool_unlink_napi);

 void page_pool_destroy(struct page_pool *pool)
 {
@@ -919,12 +1023,13 @@ void page_pool_destroy(struct page_pool *pool)
	if (!page_pool_put(pool))
		return;

-	page_pool_unlink_napi(pool);
+	page_pool_disable_direct_recycling(pool);
	page_pool_free_frag(pool);

	if (!page_pool_release(pool))
		return;

+	page_pool_detached(pool);
	pool->defer_start = jiffies;
	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
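A few usage notes follow. They are illustrative sketches against the API as changed above, not part of the commit.

The headline change is that pool creation is split in two: page_pool_create() is now a thin wrapper that passes cpuid == -1 to the new page_pool_create_percpu(). Below is a minimal sketch of bringing up one pool per CPU, loosely modeled on the system per-cpu pools this series enables in the core. The mydrv_* names and the pool size are made up for the sketch, and PP_FLAG_SYSTEM_POOL (which makes the pool share the single pp_system_recycle_stats object added above) is normally reserved for the core's own pools.

#include <linux/err.h>
#include <linux/percpu.h>
#include <net/page_pool/types.h>

/* Hypothetical per-cpu pool pointer; not part of the patch. */
static DEFINE_PER_CPU(struct page_pool *, mydrv_pool);

static int mydrv_create_pool(int cpuid)
{
	struct page_pool_params params = {
		/* Callers still fill a single params struct; the
		 * fast/slow split is internal to page_pool_init().
		 */
		.pool_size	= 256,			/* illustrative */
		.nid		= NUMA_NO_NODE,
		.flags		= PP_FLAG_SYSTEM_POOL,
	};
	struct page_pool *pool;

	/* cpuid != -1 pins the lockless "direct" recycle path to this
	 * CPU via page_pool_napi_local(), even with no NAPI attached.
	 */
	pool = page_pool_create_percpu(&params, cpuid);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	per_cpu(mydrv_pool, cpuid) = pool;
	return 0;
}

Ordinary drivers keep calling page_pool_create(&params); after this change it simply forwards to page_pool_create_percpu(&params, -1).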
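On the release side, page_pool_put_defragged_page() and page_pool_is_last_frag() are renamed to page_pool_put_unrefed_page() and page_pool_is_last_ref(), and a caller no longer has to assert NAPI context to get direct recycling: page_pool_napi_local() upgrades allow_direct itself when the call comes from the pool's own CPU or NAPI instance. A hedged sketch of a driver-side recycle helper (mydrv_recycle() is hypothetical; its body mirrors what the inline page_pool_put_page() wrapper does after this series):

#include <net/page_pool/helpers.h>

static void mydrv_recycle(struct page_pool *pool, struct page *page)
{
	/* Drops one fragment reference (was page_pool_is_last_frag());
	 * only the holder of the last reference returns the page.
	 */
	if (!page_pool_is_last_ref(page))
		return;

	/* allow_direct = false is safe from any context: the pool
	 * detects cpu-local / NAPI-local callers on its own, and
	 * dma_sync_size = -1 requests a full-area sync when
	 * PP_FLAG_DMA_SYNC_DEV is set.
	 */
	page_pool_put_unrefed_page(pool, page, -1, false);
}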
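Finally, the constified stats getters keep the same call pattern but now accept const pointers, so a driver can hold its pool as const end to end. A sketch of the usual ethtool plumbing, available only with CONFIG_PAGE_POOL_STATS=y; struct mydrv_priv and its pool member are assumptions for the example:

#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <net/page_pool/helpers.h>

struct mydrv_priv {			/* hypothetical driver private data */
	const struct page_pool *pool;
};

static void mydrv_get_ethtool_stats(struct net_device *dev,
				    struct ethtool_stats *stats, u64 *data)
{
	struct mydrv_priv *priv = netdev_priv(dev);
	struct page_pool_stats pp_stats = { };

	/* Both getters now take const arguments, so the const pool
	 * pointer flows through without casts.
	 */
	if (page_pool_get_stats(priv->pool, &pp_stats))
		data = page_pool_ethtool_stats_get(data, &pp_stats);
}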