aboutsummaryrefslogtreecommitdiff
path: root/net/core/page_pool.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2020-01-28 16:02:33 -0800
committerLinus Torvalds <[email protected]>2020-01-28 16:02:33 -0800
commitbd2463ac7d7ec51d432f23bf0e893fb371a908cd (patch)
tree3da32c23be83adb9d9bda7e51b51fa39f69f2447 /net/core/page_pool.c
parenta78208e2436963d0b2c7d186277d6e1a9755029a (diff)
parentf76e4c167ea2212e23c15ee7e601a865e822c291 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Add WireGuard 2) Add HE and TWT support to ath11k driver, from John Crispin. 3) Add ESP in TCP encapsulation support, from Sabrina Dubroca. 4) Add variable window congestion control to TIPC, from Jon Maloy. 5) Add BCM84881 PHY driver, from Russell King. 6) Start adding netlink support for ethtool operations, from Michal Kubecek. 7) Add XDP drop and TX action support to ena driver, from Sameeh Jubran. 8) Add new ipv4 route notifications so that mlxsw driver does not have to handle identical routes itself. From Ido Schimmel. 9) Add BPF dynamic program extensions, from Alexei Starovoitov. 10) Support RX and TX timestamping in igc, from Vinicius Costa Gomes. 11) Add support for macsec HW offloading, from Antoine Tenart. 12) Add initial support for MPTCP protocol, from Christoph Paasch, Matthieu Baerts, Florian Westphal, Peter Krystad, and many others. 13) Add Octeontx2 PF support, from Sunil Goutham, Geetha sowjanya, Linu Cherian, and others. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1469 commits) net: phy: add default ARCH_BCM_IPROC for MDIO_BCM_IPROC udp: segment looped gso packets correctly netem: change mailing list qed: FW 8.42.2.0 debug features qed: rt init valid initialization changed qed: Debug feature: ilt and mdump qed: FW 8.42.2.0 Add fw overlay feature qed: FW 8.42.2.0 HSI changes qed: FW 8.42.2.0 iscsi/fcoe changes qed: Add abstraction for different hsi values per chip qed: FW 8.42.2.0 Additional ll2 type qed: Use dmae to write to widebus registers in fw_funcs qed: FW 8.42.2.0 Parser offsets modified qed: FW 8.42.2.0 Queue Manager changes qed: FW 8.42.2.0 Expose new registers and change windows qed: FW 8.42.2.0 Internal ram offsets modifications MAINTAINERS: Add entry for Marvell OcteonTX2 Physical Function driver Documentation: net: octeontx2: Add RVU HW and drivers overview octeontx2-pf: ethtool RSS config support octeontx2-pf: Add basic ethtool support ...
Diffstat (limited to 'net/core/page_pool.c')
-rw-r--r--net/core/page_pool.c89
1 files changed, 70 insertions, 19 deletions
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a6aefe989043..9b7cbe35df37 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -96,10 +96,65 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
+static void __page_pool_return_page(struct page_pool *pool, struct page *page);
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
+ bool refill)
+{
+ struct ptr_ring *r = &pool->ring;
+ struct page *page;
+ int pref_nid; /* preferred NUMA node */
+
+ /* Quicker fallback, avoid locks when ring is empty */
+ if (__ptr_ring_empty(r))
+ return NULL;
+
+ /* Softirq guarantees the CPU, and thus the NUMA node, is stable. This
+ * assumes the CPU refilling the driver RX-ring will also run RX-NAPI.
+ */
+#ifdef CONFIG_NUMA
+ pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
+#else
+ /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
+ pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
+#endif
+
+ /* Slower-path: Get pages from locked ring queue */
+ spin_lock(&r->consumer_lock);
+
+ /* Refill alloc array, but only if NUMA match */
+ do {
+ page = __ptr_ring_consume(r);
+ if (unlikely(!page))
+ break;
+
+ if (likely(page_to_nid(page) == pref_nid)) {
+ pool->alloc.cache[pool->alloc.count++] = page;
+ } else {
+ /* NUMA mismatch;
+ * (1) release 1 page to page-allocator and
+ * (2) break out to fall through to alloc_pages_node.
+ * This limits stress on the page buddy allocator.
+ */
+ __page_pool_return_page(pool, page);
+ page = NULL;
+ break;
+ }
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL &&
+ refill);
+
+ /* Return last page */
+ if (likely(pool->alloc.count > 0))
+ page = pool->alloc.cache[--pool->alloc.count];
+
+ spin_unlock(&r->consumer_lock);
+ return page;
+}
+
/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
- struct ptr_ring *r = &pool->ring;
bool refill = false;
struct page *page;
@@ -113,20 +168,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
refill = true;
}
- /* Quicker fallback, avoid locks when ring is empty */
- if (__ptr_ring_empty(r))
- return NULL;
-
- /* Slow-path: Get page from locked ring queue,
- * refill alloc array if requested.
- */
- spin_lock(&r->consumer_lock);
- page = __ptr_ring_consume(r);
- if (refill)
- pool->alloc.count = __ptr_ring_consume_batched(r,
- pool->alloc.cache,
- PP_ALLOC_CACHE_REFILL);
- spin_unlock(&r->consumer_lock);
+ page = page_pool_refill_alloc_cache(pool, refill);
return page;
}
@@ -163,7 +205,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
/* Cache was empty, do real allocation */
+#ifdef CONFIG_NUMA
page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+#else
+ page = alloc_pages(gfp, pool->p.order);
+#endif
if (!page)
return NULL;
@@ -311,13 +357,10 @@ static bool __page_pool_recycle_direct(struct page *page,
/* page is NOT reusable when:
* 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- * 2) belongs to a different NUMA node than pool->p.nid.
- *
- * To update pool->p.nid users must call page_pool_update_nid.
*/
static bool pool_page_reusable(struct page_pool *pool, struct page *page)
{
- return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
+ return !page_is_pfmemalloc(page);
}
void __page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -484,7 +527,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
+ struct page *page;
+
trace_page_pool_update_nid(pool, new_nid);
pool->p.nid = new_nid;
+
+ /* Flush pool alloc cache, as refill will check NUMA node */
+ while (pool->alloc.count) {
+ page = pool->alloc.cache[--pool->alloc.count];
+ __page_pool_return_page(pool, page);
+ }
}
EXPORT_SYMBOL(page_pool_update_nid);