aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c3
-rw-r--r--net/core/netdev-genl-gen.c2
-rw-r--r--net/core/netdev-genl-gen.h2
-rw-r--r--net/core/netpoll.c19
-rw-r--r--net/core/rtnetlink.c11
-rw-r--r--net/core/skbuff.c47
-rw-r--r--net/core/sock.c3
-rw-r--r--net/core/xdp.c48
8 files changed, 102 insertions, 33 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 253584777101..1488f700bf81 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3199,6 +3199,7 @@ static u16 skb_tx_hash(const struct net_device *dev,
}
if (skb_rx_queue_recorded(skb)) {
+ DEBUG_NET_WARN_ON_ONCE(qcount == 0);
hash = skb_get_rx_queue(skb);
if (hash >= qoffset)
hash -= qoffset;
@@ -10846,7 +10847,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
GFP_KERNEL, NULL, 0,
- portid, nlmsg_seq(nlh));
+ portid, nlh);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 48812ec843f5..3abab70d66dd 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: BSD-3-Clause
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/netdev.yaml */
/* YNL-GEN kernel source */
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index b16dc7e026bb..74d74fc23167 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/netdev.yaml */
/* YNL-GEN kernel header */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a089b704b986..e6a739b1afa9 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work)
}
}
+static int netif_local_xmit_active(struct net_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+ return 1;
+ }
+
+ return 0;
+}
+
static void poll_one_napi(struct napi_struct *napi)
{
int work;
@@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev)
if (!ni || down_trylock(&ni->dev_lock))
return;
- if (!netif_running(dev)) {
+ /* Some drivers will take the same locks in poll and xmit,
+ * we can't poll if local CPU is already in xmit.
+ */
+ if (!netif_running(dev) || netif_local_xmit_active(dev)) {
up(&ni->dev_lock);
return;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5d8eb57867a9..6e44e92ebdf5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3972,16 +3972,23 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
u32 event, gfp_t flags, int *new_nsid,
- int new_ifindex, u32 portid, u32 seq)
+ int new_ifindex, u32 portid,
+ const struct nlmsghdr *nlh)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ u32 seq = 0;
skb = nlmsg_new(if_nlmsg_size(dev, 0), flags);
if (skb == NULL)
goto errout;
+ if (nlmsg_report(nlh))
+ seq = nlmsg_seq(nlh);
+ else
+ portid = 0;
+
err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
type, portid, seq, change, 0, 0, event,
new_nsid, new_ifindex, -1, flags);
@@ -4017,7 +4024,7 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
return;
skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid,
- new_ifindex, portid, nlmsg_seq(nlh));
+ new_ifindex, portid, nlh);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags, portid, nlh);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eb7d33b41e71..4c0879798eb8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -517,18 +517,16 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
#ifdef HAVE_SKB_SMALL_HEAD_CACHE
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
-
- /* skb_small_head_cache has non power of two size,
- * likely forcing SLUB to use order-3 pages.
- * We deliberately attempt a NOMEMALLOC allocation only.
- */
obj = kmem_cache_alloc_node(skb_small_head_cache,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
- if (obj) {
- *size = SKB_SMALL_HEAD_CACHE_SIZE;
+ *size = SKB_SMALL_HEAD_CACHE_SIZE;
+ if (obj || !(gfp_pfmemalloc_allowed(flags)))
goto out;
- }
+ /* Try again but now we are using pfmemalloc reserves */
+ ret_pfmemalloc = true;
+ obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
+ goto out;
}
#endif
*size = obj_size = kmalloc_size_roundup(obj_size);
@@ -2082,6 +2080,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+/* Note: We plan to rework this in linux-6.4 */
int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
{
unsigned int saved_end_offset, saved_truesize;
@@ -2100,6 +2099,22 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
if (likely(skb_end_offset(skb) == saved_end_offset))
return 0;
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ /* We can not change skb->end if the original or new value
+ * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head().
+ */
+ if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM ||
+ skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) {
+ /* We think this path should not be taken.
+ * Add a temporary trace to warn us just in case.
+ */
+ pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n",
+ saved_end_offset, skb_end_offset(skb));
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+#endif
+
shinfo = skb_shinfo(skb);
/* We are about to change back skb->end,
@@ -5584,18 +5599,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
- /* In general, avoid mixing slab allocated and page_pool allocated
- * pages within the same SKB. However when @to is not pp_recycle and
- * @from is cloned, we can transition frag pages from page_pool to
- * reference counted.
- *
- * On the other hand, don't allow coalescing two pp_recycle SKBs if
- * @from is cloned, in case the SKB is using page_pool fragment
+ /* In general, avoid mixing page_pool and non-page_pool allocated
+ * pages within the same SKB. Additionally avoid dealing with clones
+ * with page_pool pages, in case the SKB is using page_pool fragment
* references (PP_FLAG_PAGE_FRAG). Since we only take full page
* references for cloned SKBs at the moment that would result in
* inconsistent reference counts.
+ * In theory we could take full references if @from is cloned and
+ * !@to->pp_recycle but its tricky (due to potential race with
+ * the clone disappearing) and rare, so not worth dealing with.
*/
- if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
+ if (to->pp_recycle != from->pp_recycle ||
+ (from->pp_recycle && skb_cloned(from)))
return false;
if (len <= skb_tailroom(to)) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 341c565dbc26..c25888795390 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2818,7 +2818,8 @@ static void sk_enter_memory_pressure(struct sock *sk)
static void sk_leave_memory_pressure(struct sock *sk)
{
if (sk->sk_prot->leave_memory_pressure) {
- sk->sk_prot->leave_memory_pressure(sk);
+ INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
+ tcp_leave_memory_pressure, sk);
} else {
unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 8c92fc553317..fb85aca81961 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -720,7 +720,10 @@ __diag_ignore_all("-Wmissing-prototypes",
* @ctx: XDP context pointer.
* @timestamp: Return value pointer.
*
- * Returns 0 on success or ``-errno`` on error.
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc
+ * * ``-ENODATA`` : means no RX-timestamp available for this frame
*/
__bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
@@ -731,10 +734,21 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim
* bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
* @ctx: XDP context pointer.
* @hash: Return value pointer.
+ * @rss_type: Return value pointer for RSS type.
*
- * Returns 0 on success or ``-errno`` on error.
+ * The RSS hash type (@rss_type) specifies what portion of packet headers NIC
+ * hardware used when calculating RSS hash value. The RSS type can be decoded
+ * via &enum xdp_rss_hash_type either matching on individual L3/L4 bits
+ * ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types*
+ * ``XDP_RSS_TYPE_L*``.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc
+ * * ``-ENODATA`` : means no RX-hash available for this frame
*/
-__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
{
return -EOPNOTSUPP;
}
@@ -774,20 +788,34 @@ static int __init xdp_metadata_init(void)
}
late_initcall(xdp_metadata_init);
+void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
+{
+ val &= NETDEV_XDP_ACT_MASK;
+ if (dev->xdp_features == val)
+ return;
+
+ dev->xdp_features = val;
+
+ if (dev->reg_state == NETREG_REGISTERED)
+ call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL_GPL(xdp_set_features_flag);
+
void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
{
- dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
- if (support_sg)
- dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+ xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT);
- call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+ if (support_sg)
+ val |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+ xdp_set_features_flag(dev, val);
}
EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);
void xdp_features_clear_redirect_target(struct net_device *dev)
{
- dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT |
- NETDEV_XDP_ACT_NDO_XMIT_SG);
- call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+ xdp_features_t val = dev->xdp_features;
+
+ val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG);
+ xdp_set_features_flag(dev, val);
}
EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);