aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/netdev_features.h 9
-rw-r--r-- include/linux/netdevice.h 5
-rw-r--r-- include/linux/skbuff.h 4
-rw-r--r-- include/net/udp.h 2
-rw-r--r-- net/core/dev.c 4
-rw-r--r-- net/core/skbuff.c 91
-rw-r--r-- net/ethtool/common.c 1
-rw-r--r-- net/ipv4/udp_offload.c 104
-rw-r--r-- net/ipv6/udp_offload.c 27
9 files changed, 217 insertions, 30 deletions
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 4b19c544c59a..34d050bb1ae6 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -53,8 +53,9 @@ enum {
NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */
NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */
NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */
+ NETIF_F_GSO_FRAGLIST_BIT, /* ... Fraglist GSO */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
- NETIF_F_GSO_UDP_L4_BIT,
+ NETIF_F_GSO_FRAGLIST_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
@@ -80,6 +81,7 @@ enum {
NETIF_F_GRO_HW_BIT, /* Hardware Generic receive offload */
NETIF_F_HW_TLS_RECORD_BIT, /* Offload TLS record */
+ NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */
/*
* Add your fresh new feature above and remember to update
@@ -150,6 +152,8 @@ enum {
#define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4)
#define NETIF_F_HW_TLS_TX __NETIF_F(HW_TLS_TX)
#define NETIF_F_HW_TLS_RX __NETIF_F(HW_TLS_RX)
+#define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST)
+#define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST)
/* Finds the next feature with the highest number of the range of start till 0.
*/
@@ -226,6 +230,9 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
/* changeable features with no special hardware requirements */
#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO)
+/* Changeable features with no special hardware requirements that default to off. */
+#define NETIF_F_SOFT_FEATURES_OFF NETIF_F_GRO_FRAGLIST
+
#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \
NETIF_F_HW_VLAN_CTAG_RX | \
NETIF_F_HW_VLAN_CTAG_TX | \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 78e9c6c1b131..20445f94eb1c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2326,7 +2326,8 @@ struct napi_gro_cb {
/* Number of gro_receive callbacks this packet already went through */
u8 recursion_counter:4;
- /* 1 bit hole */
+ /* GRO is done by frag_list pointer chaining. */
+ u8 is_flist:1;
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
@@ -2694,6 +2695,7 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id);
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_restart(struct net_device *dev);
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
{
@@ -4570,6 +4572,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 26beae7db264..3d13a4b717e9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -592,6 +592,8 @@ enum {
SKB_GSO_UDP = 1 << 16,
SKB_GSO_UDP_L4 = 1 << 17,
+
+ SKB_GSO_FRAGLIST = 1 << 18,
};
#if BITS_PER_LONG > 32
@@ -3533,6 +3535,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet);
bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features,
+ unsigned int offset);
struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, int write_len);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
diff --git a/include/net/udp.h b/include/net/udp.h
index bad74f780831..44e0e52b585c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,7 +167,7 @@ typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
__be16 dport);
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
- struct udphdr *uh, udp_lookup_t lookup);
+ struct udphdr *uh, struct sock *sk);
int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/net/core/dev.c b/net/core/dev.c
index c806b078097b..ce8900dbd9ea 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3249,7 +3249,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
- if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+ if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
skb_warn_bad_offload(skb);
return segs;
@@ -9283,7 +9283,7 @@ int register_netdevice(struct net_device *dev)
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
- dev->hw_features |= NETIF_F_SOFT_FEATURES;
+ dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
dev->features |= NETIF_F_SOFT_FEATURES;
if (dev->netdev_ops->ndo_udp_tunnel_add) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 48a7029529c9..864cb9e9622f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3639,6 +3639,97 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
return head_frag;
}
+/**
+ * skb_segment_list - turn an skb's frag_list members into a ->next chain.
+ * @skb: head buffer whose skb_shinfo()->frag_list holds the other segments
+ * @features: device features, only consulted via skb_needs_linearize()
+ * @offset: added to the negated network offset when pushing each buffer's
+ *          data pointer, and the number of bytes (plus the tunnel header
+ *          length) copied from the head into every segment
+ *
+ * Detaches the frag_list from @skb, links every former member behind @skb
+ * through ->next, gives each one a copy of the head's skb metadata and
+ * leading header bytes, and subtracts their len/truesize from @skb.
+ *
+ * Return: @skb itself (now the first element of the chain, with ->prev
+ * pointing at the last element), or ERR_PTR(-ENOMEM) if linearizing any
+ * buffer fails; in that case every chained segment has been freed.
+ */
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
+ netdev_features_t features,
+ unsigned int offset)
+{
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int delta_truesize = 0;
+ unsigned int delta_len = 0;
+ struct sk_buff *tail = NULL;
+ struct sk_buff *nskb;
+
+ /* Move the head's data pointer by (offset - network offset). */
+ skb_push(skb, -skb_network_offset(skb) + offset);
+
+ /* From here on the segments are reachable only through list_skb. */
+ skb_shinfo(skb)->frag_list = NULL;
+
+ /* NOTE(review): the loop dereferences list_skb before testing it, so
+  * this assumes a non-empty frag_list - confirm callers guarantee it.
+  */
+ do {
+ nskb = list_skb;
+ list_skb = list_skb->next;
+
+ /* Append nskb to the ->next chain that starts at the head skb. */
+ if (!tail)
+ skb->next = nskb;
+ else
+ tail->next = nskb;
+
+ tail = nskb;
+
+ /* Accumulate what has to be subtracted from the head afterwards. */
+ delta_len += nskb->len;
+ delta_truesize += nskb->truesize;
+
+ /* Same data pointer adjustment as was applied to the head above. */
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
+
+ __copy_skb_header(nskb, skb);
+
+ /* The copied header offsets are relative to the head's headroom;
+  * shift them by the headroom difference so they fit nskb.
+  */
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ /* Copy offset + tnl_hlen bytes, starting tnl_hlen bytes before the
+  * head's data pointer, to the same relative position in nskb.
+  */
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ nskb->data - tnl_hlen,
+ offset + tnl_hlen);
+
+ if (skb_needs_linearize(nskb, features) &&
+ __skb_linearize(nskb))
+ goto err_linearize;
+
+ } while (list_skb);
+
+ /* The head no longer accounts for the buffers that were split off. */
+ skb->truesize = skb->truesize - delta_truesize;
+ skb->data_len = skb->data_len - delta_len;
+ skb->len = skb->len - delta_len;
+
+ skb_gso_reset(skb);
+
+ /* Record the last segment of the chain in the head's ->prev. */
+ skb->prev = tail;
+
+ if (skb_needs_linearize(skb, features) &&
+ __skb_linearize(skb))
+ goto err_linearize;
+
+ /* NOTE(review): presumably takes an extra reference because the head
+  * is handed back as a segment while the caller still owns the
+  * original - confirm against the GSO callers.
+  */
+ skb_get(skb);
+
+ return skb;
+
+err_linearize:
+ /* Free everything chained behind the head; the head itself is kept. */
+ kfree_skb_list(skb->next);
+ skb->next = NULL;
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(skb_segment_list);
+
+/**
+ * skb_gro_receive_list - merge @skb into @p by chaining it on p's frag_list.
+ * @p: GRO packet that is being aggregated
+ * @skb: newly received buffer to append
+ *
+ * Appends @skb after the last buffer recorded in NAPI_GRO_CB(@p)->last and
+ * updates the count, len, data_len and truesize of @p accordingly.
+ *
+ * Return: 0 on success, -E2BIG if the combined length would reach 65536.
+ */
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+ /* Checked with skb's length before the pull further down. */
+ if (unlikely(p->len + skb->len >= 65536))
+ return -E2BIG;
+
+ /* last == p means nothing has been chained yet: start the frag_list.
+  * Otherwise link behind the most recently chained buffer.
+  */
+ if (NAPI_GRO_CB(p)->last == p)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
+
+ /* Drop the bytes up to the current GRO offset from skb's data. */
+ skb_pull(skb, skb_gro_offset(skb));
+
+ NAPI_GRO_CB(p)->last = skb;
+ NAPI_GRO_CB(p)->count++;
+ /* Accounting uses skb->len as it is after the pull above. */
+ p->data_len += skb->len;
+ p->truesize += skb->truesize;
+ p->len += skb->len;
+
+ NAPI_GRO_CB(skb)->same_flow = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(skb_gro_receive_list);
+
/**
* skb_segment - Perform protocol segmentation on skb.
* @head_skb: buffer to segment
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index e621b1694d2f..c7b8956c3827 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -59,6 +59,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
[NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
[NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
+ [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list",
};
const char
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b25e42100ceb..1a98583a79f4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -184,6 +184,20 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
+/*
+ * Segment a fraglist GSO UDP skb by handing it to skb_segment_list() with
+ * the MAC header length as offset, then rewrite the UDP length field of the
+ * returned skb to cover a single segment (UDP header + gso_size).
+ *
+ * Returns the skb returned by skb_segment_list(), or its error pointer
+ * unchanged on failure.
+ */
+static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ /* Read gso_size before segmenting: skb_segment_list() calls
+  * skb_gso_reset() on the head skb.
+  */
+ unsigned int mss = skb_shinfo(skb)->gso_size;
+
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ /* Only the UDP header of the returned (head) skb is written here. */
+ udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
+
+ return skb;
+}
+
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features)
{
@@ -196,6 +210,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features);
+
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
@@ -354,6 +371,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct udphdr *uh2;
struct sk_buff *p;
unsigned int ulen;
+ int ret = 0;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -369,7 +387,6 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
}
/* pull encapsulating udp header */
skb_gro_pull(skb, sizeof(struct udphdr));
- skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
list_for_each_entry(p, head, list) {
if (!NAPI_GRO_CB(p)->same_flow)
@@ -383,14 +400,40 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
continue;
}
+ if (NAPI_GRO_CB(skb)->is_flist != NAPI_GRO_CB(p)->is_flist) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return p;
+ }
+
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
* leading to excessive truesize values.
* On len mismatch merge the first packet shorter than gso_size,
* otherwise complete the GRO packet.
*/
- if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
- ulen != ntohs(uh2->len) ||
+ if (ulen > ntohs(uh2->len)) {
+ pp = p;
+ } else {
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ if (!pskb_may_pull(skb, skb_gro_offset(skb))) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+ if ((skb->ip_summed != p->ip_summed) ||
+ (skb->csum_level != p->csum_level)) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+ ret = skb_gro_receive_list(p, skb);
+ } else {
+ skb_gro_postpull_rcsum(skb, uh,
+ sizeof(struct udphdr));
+
+ ret = skb_gro_receive(p, skb);
+ }
+ }
+
+ if (ret || ulen != ntohs(uh2->len) ||
NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
pp = p;
@@ -401,36 +444,29 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
-INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
- __be16 sport, __be16 dport));
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
- struct udphdr *uh, udp_lookup_t lookup)
+ struct udphdr *uh, struct sock *sk)
{
struct sk_buff *pp = NULL;
struct sk_buff *p;
struct udphdr *uh2;
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- struct sock *sk;
- rcu_read_lock();
- sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
- udp4_lib_lookup_skb, skb, uh->source, uh->dest);
- if (!sk)
- goto out_unlock;
+ if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+ NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
- if (udp_sk(sk)->gro_enabled) {
+ if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
- rcu_read_unlock();
return pp;
}
- if (NAPI_GRO_CB(skb)->encap_mark ||
+ if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid) ||
!udp_sk(sk)->gro_receive)
- goto out_unlock;
+ goto out;
/* mark that this skb passed once through the tunnel gro layer */
NAPI_GRO_CB(skb)->encap_mark = 1;
@@ -457,8 +493,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
-out_unlock:
- rcu_read_unlock();
+out:
skb_gro_flush_final(skb, pp, flush);
return pp;
}
@@ -468,8 +503,10 @@ INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sk_buff *pp;
+ struct sock *sk;
- if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
+ if (unlikely(!uh))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -484,7 +521,11 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
inet_gro_compute_pseudo);
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 0;
- return udp_gro_receive(head, skb, uh, udp4_lib_lookup_skb);
+ rcu_read_lock();
+ sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+ pp = udp_gro_receive(head, skb, uh, sk);
+ rcu_read_unlock();
+ return pp;
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -517,9 +558,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
rcu_read_lock();
sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
udp4_lib_lookup_skb, skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_enabled) {
- err = udp_gro_complete_segment(skb);
- } else if (sk && udp_sk(sk)->gro_complete) {
+ if (sk && udp_sk(sk)->gro_complete) {
skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
: SKB_GSO_UDP_TUNNEL;
@@ -529,6 +568,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
skb->encapsulation = 1;
err = udp_sk(sk)->gro_complete(sk, skb,
nhoff + sizeof(struct udphdr));
+ } else {
+ err = udp_gro_complete_segment(skb);
}
rcu_read_unlock();
@@ -544,6 +585,23 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ uh->len = htons(skb->len - nhoff);
+
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+ skb->csum_level++;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = 0;
+ }
+
+ return 0;
+ }
+
if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index f0d5fc27d0b5..584157a07759 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -115,8 +115,10 @@ INDIRECT_CALLABLE_SCOPE
struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
+ struct sk_buff *pp;
+ struct sock *sk;
- if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
+ if (unlikely(!uh))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
@@ -132,7 +134,11 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 1;
- return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
+ rcu_read_lock();
+ sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+ pp = udp_gro_receive(head, skb, uh, sk);
+ rcu_read_unlock();
+ return pp;
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -144,6 +150,23 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+ if (NAPI_GRO_CB(skb)->is_flist) {
+ uh->len = htons(skb->len - nhoff);
+
+ skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+ if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
+ skb->csum_level++;
+ } else {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = 0;
+ }
+
+ return 0;
+ }
+
if (uh->check)
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);