From 26c4f7da3e413da697a7beb22ad496390eda7da0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:27 -0800 Subject: net: Fix remcsum in GRO path to not change packet Remote checksum offload processing is currently the same for both the GRO and non-GRO path. When the remote checksum offload option is encountered, the checksum field referred to is modified in the packet. So in the GRO case, the packet is modified in the GRO path and then the operation is skipped when the packet goes through the normal path based on skb->remcsum_offload. There is a problem in that the packet may be modified in the GRO path, but then forwarded off host still containing the remote checksum option. A remote host will again perform RCO but now the checksum verification will fail since GRO RCO already modified the checksum. To fix this, we ensure that GRO restores a packet to it's original state before returning. In this model, when GRO processes a remote checksum option it still changes the checksum per the algorithm but on return from lower layer processing the checksum is restored to its original value. In this patch we add define gro_remcsum structure which is passed to skb_gro_remcsum_process to save offset and delta for the checksum being changed. After lower layer processing, skb_gro_remcsum_cleanup is called to restore the checksum before returning from GRO. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 19 +++++++++---------- include/linux/netdevice.h | 25 +++++++++++++++++++++++-- include/net/checksum.h | 5 +++++ net/ipv4/fou.c | 20 ++++++++++---------- 4 files changed, 47 insertions(+), 22 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0e57e862c399..30310a63475a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -555,12 +555,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, unsigned int off, struct vxlanhdr *vh, size_t hdrlen, - u32 data) + u32 data, struct gro_remcsum *grc) { size_t start, offset, plen; if (skb->remcsum_offload) - return vh; + return NULL; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; @@ -579,7 +579,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, return NULL; } - skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset); + skb_gro_remcsum_process(skb, (void *)vh + hdrlen, + start, offset, grc); skb->remcsum_offload = 1; @@ -597,6 +598,9 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, udp_offloads); u32 flags; + struct gro_remcsum grc; + + skb_gro_remcsum_init(&grc); off_vx = skb_gro_offset(skb); hlen = off_vx + sizeof(*vh); @@ -614,7 +618,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), - ntohl(vh->vx_vni)); + ntohl(vh->vx_vni), &grc); if (!vh) goto out; @@ -637,6 +641,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, pp = eth_gro_receive(head, skb); out: + skb_gro_remcsum_cleanup(skb, &grc); NAPI_GRO_CB(skb)->flush |= flush; return pp; @@ -1154,12 +1159,6 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, { size_t start, offset, plen; - if (skb->remcsum_offload) { - /* Already processed in GRO path */ - skb->remcsum_offload = 0; - return vh; - } - start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; offset = start + ((data & VXLAN_RCO_UDP) ? offsetof(struct udphdr, check) : diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d115256ed5a2..3aa02457fe4f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2321,8 +2321,19 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +struct gro_remcsum { + int offset; + __wsum delta; +}; + +static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) +{ + grc->delta = 0; +} + static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, - int start, int offset) + int start, int offset, + struct gro_remcsum *grc) { __wsum delta; @@ -2331,10 +2342,20 @@ static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + + grc->offset = (ptr + offset) - (void *)skb->head; + grc->delta = delta; } +static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, + struct gro_remcsum *grc) +{ + if (!grc->delta) + return; + + remcsum_unadjust((__sum16 *)(skb->head + grc->offset), grc->delta); +} static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, diff --git a/include/net/checksum.h b/include/net/checksum.h index e339a9513e29..0a55ac715077 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -167,4 +167,9 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } +static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +{ + *psum = csum_fold(csum_sub(delta, *psum)); +} + #endif diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 92ddea1e6457..7fa8d36e56d4 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -71,12 +71,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - if (skb->remcsum_offload) { - /* Already processed in GRO path */ - skb->remcsum_offload = 0; - return guehdr; - } - if (!pskb_may_pull(skb, plen)) return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -214,7 +208,8 @@ out_unlock: static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, struct guehdr *guehdr, void *data, - size_t hdrlen, u8 ipproto) + size_t hdrlen, u8 ipproto, + struct gro_remcsum *grc) { __be16 *pd = data; size_t start = ntohs(pd[0]); @@ -222,7 +217,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); if (skb->remcsum_offload) - return guehdr; + return NULL; if (!NAPI_GRO_CB(skb)->csum_valid) return NULL; @@ -234,7 +229,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, + start, offset, grc); skb->remcsum_offload = 1; @@ -254,6 +250,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; + struct gro_remcsum grc; + + skb_gro_remcsum_init(&grc); off = skb_gro_offset(skb); len = off + sizeof(*guehdr); @@ -295,7 +294,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_gro_remcsum(skb, off, guehdr, data + doffset, hdrlen, - guehdr->proto_ctype); + guehdr->proto_ctype, &grc); if (!guehdr) goto out; @@ -345,6 +344,7 @@ out_unlock: rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, &grc); return pp; } -- cgit From 6edec0e61b1e52f9144935d28c9088f5b202e61c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:28 -0800 Subject: net: Clarify meaning of CHECKSUM_PARTIAL for receive path The current meaning of CHECKSUM_PARTIAL for validating checksums is that _all_ checksums in the packet are considered valid. However, in the manner that CHECKSUM_PARTIAL is set only the checksum at csum_start+csum_offset and any preceding checksums may be considered valid. If there are checksums in the packet after csum_offset it is possible they have not been verfied. This patch changes CHECKSUM_PARTIAL logic in skb_csum_unnecessary and __skb_gro_checksum_validate_needed to only considered checksums referring to csum_start and any preceding checksums (with starting offset before csum_start) to be verified. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- include/linux/skbuff.h | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3aa02457fe4f..9e9be221e4bf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2246,7 +2246,9 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) { - return (skb->ip_summed != CHECKSUM_PARTIAL && + return ((skb->ip_summed != CHECKSUM_PARTIAL || + skb_checksum_start_offset(skb) < + skb_gro_offset(skb)) && NAPI_GRO_CB(skb)->csum_cnt == 0 && (!zero_okay || check)); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1bb36edb66b9..da6028a50687 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -83,11 +83,15 @@ * * CHECKSUM_PARTIAL: * - * This is identical to the case for output below. This may occur on a packet + * A checksum is set up to be offloaded to a device as described in the + * output description for CHECKSUM_PARTIAL. This may occur on a packet * received directly from another Linux OS, e.g., a virtualized Linux kernel - * on the same host. The packet can be treated in the same way as - * CHECKSUM_UNNECESSARY, except that on output (i.e., forwarding) the - * checksum must be filled in by the OS or the hardware. + * on the same host, or it may be set in the input path in GRO or remote + * checksum offload. For the purposes of checksum verification, the checksum + * referred to by skb->csum_start + skb->csum_offset and any preceding + * checksums in the packet are considered verified. Any checksums in the + * packet that are after the checksum being offloaded are not considered to + * be verified. * * B. Checksumming on output. * @@ -2915,7 +2919,10 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { - return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid); + return ((skb->ip_summed == CHECKSUM_UNNECESSARY) || + skb->csum_valid || + (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) >= 0)); } /** -- cgit From 6db93ea13b7937c0523ea1f977b322f1347a5633 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:29 -0800 Subject: udp: Set SKB_GSO_UDP_TUNNEL* in UDP GRO path Properly set GSO types and skb->encapsulation in the UDP tunnel GRO complete so that packets are properly represented for GSO. This sets SKB_GSO_UDP_TUNNEL or SKB_GSO_UDP_TUNNEL_CSUM depending on whether non-zero checksums were received, and sets SKB_GSO_TUNNEL_REMCSUM if the remote checksum option was processed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/udp_offload.c | 13 ++++++++++++- net/ipv6/udp_offload.c | 6 +++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d10f6f4ead27..4915d8284a86 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -402,6 +402,13 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) } rcu_read_unlock(); + + if (skb->remcsum_offload) + skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; + + skb->encapsulation = 1; + skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); + return err; } @@ -410,9 +417,13 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) + if (uh->check) { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, iph->daddr, 0); + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } return udp_gro_complete(skb, nhoff); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a56276996b72..ab889bb16b3c 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -161,9 +161,13 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) + if (uh->check) { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); + } else { + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } return udp_gro_complete(skb, nhoff); } -- cgit From baa32ff42871f2d4aca9c08c9403d0e497325564 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:30 -0800 Subject: net: Use more bit fields in napi_gro_cb This patch moves the free and same_flow fields to be bit fields (2 and 1 bit sized respectively). This frees up some space for u16's. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e9be221e4bf..43fd0a4dcd04 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1923,20 +1923,15 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* This is non-zero if the packet may be of the same flow. */ - u8 same_flow; - - /* Free the skb? */ - u8 free; -#define NAPI_GRO_FREE 1 -#define NAPI_GRO_FREE_STOLEN_HEAD 2 - /* jiffies when first packet was created/queued */ unsigned long age; /* Used in ipv6_gro_receive() and foo-over-udp */ u16 proto; + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow:1; + /* Used in udp_gro_receive */ u8 udp_mark:1; @@ -1946,9 +1941,16 @@ struct napi_gro_cb { /* Number of checksums via CHECKSUM_UNNECESSARY */ u8 csum_cnt:3; + /* Free the skb? */ + u8 free:2; +#define NAPI_GRO_FREE 1 +#define NAPI_GRO_FREE_STOLEN_HEAD 2 + /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; + /* 7 bit hole */ + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; -- cgit From 15e2396d4e3ce23188852b74d924107982c63b42 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:31 -0800 Subject: net: Infrastructure for CHECKSUM_PARTIAL with remote checsum offload This patch adds infrastructure so that remote checksum offload can set CHECKSUM_PARTIAL instead of calling csum_partial and writing the modfied checksum field. Add skb_remcsum_adjust_partial function to set an skb for using CHECKSUM_PARTIAL with remote checksum offload. Changed skb_remcsum_process and skb_gro_remcsum_process to take a boolean argument to indicate if checksum partial can be set or the checksum needs to be modified using the normal algorithm. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- include/linux/netdevice.h | 19 ++++++++++++++++++- include/linux/skbuff.h | 15 ++++++++++++++- net/core/dev.c | 1 + net/ipv4/fou.c | 4 ++-- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 30310a63475a..4f04443cfd33 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -580,7 +580,7 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, } skb_gro_remcsum_process(skb, (void *)vh + hdrlen, - start, offset, grc); + start, offset, grc, true); skb->remcsum_offload = 1; @@ -1171,7 +1171,7 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset); + skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, true); return vh; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43fd0a4dcd04..5897b4ea5a3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1923,6 +1923,9 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; + /* Start offset for remote checksum offload */ + u16 gro_remcsum_start; + /* jiffies when first packet was created/queued */ unsigned long age; @@ -2244,6 +2247,12 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, __sum16 __skb_gro_checksum_complete(struct sk_buff *skb); +static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) == + skb_gro_offset(skb)); +} + static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) @@ -2251,6 +2260,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, return ((skb->ip_summed != CHECKSUM_PARTIAL || skb_checksum_start_offset(skb) < skb_gro_offset(skb)) && + !skb_at_gro_remcsum_start(skb) && NAPI_GRO_CB(skb)->csum_cnt == 0 && (!zero_okay || check)); } @@ -2337,12 +2347,19 @@ static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, int start, int offset, - struct gro_remcsum *grc) + struct gro_remcsum *grc, + bool nopartial) { __wsum delta; BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + if (!nopartial) { + NAPI_GRO_CB(skb)->gro_remcsum_start = + ((unsigned char *)ptr + start) - skb->head; + return; + } + delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); /* Adjust skb->csum since we changed the packet */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da6028a50687..30007afe70b3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3104,16 +3104,29 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr, + u16 start, u16 offset) +{ + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = ((unsigned char *)ptr + start) - skb->head; + skb->csum_offset = offset - start; +} + /* Update skbuf and packet to reflect the remote checksum offload operation. * When called, ptr indicates the starting point for skb->csum when * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete * here, skb_postpull_rcsum is done so skb->csum start is ptr. */ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, - int start, int offset) + int start, int offset, bool nopartial) { __wsum delta; + if (!nopartial) { + skb_remcsum_adjust_partial(skb, ptr, start, offset); + return; + } + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { __skb_checksum_complete(skb); skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); diff --git a/net/core/dev.c b/net/core/dev.c index d030575532a2..48c6ecb18f3c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4024,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ switch (skb->ip_summed) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 7fa8d36e56d4..d320f575cc62 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -75,7 +75,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset, true); return guehdr; } @@ -230,7 +230,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, - start, offset, grc); + start, offset, grc, true); skb->remcsum_offload = 1; -- cgit From 0ace2ca89cbd6bcdf2b9d2df1fa0fa24ea9d1653 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:32 -0800 Subject: vxlan: Use checksum partial with remote checksum offload Change remote checksum handling to set checksum partial as default behavior. Added an iflink parameter to configure not using checksum partial (calling csum_partial to update checksum). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 25 +++++++++++++++++++------ include/net/vxlan.h | 4 +++- include/uapi/linux/if_link.h | 1 + 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 4f04443cfd33..1e0a775ea882 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -555,7 +555,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, unsigned int off, struct vxlanhdr *vh, size_t hdrlen, - u32 data, struct gro_remcsum *grc) + u32 data, struct gro_remcsum *grc, + bool nopartial) { size_t start, offset, plen; @@ -580,7 +581,7 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, } skb_gro_remcsum_process(skb, (void *)vh + hdrlen, - start, offset, grc, true); + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -618,7 +619,9 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), - ntohl(vh->vx_vni), &grc); + ntohl(vh->vx_vni), &grc, + !!(vs->flags & + VXLAN_F_REMCSUM_NOPARTIAL)); if (!vh) goto out; @@ -1155,7 +1158,7 @@ static void vxlan_igmp_leave(struct work_struct *work) } static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, - size_t hdrlen, u32 data) + size_t hdrlen, u32 data, bool nopartial) { size_t start, offset, plen; @@ -1171,7 +1174,8 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, true); + skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, + nopartial); return vh; } @@ -1208,7 +1212,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni); + vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni, + !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL)); if (!vxh) goto drop; @@ -2437,6 +2442,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, + [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -2760,6 +2766,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_GBP]) vxlan->flags |= VXLAN_F_GBP; + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) + vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL; + if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET, vxlan->dst_port, vxlan->flags)) { pr_info("duplicate VNI %u\n", vni); @@ -2909,6 +2918,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_flag(skb, IFLA_VXLAN_GBP)) goto nla_put_failure; + if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL && + nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 2927d6244481..eabd3a038674 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -128,13 +128,15 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 +#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 /* Flags that are used in the receive patch. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ - VXLAN_F_REMCSUM_RX) + VXLAN_F_REMCSUM_RX | \ + VXLAN_F_REMCSUM_NOPARTIAL) struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0deee3eeddbf..dfd0bb22e554 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -374,6 +374,7 @@ enum { IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, IFLA_VXLAN_GBP, + IFLA_VXLAN_REMCSUM_NOPARTIAL, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit From fe881ef11cf0220f118816181930494d484c4883 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 10 Feb 2015 16:30:33 -0800 Subject: gue: Use checksum partial with remote checksum offload Change remote checksum handling to set checksum partial as default behavior. Added an iflink parameter to configure not using checksum partial (calling csum_partial to update checksum). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/uapi/linux/fou.h | 1 + net/ipv4/fou.c | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index 8df06894da23..c303588bb767 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -14,6 +14,7 @@ enum { FOU_ATTR_AF, /* u8 */ FOU_ATTR_IPPROTO, /* u8 */ FOU_ATTR_TYPE, /* u8 */ + FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ __FOU_ATTR_MAX, }; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index d320f575cc62..ff069f6597ac 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -22,14 +22,18 @@ static LIST_HEAD(fou_list); struct fou { struct socket *sock; u8 protocol; + u8 flags; u16 port; struct udp_offload udp_offloads; struct list_head list; }; +#define FOU_F_REMCSUM_NOPARTIAL BIT(0) + struct fou_cfg { u16 type; u8 protocol; + u8 flags; struct udp_port_cfg udp_config; }; @@ -64,7 +68,8 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, - void *data, size_t hdrlen, u8 ipproto) + void *data, size_t hdrlen, u8 ipproto, + bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); @@ -75,7 +80,8 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset, true); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, + start, offset, nopartial); return guehdr; } @@ -136,7 +142,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_remcsum(skb, guehdr, data + doffset, - hdrlen, guehdr->proto_ctype); + hdrlen, guehdr->proto_ctype, + !!(fou->flags & + FOU_F_REMCSUM_NOPARTIAL)); if (!guehdr) goto drop; @@ -209,7 +217,7 @@ out_unlock: static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, struct guehdr *guehdr, void *data, size_t hdrlen, u8 ipproto, - struct gro_remcsum *grc) + struct gro_remcsum *grc, bool nopartial) { __be16 *pd = data; size_t start = ntohs(pd[0]); @@ -230,7 +238,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, - start, offset, grc, true); + start, offset, grc, nopartial); skb->remcsum_offload = 1; @@ -250,6 +258,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; + struct fou *fou = container_of(uoff, struct fou, udp_offloads); struct gro_remcsum grc; skb_gro_remcsum_init(&grc); @@ -294,7 +303,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, if (flags & GUE_PFLAG_REMCSUM) { guehdr = gue_gro_remcsum(skb, off, guehdr, data + doffset, hdrlen, - guehdr->proto_ctype, &grc); + guehdr->proto_ctype, &grc, + !!(fou->flags & + FOU_F_REMCSUM_NOPARTIAL)); if (!guehdr) goto out; @@ -455,6 +466,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk = sock->sk; + fou->flags = cfg->flags; fou->port = cfg->udp_config.local_udp_port; /* Initial for fou type */ @@ -541,6 +553,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, [FOU_ATTR_TYPE] = { .type = NLA_U8, }, + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, }; static int parse_nl_config(struct genl_info *info, @@ -571,6 +584,9 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_TYPE]) cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); + if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) + cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; + return 0; } -- cgit