From 70ecc24841326396a827deb55c3fefac582a729d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:16 -0400 Subject: ipv4: add IP_RECVFRAGSIZE cmsg The IP stack records the largest fragment of a reassembled packet in IPCB(skb)->frag_max_size. When reading a datagram or raw packet that arrived fragmented, expose the value to allow applications to estimate receive path MTU. Tested: Sent data over a veth pair of which the source has a small mtu. Sent data using netcat, received using a dedicated process. Verified that the cmsg IP_RECVFRAGSIZE is returned only when data arrives fragmented, and in that cases matches the veth mtu. ip link add veth0 type veth peer name veth1 ip netns add from ip netns add to ip link set dev veth1 netns to ip netns exec to ip addr add dev veth1 192.168.10.1/24 ip netns exec to ip link set dev veth1 up ip link set dev veth0 netns from ip netns exec from ip addr add dev veth0 192.168.10.2/24 ip netns exec from ip link set dev veth0 up ip netns exec from ip link set dev veth0 mtu 1300 ip netns exec from ethtool -K veth0 ufo off dd if=/dev/zero bs=1 count=1400 2>/dev/null > payload ip netns exec to ./recv_cmsg_recvfragsize -4 -u -p 6000 & ip netns exec from nc -q 1 -u 192.168.10.1 6000 < payload using github.com/wdebruij/kerneltools/blob/master/tests/recvfragsize.c Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index eaf94919291a..4e557f4e9553 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -117,6 +117,7 @@ struct in_addr { #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ -- cgit From 0cc0aa614b4c24b21b2492c0a1753035ee8c6edb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:17 -0400 Subject: ipv6: add IPV6_RECVFRAGSIZE cmsg When reading a datagram or raw packet that arrived fragmented, expose the maximum fragment size if recorded to allow applications to estimate receive path MTU. At this point, the field is only recorded when ipv6 connection tracking is enabled. A follow-up patch will record this field also in the ipv6 input path. Tested using the test for IP_RECVFRAGSIZE plus ip netns exec to ip addr add dev veth1 fc07::1/64 ip netns exec from ip addr add dev veth0 fc07::2/64 ip netns exec to ./recv_cmsg_recvfragsize -6 -u -p 6000 & ip netns exec from nc -q 1 -u fc07::1 6000 < payload Both with and without enabling connection tracking ip6tables -A INPUT -m state --state NEW -p udp -j LOG Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +++-- include/uapi/linux/in6.h | 1 + net/ipv6/datagram.c | 5 +++++ net/ipv6/ipv6_sockglue.c | 8 ++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..1afb6e8d35c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -229,8 +229,9 @@ struct ipv6_pinfo { rxflow:1, rxtclass:1, rxpmtu:1, - rxorigdstaddr:1; - /* 2 bits hole */ + rxorigdstaddr:1, + recvfragsize:1; + /* 1 bits hole */ } bits; __u16 all; } rxopt; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index b39ea4f2e701..46444f8fbee4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -283,6 +283,7 @@ struct in6_flowlabel_req { #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 /* * Multicast Routing: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..620c79a0130a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -715,6 +715,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } + if (np->rxopt.bits.recvfragsize && opt->frag_max_size) { + int val = opt->frag_max_size; + + put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val); + } } void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 636ec56f5f50..6c126780fcf2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -868,6 +868,10 @@ pref_skip_coa: np->autoflowlabel = valbool; retv = 0; break; + case IPV6_RECVFRAGSIZE: + np->rxopt.bits.recvfragsize = valbool; + retv = 0; + break; } release_sock(sk); @@ -1310,6 +1314,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->autoflowlabel; break; + case IPV6_RECVFRAGSIZE: + val = np->rxopt.bits.recvfragsize; + break; + default: return -ENOPROTOOPT; } -- cgit