From 19f6d3f3c8422d65b5e3d2162e30ef07c6e21ea2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 23 Jan 2017 10:59:22 -0800 Subject: net/tcp-fastopen: Add new API support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new socket option, TCP_FASTOPEN_CONNECT, as an alternative way to perform Fast Open on the active side (client). Prior to this patch, a client needs to replace the connect() call with sendto(MSG_FASTOPEN). This can be cumbersome for applications who want to use Fast Open: these socket operations are often done in lower layer libraries used by many other applications. Changing these libraries and/or the socket call sequences are not trivial. A more convenient approach is to perform Fast Open by simply enabling a socket option when the socket is created w/o changing other socket calls sequence: s = socket() create a new socket setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT …); newly introduced sockopt If set, new functionality described below will be used. Return ENOTSUPP if TFO is not supported or not enabled in the kernel. connect() With cookie present, return 0 immediately. With no cookie, initiate 3WHS with TFO cookie-request option and return -1 with errno = EINPROGRESS. write()/sendmsg() With cookie present, send out SYN with data and return the number of bytes buffered. With no cookie, and 3WHS not yet completed, return -1 with errno = EINPROGRESS. No MSG_FASTOPEN flag is needed. read() Return -1 with errno = EWOULDBLOCK/EAGAIN if connect() is called but write() is not called yet. Return -1 with errno = EWOULDBLOCK/EAGAIN if connection is established but no msg is received yet. Return number of bytes read if socket is established and there is msg received. The new API simplifies life for applications that always perform a write() immediately after a successful connect(). Such applications can now take advantage of Fast Open by merely making one new setsockopt() call at the time of creating the socket. Nothing else about the application's socket call sequence needs to change. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux/tcp.h') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c53de2691cec..6ff35eb48d10 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -116,6 +116,7 @@ enum { #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ struct tcp_repair_opt { __u32 opt_code; -- cgit From 7e98102f489775d8c000884fca8a0d995ea688a9 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 27 Jan 2017 16:24:38 -0800 Subject: tcp: record pkts sent and retransmistted Add two stats in SCM_TIMESTAMPING_OPT_STATS: TCP_NLA_DATA_SEGS_OUT: total data packets sent including retransmission TCP_NLA_TOTAL_RETRANS: total data packets retransmitted The names are picked to be consistent with corresponding fields in TCP_INFO. This allows applications that are using the timestamping API to measure latency stats to also retrive retransmission rate of application write. Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/tcp.h') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 6ff35eb48d10..38a2b07afdff 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -227,6 +227,8 @@ enum { TCP_NLA_BUSY, /* Time (usec) busy sending data */ TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */ TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ + TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */ + TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2ed472ebf3b5..b751abc56935 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2870,7 +2870,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) struct sk_buff *stats; struct tcp_info info; - stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); + stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); if (!stats) return NULL; @@ -2881,6 +2881,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) info.tcpi_rwnd_limited, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, info.tcpi_sndbuf_limited, TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT, + tp->data_segs_out, TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, + tp->total_retrans, TCP_NLA_PAD); return stats; } -- cgit