From 37bfbdda0b036a3720924e04c0171d9038159c2c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Sep 2015 07:39:17 -0700 Subject: tcp: remove tcp_synack_options() socket argument We do not use the socket in this function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4cd0b50d4e46..87392cb51b11 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -612,12 +612,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } /* Set up TCP options for SYN-ACKs. */ -static unsigned int tcp_synack_options(struct sock *sk, - struct request_sock *req, - unsigned int mss, struct sk_buff *skb, - struct tcp_out_options *opts, - const struct tcp_md5sig_key *md5, - struct tcp_fastopen_cookie *foc) +static unsigned int tcp_synack_options(struct request_sock *req, + unsigned int mss, struct sk_buff *skb, + struct tcp_out_options *opts, + const struct tcp_md5sig_key *md5, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -2989,8 +2988,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc) + sizeof(*th); + tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); -- cgit From 6ac705b1805863b1899e85f641bb265f9e6e9d99 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Sep 2015 07:39:18 -0700 Subject: tcp: remove tcp_ecn_make_synack() socket argument SYNACK packets might be sent without holding socket lock. For DCTCP/ECN sake, we should call INET_ECN_xmit() while socket lock is owned, and only when we init/change congestion control. This also fixies a bug if congestion module is changed from dctcp to another one on a listener : we now clear ECN bits properly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 12 ++++++++++-- net/ipv4/tcp_output.c | 10 +++------- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 93c4dc3ab23f..882caa4e72bc 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -173,6 +173,10 @@ out: */ if (ca->get_info) memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); } void tcp_init_congestion_control(struct sock *sk) @@ -181,6 +185,10 @@ void tcp_init_congestion_control(struct sock *sk) if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); + if (tcp_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); } static void tcp_reinit_congestion_control(struct sock *sk, @@ -192,8 +200,8 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); + if (sk->sk_state != TCP_CLOSE) + tcp_init_congestion_control(sk); } /* Manage refcounts on socket close. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 87392cb51b11..ba6194152d39 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -357,14 +357,10 @@ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) } static void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, - struct sock *sk) +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) { - if (inet_rsk(req)->ecn_ok) { + if (inet_rsk(req)->ecn_ok) th->ece = 1; - if (tcp_ca_needs_ecn(sk)) - INET_ECN_xmit(sk); - } } /* Set up ECN state for a packet on a ESTABLISHED socket that is about to @@ -2998,7 +2994,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - tcp_ecn_make_synack(req, th, sk); + tcp_ecn_make_synack(req, th); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; /* Setting of flags are superfluous here for callers (and ECE is -- cgit From 5d062de7f8ea1ca7c635957ff1144fba815ba34c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Sep 2015 07:39:19 -0700 Subject: tcp: constify tcp_make_synack() socket argument listener socket is not locked when tcp_make_synack() is called. We better make sure no field is written. There is one exception : Since SYNACK packets are attached to the listener at this moment (or SYN_RECV child in case of Fast Open), sock_wmalloc() needs to update sk->sk_wmem_alloc, but this is done using atomic operations so this is safe. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp_output.c | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 45bc3c63c3fd..19f23590baa0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -461,7 +461,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int tcp_connect(struct sock *sk); -struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, +struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc); int tcp_disconnect(struct sock *sk, int flags); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba6194152d39..9eb67a8933f1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2944,20 +2944,25 @@ int tcp_send_synack(struct sock *sk) * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. */ -struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, +struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc) { - struct tcp_out_options opts; struct inet_request_sock *ireq = inet_rsk(req); - struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th; - struct sk_buff *skb; + const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *md5 = NULL; + struct tcp_out_options opts; + struct sk_buff *skb; int tcp_header_size; + struct tcphdr *th; + u16 user_mss; int mss; - skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); + /* sk is a const pointer, because we want to express multiple cpus + * might call us concurrently. + * sock_wmalloc() will change sk->sk_wmem_alloc in an atomic way. + */ + skb = sock_wmalloc((struct sock *)sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2968,8 +2973,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_dst_set(skb, dst); mss = dst_metric_advmss(dst); - if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) - mss = tp->rx_opt.user_mss; + user_mss = READ_ONCE(tp->rx_opt.user_mss); + if (user_mss && user_mss < mss) + mss = user_mss; memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES @@ -3009,7 +3015,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); - tcp_options_write((__be32 *)(th + 1), tp, &opts); + tcp_options_write((__be32 *)(th + 1), NULL, &opts); th->doff = (tcp_header_size >> 2); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); -- cgit From ea3bea3a1d38aab1542176b2ff11a99ce3db9656 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Sep 2015 07:39:23 -0700 Subject: tcp/dccp: constify rtx_synack() and friends This is done to make sure we do not change listener socket while sending SYNACK packets while socket lock is not held. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 +- include/net/tcp.h | 2 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/tcp_output.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 87935cad2f7b..ff7ce1e53ed4 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -32,7 +32,7 @@ struct request_sock_ops { int obj_size; struct kmem_cache *slab; char *slab_name; - int (*rtx_syn_ack)(struct sock *sk, + int (*rtx_syn_ack)(const struct sock *sk, struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, struct request_sock *req); diff --git a/include/net/tcp.h b/include/net/tcp.h index 868c53532169..6630ab180f5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1676,7 +1676,7 @@ int tcp4_proc_init(void); void tcp4_proc_exit(void); #endif -int tcp_rtx_synack(struct sock *sk, struct request_sock *req); +int tcp_rtx_synack(const struct sock *sk, struct request_sock *req); int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ccf4c5629b3c..a46ae9c69ccf 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -498,7 +498,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5165571f397a..4fa199dc69a3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -181,7 +181,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9eb67a8933f1..53ce6cf55598 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3502,7 +3502,7 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } -int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) { const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; struct flowi fl; -- cgit