Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	87
1 file changed, 46 insertions, 41 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f9a8a12b62ee..cb7ca569052c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -357,14 +357,10 @@ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
 }
 
 static void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
-		    struct sock *sk)
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
 {
-	if (inet_rsk(req)->ecn_ok) {
+	if (inet_rsk(req)->ecn_ok)
 		th->ece = 1;
-		if (tcp_ca_needs_ecn(sk))
-			INET_ECN_xmit(sk);
-	}
 }
 
 /* Set up ECN state for a packet on a ESTABLISHED socket that is about to
@@ -612,12 +608,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 }
 
 /* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct sock *sk,
-				   struct request_sock *req,
-				   unsigned int mss, struct sk_buff *skb,
-				   struct tcp_out_options *opts,
-				   const struct tcp_md5sig_key *md5,
-				   struct tcp_fastopen_cookie *foc)
+static unsigned int tcp_synack_options(struct request_sock *req,
+				       unsigned int mss, struct sk_buff *skb,
+				       struct tcp_out_options *opts,
+				       const struct tcp_md5sig_key *md5,
+				       struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -1827,7 +1822,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 
 	/* Ok, it looks like it is advisable to defer. */
 
-	if (cong_win < send_win && cong_win < skb->len)
+	if (cong_win < send_win && cong_win <= skb->len)
 		*is_cwnd_limited = true;
 
 	return true;
@@ -2060,7 +2055,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
-			is_cwnd_limited = true;
 			if (push_one == 2)
 				/* Force out a loss probe pkt. */
 				cwnd_quota = 1;
@@ -2142,6 +2136,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
+		is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
 		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
@@ -2165,7 +2160,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Don't do any loss probe on a Fast Open connection before 3WHS
 	 * finishes.
 	 */
-	if (sk->sk_state == TCP_SYN_RECV)
+	if (tp->fastopen_rsk)
 		return false;
 
 	/* TLP is only scheduled when next timer event is RTO. */
@@ -2175,7 +2170,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -2184,9 +2179,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 		return false;
 
 	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
-	 * for delayed ack when there's one outstanding packet.
+	 * for delayed ack when there's one outstanding packet. If no RTT
+	 * sample is available then probe after TCP_TIMEOUT_INIT.
 	 */
-	timeout = rtt << 1;
+	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
 	if (tp->packets_out == 1)
 		timeout = max_t(u32, timeout,
 				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
@@ -2659,8 +2655,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 			net_dbg_ratelimited("retrans_out leaked\n");
 		}
 #endif
-		if (!tp->retrans_out)
-			tp->lost_retrans_low = tp->snd_nxt;
 		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
 		tp->retrans_out += tcp_skb_pcount(skb);
 
@@ -2668,10 +2662,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		if (!tp->retrans_stamp)
 			tp->retrans_stamp = tcp_skb_timestamp(skb);
 
-		/* snd_nxt is stored to detect loss of retransmitted segment,
-		 * see tcp_input.c tcp_sacktag_write_queue().
-		 */
-		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
 	} else if (err != -EBUSY) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
@@ -2897,6 +2887,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
 			     TCPHDR_ACK | TCPHDR_RST);
+	skb_mstamp_get(&skb->skb_mstamp);
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -2948,20 +2939,22 @@ int tcp_send_synack(struct sock *sk)
  * Allocate one skb and build a SYNACK packet.
  * @dst is consumed : Caller should not use it again.
  */
-struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
-				struct tcp_fastopen_cookie *foc)
+				struct tcp_fastopen_cookie *foc,
+				bool attach_req)
 {
-	struct tcp_out_options opts;
 	struct inet_request_sock *ireq = inet_rsk(req);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th;
-	struct sk_buff *skb;
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *md5 = NULL;
+	struct tcp_out_options opts;
+	struct sk_buff *skb;
 	int tcp_header_size;
+	struct tcphdr *th;
+	u16 user_mss;
 	int mss;
 
-	skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
+	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
 	if (unlikely(!skb)) {
 		dst_release(dst);
 		return NULL;
@@ -2969,11 +2962,21 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	/* Reserve space for headers. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 
+	if (attach_req) {
+		skb_set_owner_w(skb, req_to_sk(req));
+	} else {
+		/* sk is a const pointer, because we want to express multiple
+		 * cpu might call us concurrently.
+		 * sk->sk_wmem_alloc in an atomic, we can promote to rw.
+		 */
+		skb_set_owner_w(skb, (struct sock *)sk);
+	}
 	skb_dst_set(skb, dst);
 
 	mss = dst_metric_advmss(dst);
-	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
-		mss = tp->rx_opt.user_mss;
+	user_mss = READ_ONCE(tp->rx_opt.user_mss);
+	if (user_mss && user_mss < mss)
+		mss = user_mss;
 
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
@@ -2987,8 +2990,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	rcu_read_lock();
 	md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
 #endif
-	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
-					     foc) + sizeof(*th);
+	skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+	tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
+			  sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -2997,7 +3001,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
-	tcp_ecn_make_synack(req, th, sk);
+	tcp_ecn_make_synack(req, th);
 	th->source = htons(ireq->ir_num);
 	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
@@ -3011,8 +3015,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
-	th->window = htons(min(req->rcv_wnd, 65535U));
-	tcp_options_write((__be32 *)(th + 1), tp, &opts);
+	th->window = htons(min(req->rsk_rcv_wnd, 65535U));
+	tcp_options_write((__be32 *)(th + 1), NULL, &opts);
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
 
@@ -3404,7 +3408,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	skb_mstamp_get(&skb->skb_mstamp);
-	NET_INC_STATS_BH(sock_net(sk), mib);
+	NET_INC_STATS(sock_net(sk), mib);
	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
@@ -3499,13 +3503,14 @@ void tcp_send_probe0(struct sock *sk)
 				  TCP_RTO_MAX);
 }
 
-int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
+int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
 {
 	const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
 	struct flowi fl;
 	int res;
 
-	res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
+	tcp_rsk(req)->txhash = net_tx_rndhash();
+	res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
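
A note on the tcp_write_xmit() hunks above: instead of marking the connection cwnd-limited only when tcp_cwnd_test() returns zero inside the send loop, the patched code derives the flag after the loop from whether the packets in flight actually fill the congestion window, then hands it to tcp_cwnd_validate(). Below is a minimal userspace sketch of that predicate; the struct name, field subset, and main() driver are invented for illustration, with the in-flight formula following tcp_packets_in_flight() in include/net/tcp.h:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Pared-down stand-in for the struct tcp_sock fields read by
	 * tcp_packets_in_flight() (see include/net/tcp.h). */
	struct tcp_state {
		uint32_t packets_out;	/* sent, not yet cumulatively ACKed */
		uint32_t sacked_out;	/* selectively ACKed */
		uint32_t lost_out;	/* marked lost */
		uint32_t retrans_out;	/* retransmitted, still outstanding */
		uint32_t snd_cwnd;	/* congestion window, in packets */
	};

	/* in_flight = packets_out - (sacked_out + lost_out) + retrans_out */
	static uint32_t packets_in_flight(const struct tcp_state *tp)
	{
		return tp->packets_out - (tp->sacked_out + tp->lost_out) +
		       tp->retrans_out;
	}

	/* After the patch, tcp_write_xmit() ORs this into is_cwnd_limited
	 * once the send loop finishes, so tcp_cwnd_validate() still learns
	 * the window is full even when the loop exited for another reason. */
	static bool cwnd_limited(const struct tcp_state *tp, bool limited_in_loop)
	{
		return limited_in_loop || packets_in_flight(tp) >= tp->snd_cwnd;
	}

	int main(void)
	{
		struct tcp_state tp = { .packets_out = 10, .snd_cwnd = 10 };

		printf("%d\n", cwnd_limited(&tp, false));	/* 1: flight fills cwnd */
		tp.packets_out = 4;
		printf("%d\n", cwnd_limited(&tp, false));	/* 0: application limited */
		return 0;
	}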
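The tcp_schedule_loss_probe() hunk relies on GCC's binary conditional extension: `x ? : y` evaluates to `x` when `x` is non-zero and to `y` otherwise, so `timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;` falls back to TCP_TIMEOUT_INIT when there is no RTT sample yet (rtt == 0), which is what replaces the dropped `!tp->srtt_us` bail-out earlier in the function. A standalone sketch of the resulting computation, written with an explicit ternary for portability; the constants are illustrative millisecond stand-ins, not the kernel's jiffies values:

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative millisecond stand-ins for jiffies-based constants. */
	#define TCP_TIMEOUT_INIT	1000u	/* ~1 s */
	#define TCP_DELACK_MAX		200u	/* ~200 ms */

	static uint32_t max_u32(uint32_t a, uint32_t b)
	{
		return a > b ? a : b;
	}

	/* Mirrors the patched timeout logic in tcp_schedule_loss_probe():
	 * probe after 2*RTT, fall back to TCP_TIMEOUT_INIT when no RTT
	 * sample exists, and pad to 1.5*rtt + TCP_DELACK_MAX when exactly
	 * one packet is outstanding so a delayed ACK cannot race the probe. */
	static uint32_t tlp_timeout(uint32_t rtt, uint32_t packets_out)
	{
		uint32_t timeout = (rtt << 1) ? (rtt << 1) : TCP_TIMEOUT_INIT;

		if (packets_out == 1)
			timeout = max_u32(timeout, rtt + (rtt >> 1) + TCP_DELACK_MAX);
		return timeout;
	}

	int main(void)
	{
		printf("%u\n", tlp_timeout(50, 2));	/* 100: plain 2*RTT */
		printf("%u\n", tlp_timeout(0, 2));	/* 1000: no RTT sample yet */
		printf("%u\n", tlp_timeout(50, 1));	/* 275: delayed-ACK padding */
		return 0;
	}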
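Finally, the user_mss change pairs with tcp_make_synack() taking a const listener socket: once SYN-ACKs may be built on several CPUs concurrently without the socket lock (see the in-diff comment), tp->rx_opt.user_mss has to be snapshotted once so the non-zero test and the `< mss` comparison cannot observe two different values. A rough userspace approximation of the pattern; the kernel's READ_ONCE() also handles non-scalar sizes, this sketch covers only the common aligned-scalar case, and the struct and function names are invented for the example:

	#include <stdio.h>

	/* Userspace approximation of the kernel's READ_ONCE() for aligned
	 * scalars: force a single, untorn load via a volatile access
	 * (GCC/Clang __typeof__). */
	#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

	struct rx_opts { unsigned short user_mss; };

	/* Mirrors the patched MSS clamp: snapshot user_mss once so the
	 * non-zero test and the comparison use the same value even if a
	 * concurrent setsockopt(TCP_MAXSEG) rewrites the field. */
	static int effective_mss(const struct rx_opts *o, int advmss)
	{
		unsigned short user_mss = READ_ONCE(o->user_mss);

		if (user_mss && user_mss < advmss)
			return user_mss;
		return advmss;
	}

	int main(void)
	{
		struct rx_opts o = { .user_mss = 536 };

		printf("%d\n", effective_mss(&o, 1460));	/* 536 */
		o.user_mss = 0;
		printf("%d\n", effective_mss(&o, 1460));	/* 1460 */
		return 0;
	}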