Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	59
1 file changed, 26 insertions, 33 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 47e08c1b5bc3..2868ef28ce52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -426,26 +426,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 	}
 }
 
-/* 3. Tuning rcvbuf, when connection enters established state. */
-static void tcp_fixup_rcvbuf(struct sock *sk)
-{
-	u32 mss = tcp_sk(sk)->advmss;
-	int rcvmem;
-
-	rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
-		 tcp_default_init_rwnd(mss);
-
-	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
-	 * Allow enough cushion so that sender is not limited by our window
-	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
-		rcvmem <<= 2;
-
-	if (sk->sk_rcvbuf < rcvmem)
-		sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
-}
-
-/* 4. Try to fixup all. It is made immediately after connection enters
+/* 3. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
 void tcp_init_buffer_space(struct sock *sk)
@@ -454,12 +435,10 @@ void tcp_init_buffer_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
 
-	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
-		tcp_fixup_rcvbuf(sk);
 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
 		tcp_sndbuf_expand(sk);
 
-	tp->rcvq_space.space = tp->rcv_wnd;
+	tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
 	tcp_mstamp_refresh(tp);
 	tp->rcvq_space.time = tp->tcp_mstamp;
 	tp->rcvq_space.seq = tp->copied_seq;
@@ -485,7 +464,7 @@ void tcp_init_buffer_space(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
-/* 5. Recalculate window clamp after socket hit its memory bounds. */
+/* 4. Recalculate window clamp after socket hit its memory bounds. */
 static void tcp_clamp_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1305,7 +1284,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			skb->skb_mstamp);
+			tcp_skb_timestamp_us(skb));
 	tcp_rate_skb_delivered(sk, skb, state->rate);
 
 	if (skb == tp->lost_skb_hint)
@@ -1580,7 +1559,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						skb->skb_mstamp);
+						tcp_skb_timestamp_us(skb));
 			tcp_rate_skb_delivered(sk, skb, state->rate);
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 				list_del_init(&skb->tcp_tsorted_anchor);
@@ -3000,8 +2979,8 @@ void tcp_rearm_rto(struct sock *sk)
 			 */
 			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
 		}
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
-					  TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
+				     TCP_RTO_MAX, tcp_rtx_queue_head(sk));
 	}
 }
 
@@ -3103,7 +3082,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
-			last_ackt = skb->skb_mstamp;
+			last_ackt = tcp_skb_timestamp_us(skb);
 			WARN_ON_ONCE(last_ackt == 0);
 			if (!first_ackt)
 				first_ackt = last_ackt;
@@ -3121,7 +3100,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 			tp->delivered += acked_pcount;
 			if (!tcp_skb_spurious_retrans(tp, skb))
 				tcp_rack_advance(tp, sacked, scb->end_seq,
-						 skb->skb_mstamp);
+						 tcp_skb_timestamp_us(skb));
 		}
 		if (sacked & TCPCB_LOST)
 			tp->lost_out -= acked_pcount;
@@ -3215,7 +3194,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
 		}
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
-		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
+		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
+						    tcp_skb_timestamp_us(skb))) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3275,8 +3255,8 @@ static void tcp_ack_probe(struct sock *sk)
 	} else {
 		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
 
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  when, TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				     when, TCP_RTO_MAX, NULL);
 	}
 }
 
@@ -4199,6 +4179,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
+static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
+{
+	/* When the ACK path fails or drops most ACKs, the sender would
+	 * timeout and spuriously retransmit the same segment repeatedly.
+	 * The receiver remembers and reflects via DSACKs. Leverage the
+	 * DSACK state and change the txhash to re-route speculatively.
+	 */
+	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+		sk_rethink_txhash(sk);
+}
+
 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4211,6 +4202,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
+			tcp_rcv_spurious_retrans(sk, skb);
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
 				end_seq = tp->rcv_nxt;
 			tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
@@ -4755,6 +4747,7 @@ queue_and_out:
 	}
 
 	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+		tcp_rcv_spurious_retrans(sk, skb);
		/* A retransmit, 2nd most common case.  Force an immediate ack. */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
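For context on the tcp_init_buffer_space() hunk above: rcvq_space.space now starts from min(rcv_wnd, TCP_INIT_CWND * advmss) instead of the full receive window. A minimal userspace sketch of that arithmetic, assuming TCP_INIT_CWND is 10 segments and an illustrative 1460-byte MSS (this is just the formula, not kernel code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TCP_INIT_CWND	10	/* initial congestion window, in segments */

/* Start the Dynamic Right-Sizing estimate from the smaller of the
 * advertised receive window and one initial congestion window of payload,
 * so the first estimate is not inflated by a large initial rcv_wnd.
 */
static uint32_t initial_rcvq_space(uint32_t rcv_wnd, uint32_t advmss)
{
	uint32_t cap = TCP_INIT_CWND * advmss;

	return rcv_wnd < cap ? rcv_wnd : cap;
}

int main(void)
{
	/* illustrative values: ~64 KB receive window, 1460-byte MSS */
	printf("%" PRIu32 "\n", initial_rcvq_space(65535, 1460));	/* prints 14600 */
	return 0;
}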
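The new tcp_rcv_spurious_retrans() helper compares the arriving segment's start sequence with the first recorded DSACK block and, on a match, calls sk_rethink_txhash() so the flow may be re-routed over a different path. A standalone sketch of just that comparison, with hypothetical structures standing in for the kernel's TCP_SKB_CB(skb)->seq and tcp_sk(sk)->duplicate_sack[0].start_seq:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel state used by the helper. */
struct rx_segment {
	uint32_t seq;		/* start sequence of the arriving segment */
};

struct dsack_state {
	uint32_t start_seq;	/* start of the last reflected DSACK block */
};

/* True when the segment starts exactly at the remembered DSACK block,
 * i.e. the peer keeps retransmitting data that was already acknowledged;
 * the caller would then re-pick the flow hash.
 */
static bool is_spurious_retrans(const struct rx_segment *seg,
				const struct dsack_state *dsack)
{
	return seg->seq == dsack->start_seq;
}

int main(void)
{
	struct dsack_state dsack = { .start_seq = 1000 };
	struct rx_segment seg = { .seq = 1000 };

	printf("%s\n", is_spurious_retrans(&seg, &dsack) ?
	       "re-hash flow" : "keep current path");
	return 0;
}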