Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	136
1 file changed, 55 insertions(+), 81 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..478909f4694d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -739,8 +739,10 @@ static void tcp_tsq_handler(struct sock *sk)
 		struct tcp_sock *tp = tcp_sk(sk);
 
 		if (tp->lost_out > tp->retrans_out &&
-		    tp->snd_cwnd > tcp_packets_in_flight(tp))
+		    tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+			tcp_mstamp_refresh(tp);
 			tcp_xmit_retransmit_queue(sk);
+		}
 
 		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
 			       0, GFP_ATOMIC);
@@ -991,6 +993,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct tcp_skb_cb *tcb;
 	struct tcp_out_options opts;
 	unsigned int tcp_options_size, tcp_header_size;
+	struct sk_buff *oskb = NULL;
 	struct tcp_md5sig_key *md5;
 	struct tcphdr *th;
 	int err;
@@ -998,12 +1001,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 	tp = tcp_sk(sk);
 
-	skb->skb_mstamp = tp->tcp_mstamp;
 	if (clone_it) {
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
-		tcp_rate_skb_sent(sk, skb);
-
+		oskb = skb;
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+	skb->skb_mstamp = tp->tcp_mstamp;
 
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
@@ -1122,12 +1124,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 
-	if (likely(err <= 0))
-		return err;
-
-	tcp_enter_cwr(sk);
-
-	return net_xmit_eval(err);
+	if (unlikely(err > 0)) {
+		tcp_enter_cwr(sk);
+		err = net_xmit_eval(err);
+	}
+	if (!err && oskb) {
+		oskb->skb_mstamp = tp->tcp_mstamp;
+		tcp_rate_skb_sent(sk, oskb);
+	}
+	return err;
 }
 
 /* This routine just queues the buffer for sending.
@@ -1803,40 +1808,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
 	return !after(end_seq, tcp_wnd_end(tp));
 }
 
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(skb, cur_mss);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_send_head(sk);
-
-	return skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk),
-			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH));
-}
-
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  * which is put after SKB on the list.  It is very much like
  * tcp_fragment() except that it may make several kinds of assumptions
@@ -2091,6 +2062,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	nskb->ip_summed = skb->ip_summed;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
+	tcp_highest_sack_replace(sk, skb, nskb);
 
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2202,9 +2174,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
 {
 	const u32 now = tcp_jiffies32;
+	enum tcp_chrono old = tp->chrono_type;
 
-	if (tp->chrono_type > TCP_CHRONO_UNSPEC)
-		tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+	if (old > TCP_CHRONO_UNSPEC)
+		tp->chrono_stat[old - 1] += now - tp->chrono_start;
 	tp->chrono_start = now;
 	tp->chrono_type = new;
 }
@@ -2267,6 +2240,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 	sent_pkts = 0;
 
+	tcp_mstamp_refresh(tp);
 	if (!push_one) {
 		/* Do MTU probing. */
 		result = tcp_mtu_probe(sk);
@@ -2278,7 +2252,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	}
 
 	max_segs = tcp_tso_segs(sk, mss_now);
-	tcp_mstamp_refresh(tp);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
@@ -2376,24 +2349,14 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
+	u32 timeout, rto_delta_us;
 
-	/* No consecutive loss probes. */
-	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
-		tcp_rearm_rto(sk);
-		return false;
-	}
 	/* Don't do any loss probe on a Fast Open connection before 3WHS
 	 * finishes.
 	 */
 	if (tp->fastopen_rsk)
 		return false;
 
-	/* TLP is only scheduled when next timer event is RTO. */
-	if (icsk->icsk_pending != ICSK_TIME_RETRANS)
-		return false;
-
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
@@ -2406,25 +2369,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	     tcp_send_head(sk))
 		return false;
 
-	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+	/* Probe timeout is 2*rtt. Add minimum RTO to account
 	 * for delayed ack when there's one outstanding packet. If no RTT
 	 * sample is available then probe after TCP_TIMEOUT_INIT.
 	 */
-	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-	if (tp->packets_out == 1)
-		timeout = max_t(u32, timeout,
-				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
-	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
-
-	/* If RTO is shorter, just schedule TLP in its place. */
-	tlp_time_stamp = tcp_jiffies32 + timeout;
-	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
-	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
-		s32 delta = rto_time_stamp - tcp_jiffies32;
-		if (delta > 0)
-			timeout = delta;
+	if (tp->srtt_us) {
+		timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+		if (tp->packets_out == 1)
+			timeout += TCP_RTO_MIN;
+		else
+			timeout += TCP_TIMEOUT_MIN;
+	} else {
+		timeout = TCP_TIMEOUT_INIT;
 	}
 
+	/* If the RTO formula yields an earlier time, then use that time. */
+	rto_delta_us = tcp_rto_delta_us(sk);  /* How far in future is RTO? */
+	if (rto_delta_us > 0)
+		timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
+
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
 				  TCP_RTO_MAX);
 	return true;
@@ -2703,7 +2666,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 		else if (!skb_shift(skb, next_skb, next_skb_size))
 			return false;
 	}
-	tcp_highest_sack_combine(sk, next_skb, skb);
+	tcp_highest_sack_replace(sk, next_skb, skb);
 
 	tcp_unlink_write_queue(next_skb, sk);
 
@@ -2878,10 +2841,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb;
 
-		skb->skb_mstamp = tp->tcp_mstamp;
 		nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
 		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
 			     -ENOBUFS;
+		if (!err) {
+			skb->skb_mstamp = tp->tcp_mstamp;
+			tcp_rate_skb_sent(sk, skb);
+		}
 	} else {
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
@@ -3214,13 +3180,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	th->source = htons(ireq->ir_num);
 	th->dest = ireq->ir_rmt_port;
 	skb->mark = ireq->ir_mark;
-	/* Setting of flags are superfluous here for callers (and ECE is
-	 * not even correctly set)
-	 */
-	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
-			     TCPHDR_SYN | TCPHDR_ACK);
-
-	th->seq = htonl(TCP_SKB_CB(skb)->seq);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	th->seq = htonl(tcp_rsk(req)->snt_isn);
 
 	/* XXX data is queued and acked as is. No buffer/window check */
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
@@ -3428,6 +3389,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto done;
 	}
 
+	/* data was not sent, this is our new send_head */
+	sk->sk_send_head = syn_data;
+	tp->packets_out -= tcp_skb_pcount(syn_data);
+
 fallback:
 	/* Send a regular SYN with Fast Open cookie request option */
 	if (fo->cookie.len > 0)
@@ -3448,6 +3413,10 @@ int tcp_connect(struct sock *sk)
 	int err;
 
 	tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+
+	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+		return -EHOSTUNREACH; /* Routing failure or similar. */
+
 	tcp_connect_init(sk);
 
 	if (unlikely(tp->repair)) {
@@ -3476,6 +3445,11 @@ int tcp_connect(struct sock *sk)
 	 */
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
+	buff = tcp_send_head(sk);
+	if (unlikely(buff)) {
+		tp->snd_nxt	= TCP_SKB_CB(buff)->seq;
+		tp->pushed_seq	= TCP_SKB_CB(buff)->seq;
+	}
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
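The probe-timeout arithmetic introduced in tcp_schedule_loss_probe() above can be read as: timeout = 2*SRTT plus a delayed-ACK allowance (a full minimum RTO when only one packet is outstanding, otherwise a small floor), falling back to the initial timeout when no RTT sample exists, then capped so the probe never fires later than the pending RTO. The standalone C sketch below restates that calculation in microseconds; the constant values and the helper name tlp_timeout_us() are illustrative assumptions for this note, not kernel definitions.

/*
 * Illustrative, standalone sketch (not kernel code) of the TLP timeout
 * computation introduced in tcp_schedule_loss_probe() above.  Everything
 * is kept in microseconds to avoid jiffies conversions; the constants are
 * assumed stand-ins for TCP_RTO_MIN, TCP_TIMEOUT_MIN and TCP_TIMEOUT_INIT.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define RTO_MIN_US        200000u  /* ~200 ms, assumed TCP_RTO_MIN */
#define TIMEOUT_MIN_US      2000u  /* ~2 ms, assumed TCP_TIMEOUT_MIN */
#define TIMEOUT_INIT_US  1000000u  /* ~1 s, assumed TCP_TIMEOUT_INIT */

static uint32_t tlp_timeout_us(uint32_t srtt_us_x8, uint32_t packets_out,
			       uint32_t rto_delta_us)
{
	uint32_t timeout;

	if (srtt_us_x8) {
		/* srtt is stored as 8 * RTT, so >> 2 yields 2 * RTT. */
		timeout = srtt_us_x8 >> 2;
		/* With one packet in flight, leave room for a delayed ACK. */
		timeout += (packets_out == 1) ? RTO_MIN_US : TIMEOUT_MIN_US;
	} else {
		/* No RTT sample yet: fall back to the initial timeout. */
		timeout = TIMEOUT_INIT_US;
	}

	/* Never schedule the probe later than the pending RTO would fire. */
	if (rto_delta_us > 0 && rto_delta_us < timeout)
		timeout = rto_delta_us;

	return timeout;
}

int main(void)
{
	/* srtt = 50 ms (stored as 8 * RTT), one packet out, RTO due in
	 * 400 ms: the probe fires at 2 * 50 ms + 200 ms = 300 ms,
	 * which is earlier than the RTO, so it is not capped.
	 */
	printf("TLP timeout: %" PRIu32 " us\n",
	       tlp_timeout_us(8 * 50000, 1, 400000));
	return 0;
}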