Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--	net/ipv4/tcp_input.c	210
1 file changed, 87 insertions, 123 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..5a87a00641d3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -107,6 +108,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
+#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
 #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
 #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
 #define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
@@ -1951,6 +1953,7 @@ void tcp_enter_loss(struct sock *sk)
 	    !after(tp->high_seq, tp->snd_una) ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
+		tp->prior_cwnd = tp->snd_cwnd;
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
 		tcp_init_undo(tp);
@@ -2520,8 +2523,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
 		return;
 
 	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
-	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
 		tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_jiffies32;
 	}
@@ -3004,21 +3007,24 @@ void tcp_rearm_rto(struct sock *sk)
 		/* Offset the time elapsed after installing regular RTO */
 		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
-			struct sk_buff *skb = tcp_write_queue_head(sk);
-			u64 rto_time_stamp = skb->skb_mstamp +
-					     jiffies_to_usecs(rto);
-			s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+			s64 delta_us = tcp_rto_delta_us(sk);
 			/* delta_us may not be positive if the socket is locked
 			 * when the retrans timer fires and is rescheduled.
 			 */
-			if (delta_us > 0)
-				rto = usecs_to_jiffies(delta_us);
+			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
 		}
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
 					  TCP_RTO_MAX);
 	}
 }
 
+/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+static void tcp_set_xmit_timer(struct sock *sk)
+{
+	if (!tcp_schedule_loss_probe(sk))
+		tcp_rearm_rto(sk);
+}
+
 /* If we get here, the whole TSO packet has not been acked. */
 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 {
@@ -3180,7 +3186,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 					ca_rtt_us, sack->rate);
 
 	if (flag & FLAG_ACKED) {
-		tcp_rearm_rto(sk);
+		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
 		if (unlikely(icsk->icsk_mtup.probe_size &&
 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
 			tcp_mtup_probe_success(sk);
@@ -3208,7 +3214,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
 		 */
-		tcp_rearm_rto(sk);
+		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
 	}
 
 	if (icsk->icsk_ca_ops->pkts_acked) {
@@ -3580,9 +3586,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, tp->snd_nxt))
 		goto invalid_ack;
 
-	if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
-		tcp_rearm_rto(sk);
-
 	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
 		icsk->icsk_retransmits = 0;
@@ -3647,18 +3650,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
 				    &sack_state);
 
+	if (tp->tlp_high_seq)
+		tcp_process_tlp_ack(sk, ack, flag);
+	/* If needed, reset TLP/RTO timer; RACK may later override this. */
+	if (flag & FLAG_SET_XMIT_TIMER)
+		tcp_set_xmit_timer(sk);
+
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
 		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
 	}
-	if (tp->tlp_high_seq)
-		tcp_process_tlp_ack(sk, ack, flag);
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
 		sk_dst_confirm(sk);
 
-	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
-		tcp_schedule_loss_probe(sk);
 	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
 	lost = tp->lost - lost;			/* freshly marked lost */
 	tcp_rate_gen(sk, delivered, lost, sack_state.rate);
@@ -4264,9 +4269,15 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	tp->rx_opt.num_sacks = num_sacks;
 }
 
+enum tcp_queue {
+	OOO_QUEUE,
+	RCV_QUEUE,
+};
+
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
+ * @dest: destination queue
  * @to: prior buffer
 * @from: buffer to add in queue
  * @fragstolen: pointer to boolean
@@ -4278,6 +4289,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
  * Returns true if caller should free @from instead of queueing it
  */
 static bool tcp_try_coalesce(struct sock *sk,
+			     enum tcp_queue dest,
 			     struct sk_buff *to,
 			     struct sk_buff *from,
 			     bool *fragstolen)
@@ -4299,6 +4311,15 @@ static bool tcp_try_coalesce(struct sock *sk,
 	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
 	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
 	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+
+	if (TCP_SKB_CB(from)->has_rxtstamp) {
+		TCP_SKB_CB(to)->has_rxtstamp = true;
+		if (dest == OOO_QUEUE)
+			TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
+		else
+			to->tstamp = from->tstamp;
+	}
+
 	return true;
 }
 
@@ -4333,6 +4354,9 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
+		/* Replace tstamp which was stomped by rbnode */
+		if (TCP_SKB_CB(skb)->has_rxtstamp)
+			skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4344,7 +4368,8 @@ static void tcp_ofo_queue(struct sock *sk)
 			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
-		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
+		eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
+						 tail, skb, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
 		if (!eaten)
@@ -4398,6 +4423,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
+	/* Stash tstamp to avoid being stomped on by rbnode */
+	if (TCP_SKB_CB(skb)->has_rxtstamp)
+		TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
+
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
@@ -4425,7 +4454,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+	if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+			     skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4475,7 +4505,8 @@ coalesce_done:
 				__kfree_skb(skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+		} else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+					    skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4526,7 +4557,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
-		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+		 tcp_try_coalesce(sk, RCV_QUEUE, tail,
+				  skb, fragstolen)) ? 1 : 0;
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -4588,8 +4620,8 @@ err:
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	bool fragstolen = false;
-	int eaten = -1;
+	bool fragstolen;
+	int eaten;
 
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
 		__kfree_skb(skb);
@@ -4611,32 +4643,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			goto out_of_window;
 
 		/* Ok. In sequence. In window. */
-		if (tp->ucopy.task == current &&
-		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
-		    sock_owned_by_user(sk) && !tp->urg_data) {
-			int chunk = min_t(unsigned int, skb->len,
-					  tp->ucopy.len);
-
-			__set_current_state(TASK_RUNNING);
-
-			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
-				tp->ucopy.len -= chunk;
-				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len);
-				tcp_rcv_space_adjust(sk);
-			}
-		}
-
-		if (eaten <= 0) {
 queue_and_out:
-			if (eaten < 0) {
-				if (skb_queue_len(&sk->sk_receive_queue) == 0)
-					sk_forced_mem_schedule(sk, skb->truesize);
-				else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
-					goto drop;
-			}
-			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-		}
+		if (skb_queue_len(&sk->sk_receive_queue) == 0)
+			sk_forced_mem_schedule(sk, skb->truesize);
+		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+			goto drop;
+
+		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
@@ -5186,26 +5199,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
 	}
 }
 
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int chunk = skb->len - hlen;
-	int err;
-
-	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
-	else
-		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
-	if (!err) {
-		tp->ucopy.len -= chunk;
-		tp->copied_seq += chunk;
-		tcp_rcv_space_adjust(sk);
-	}
-
-	return err;
-}
-
 /* Accept RST for rcv_nxt - 1 after a FIN.
  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
  * FIN is sent followed by a RST packet. The RST is sent with the same
@@ -5358,8 +5351,9 @@ discard:
  *	tcp_data_queue when everything is OK.
  */
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th, unsigned int len)
+			 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_mstamp_refresh(tp);
@@ -5445,56 +5439,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			int eaten = 0;
 			bool fragstolen = false;
 
-			if (tp->ucopy.task == current &&
-			    tp->copied_seq == tp->rcv_nxt &&
-			    len - tcp_header_len <= tp->ucopy.len &&
-			    sock_owned_by_user(sk)) {
-				__set_current_state(TASK_RUNNING);
-
-				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
-					/* Predicted packet is in window by definition.
-					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-					 * Hence, check seq<=rcv_wup reduces to:
-					 */
-					if (tcp_header_len ==
-					    (sizeof(struct tcphdr) +
-					     TCPOLEN_TSTAMP_ALIGNED) &&
-					    tp->rcv_nxt == tp->rcv_wup)
-						tcp_store_ts_recent(tp);
-
-					tcp_rcv_rtt_measure_ts(sk, skb);
-
-					__skb_pull(skb, tcp_header_len);
-					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
-					NET_INC_STATS(sock_net(sk),
-							LINUX_MIB_TCPHPHITSTOUSER);
-					eaten = 1;
-				}
-			}
-			if (!eaten) {
-				if (tcp_checksum_complete(skb))
-					goto csum_error;
+			if (tcp_checksum_complete(skb))
+				goto csum_error;
 
-				if ((int)skb->truesize > sk->sk_forward_alloc)
-					goto step5;
+			if ((int)skb->truesize > sk->sk_forward_alloc)
+				goto step5;
 
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
+			/* Predicted packet is in window by definition.
+			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+			 * Hence, check seq<=rcv_wup reduces to:
+			 */
+			if (tcp_header_len ==
+			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+			    tp->rcv_nxt == tp->rcv_wup)
+				tcp_store_ts_recent(tp);
 
-				tcp_rcv_rtt_measure_ts(sk, skb);
+			tcp_rcv_rtt_measure_ts(sk, skb);
 
-				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
-				/* Bulk data transfer: receiver */
-				eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
-						      &fragstolen);
-			}
+			/* Bulk data transfer: receiver */
+			eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+					      &fragstolen);
 
 			tcp_event_data_recv(sk, skb);
 
@@ -5588,7 +5554,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 		__tcp_fast_path_on(tp, tp->snd_wnd);
 	else
 		tp->pred_flags = 0;
-
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -6232,7 +6197,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 		struct inet_request_sock *ireq = inet_rsk(req);
 
 		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
+		ireq->ireq_opt = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 		ireq->pktopts = NULL;
 #endif
@@ -6303,9 +6268,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sock *fastopen_sk = NULL;
-	struct dst_entry *dst = NULL;
 	struct request_sock *req;
 	bool want_cookie = false;
+	struct dst_entry *dst;
 	struct flowi fl;
 
 	/* TW buckets are converted to open requests without
@@ -6355,6 +6320,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (tmp_opt.tstamp_ok)
 		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
 
+	dst = af_ops->route_req(sk, &fl, req);
+	if (!dst)
+		goto drop_and_free;
+
 	if (!want_cookie && !isn) {
 		/* Kill the following clause, if you dislike this way. */
 		if (!net->ipv4.sysctl_tcp_syncookies &&
@@ -6375,11 +6344,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		isn = af_ops->init_seq(skb);
 	}
 
-	if (!dst) {
-		dst = af_ops->route_req(sk, &fl, req);
-		if (!dst)
-			goto drop_and_free;
-	}
 
 	tcp_ecn_create_request(req, skb, sk, dst);
 
@@ -6395,7 +6359,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_openreq_init_rwin(req, sk, dst);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
-		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
+		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
 	}
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
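Note: the tcp_rearm_rto() hunk above replaces the open-coded RTO-deadline arithmetic with a tcp_rto_delta_us() helper that is defined outside this file. A minimal sketch of that helper, reconstructed from the removed lines; its exact signature and its assumed home in include/net/tcp.h are not confirmed by this diff:

static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
	/* Head of the write queue: the oldest not-yet-acked skb. */
	const struct sk_buff *skb = tcp_write_queue_head(sk);
	u32 rto = inet_csk(sk)->icsk_rto;
	/* Absolute time (in usecs) at which the RTO for that skb should fire. */
	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);

	/* Time remaining until that deadline; <= 0 if it has already passed. */
	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
}

With the helper in place, the hunk also changes the fallback behaviour: instead of silently re-arming a full RTO when delta_us is not positive, the new code clamps it via usecs_to_jiffies(max_t(int, delta_us, 1)), so an already-expired deadline fires almost immediately.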