diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 88 | 
1 files changed, 51 insertions, 37 deletions
| diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 075ab4d5af5e..8fdd27b17306 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly;  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;  int sysctl_tcp_early_retrans __read_mostly = 3; +int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;  #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/  #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/ @@ -3183,8 +3184,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,  		tp->fackets_out -= min(pkts_acked, tp->fackets_out); -		if (ca_ops->pkts_acked) -			ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us); +		if (ca_ops->pkts_acked) { +			long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); +			ca_ops->pkts_acked(sk, pkts_acked, rtt_us); +		}  	} else if (skb && rtt_update && sack_rtt_us >= 0 &&  		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { @@ -3319,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32  }  /* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)  {  	/* unprotected vars, we dont care of overwrites */  	static u32 challenge_timestamp;  	static unsigned int challenge_count; -	u32 now = jiffies / HZ; +	struct tcp_sock *tp = tcp_sk(sk); +	u32 now; + +	/* First check our per-socket dupack rate limit. */ +	if (tcp_oow_rate_limited(sock_net(sk), skb, +				 LINUX_MIB_TCPACKSKIPPEDCHALLENGE, +				 &tp->last_oow_ack_time)) +		return; +	/* Then check the host-wide RFC 5961 rate limit. */ +	now = jiffies / HZ;  	if (now != challenge_timestamp) {  		challenge_timestamp = now;  		challenge_count = 0; @@ -3358,34 +3370,34 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)  }  /* This routine deals with acks during a TLP episode. 
+ * We mark the end of a TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq.   * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.   */  static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)  {  	struct tcp_sock *tp = tcp_sk(sk); -	bool is_tlp_dupack = (ack == tp->tlp_high_seq) && -			     !(flag & (FLAG_SND_UNA_ADVANCED | -				       FLAG_NOT_DUP | FLAG_DATA_SACKED)); -	/* Mark the end of TLP episode on receiving TLP dupack or when -	 * ack is after tlp_high_seq. -	 */ -	if (is_tlp_dupack) { -		tp->tlp_high_seq = 0; +	if (before(ack, tp->tlp_high_seq))  		return; -	} -	if (after(ack, tp->tlp_high_seq)) { +	if (flag & FLAG_DSACKING_ACK) { +		/* This DSACK means original and TLP probe arrived; no loss */ +		tp->tlp_high_seq = 0; +	} else if (after(ack, tp->tlp_high_seq)) { +		/* ACK advances: there was a loss, so reduce cwnd. Reset +		 * tlp_high_seq in tcp_init_cwnd_reduction() +		 */ +		tcp_init_cwnd_reduction(sk); +		tcp_set_ca_state(sk, TCP_CA_CWR); +		tcp_end_cwnd_reduction(sk); +		tcp_try_keep_open(sk); +		NET_INC_STATS_BH(sock_net(sk), +				 LINUX_MIB_TCPLOSSPROBERECOVERY); +	} else if (!(flag & (FLAG_SND_UNA_ADVANCED | +			     FLAG_NOT_DUP | FLAG_DATA_SACKED))) { +		/* Pure dupack: original and TLP probe arrived; no loss */  		tp->tlp_high_seq = 0; -		/* Don't reduce cwnd if DSACK arrives for TLP retrans. 
*/ -		if (!(flag & FLAG_DSACKING_ACK)) { -			tcp_init_cwnd_reduction(sk); -			tcp_set_ca_state(sk, TCP_CA_CWR); -			tcp_end_cwnd_reduction(sk); -			tcp_try_keep_open(sk); -			NET_INC_STATS_BH(sock_net(sk), -					 LINUX_MIB_TCPLOSSPROBERECOVERY); -		}  	}  } @@ -3421,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	if (before(ack, prior_snd_una)) {  		/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */  		if (before(ack, prior_snd_una - tp->max_window)) { -			tcp_send_challenge_ack(sk); +			tcp_send_challenge_ack(sk, skb);  			return -1;  		}  		goto old_ack; @@ -4990,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,  	    tcp_paws_discard(sk, skb)) {  		if (!th->rst) {  			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); -			tcp_send_dupack(sk, skb); +			if (!tcp_oow_rate_limited(sock_net(sk), skb, +						  LINUX_MIB_TCPACKSKIPPEDPAWS, +						  &tp->last_oow_ack_time)) +				tcp_send_dupack(sk, skb);  			goto discard;  		}  		/* Reset is accepted even if it did not pass PAWS. 
*/ @@ -5007,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,  		if (!th->rst) {  			if (th->syn)  				goto syn_challenge; -			tcp_send_dupack(sk, skb); +			if (!tcp_oow_rate_limited(sock_net(sk), skb, +						  LINUX_MIB_TCPACKSKIPPEDSEQ, +						  &tp->last_oow_ack_time)) +				tcp_send_dupack(sk, skb);  		}  		goto discard;  	} @@ -5023,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,  		if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)  			tcp_reset(sk);  		else -			tcp_send_challenge_ack(sk); +			tcp_send_challenge_ack(sk, skb);  		goto discard;  	} @@ -5037,7 +5055,7 @@ syn_challenge:  		if (syn_inerr)  			TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);  		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); -		tcp_send_challenge_ack(sk); +		tcp_send_challenge_ack(sk, skb);  		goto discard;  	} @@ -5870,10 +5888,9 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)   * TCP ECN negotiation.   *   * Exception: tcp_ca wants ECN. This is required for DCTCP - * congestion control; it requires setting ECT on all packets, - * including SYN. We inverse the test in this case: If our - * local socket wants ECN, but peer only set ece/cwr (but not - * ECT in IP header) its probably a non-DCTCP aware sender. + * congestion control: Linux DCTCP asserts ECT on all packets, + * including SYN, which is the optimal solution; however, + * others, such as FreeBSD, do not.   
*/  static void tcp_ecn_create_request(struct request_sock *req,  				   const struct sk_buff *skb, @@ -5883,18 +5900,15 @@ static void tcp_ecn_create_request(struct request_sock *req,  	const struct tcphdr *th = tcp_hdr(skb);  	const struct net *net = sock_net(listen_sk);  	bool th_ecn = th->ece && th->cwr; -	bool ect, need_ecn, ecn_ok; +	bool ect, ecn_ok;  	if (!th_ecn)  		return;  	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); -	need_ecn = tcp_ca_needs_ecn(listen_sk);  	ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); -	if (!ect && !need_ecn && ecn_ok) -		inet_rsk(req)->ecn_ok = 1; -	else if (ect && need_ecn) +	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk))  		inet_rsk(req)->ecn_ok = 1;  } |