diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--   net/ipv4/tcp_output.c   56
1 files changed, 42 insertions, 14 deletions
| diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 65caf8b95e17..1db253e36045 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;   */  int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; -  /* By default, RFC2861 behavior.  */  int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -948,7 +945,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,  	skb_orphan(skb);  	skb->sk = sk; -	skb->destructor = tcp_wfree; +	skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;  	skb_set_hash_from_sk(skb, sk);  	atomic_add(skb->truesize, &sk->sk_wmem_alloc); @@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)  {  	struct tcp_sock *tp = tcp_sk(sk);  	struct inet_connection_sock *icsk = inet_csk(sk); +	struct net *net = sock_net(sk); -	icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1; +	icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;  	icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +  			       icsk->icsk_af_ops->net_header_len; -	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); +	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);  	icsk->icsk_mtup.probe_size = 0;  }  EXPORT_SYMBOL(tcp_mtup_init); @@ -2775,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)  	} else {  		/* Socket is locked, keep trying until memory is available. */  		for (;;) { -			skb = alloc_skb_fclone(MAX_TCP_HEADER, -					       sk->sk_allocation); +			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);  			if (skb)  				break;  			yield();  		} - -		/* Reserve space for headers and prepare control bits. */ -		skb_reserve(skb, MAX_TCP_HEADER);  		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). 
*/  		tcp_init_nondata_skb(skb, tp->write_seq,  				     TCPHDR_ACK | TCPHDR_FIN); @@ -2939,6 +2933,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,  }  EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); +	const struct tcp_congestion_ops *ca; +	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + +	if (ca_key == TCP_CA_UNSPEC) +		return; + +	rcu_read_lock(); +	ca = tcp_ca_find_key(ca_key); +	if (likely(ca && try_module_get(ca->owner))) { +		module_put(icsk->icsk_ca_ops->owner); +		icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); +		icsk->icsk_ca_ops = ca; +	} +	rcu_read_unlock(); +} +  /* Do all connect socket setups that can be done AF independent. */  static void tcp_connect_init(struct sock *sk)  { @@ -2964,6 +2977,8 @@ static void tcp_connect_init(struct sock *sk)  	tcp_mtup_init(sk);  	tcp_sync_mss(sk, dst_mtu(dst)); +	tcp_ca_dst_init(sk, dst); +  	if (!tp->window_clamp)  		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);  	tp->advmss = dst_metric_advmss(dst); @@ -3034,7 +3049,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)  {  	struct tcp_sock *tp = tcp_sk(sk);  	struct tcp_fastopen_request *fo = tp->fastopen_req; -	int syn_loss = 0, space, err = 0; +	int syn_loss = 0, space, err = 0, copied;  	unsigned long last_syn_loss = 0;  	struct sk_buff *syn_data; @@ -3072,11 +3087,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)  		goto fallback;  	syn_data->ip_summed = CHECKSUM_PARTIAL;  	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); -	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), -					 fo->data->msg_iter.iov, 0, space))) { +	copied = copy_from_iter(skb_put(syn_data, space), space, +				&fo->data->msg_iter); +	if (unlikely(!copied)) {  		kfree_skb(syn_data);  		goto fallback;  	} +	if (copied != space) { +		skb_trim(syn_data, copied); +		space = copied; +	}  	/* No more 
data pending in inet_wait_for_connect() */  	if (space == fo->size) @@ -3244,6 +3264,14 @@ void tcp_send_ack(struct sock *sk)  	skb_reserve(buff, MAX_TCP_HEADER);  	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); +	/* We do not want pure acks influencing TCP Small Queues or fq/pacing +	 * too much. +	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 +	 * We also avoid tcp_wfree() overhead (cache line miss accessing +	 * tp->tsq_flags) by using regular sock_wfree() +	 */ +	skb_set_tcp_pure_ack(buff); +  	/* Send it off, this clears delayed acks for us. */  	skb_mstamp_get(&buff->skb_mstamp);  	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); |