Diffstat (limited to 'net/ipv4/tcp_output.c')
 net/ipv4/tcp_output.c | 48 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..17a11e65e57f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
 	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+			       0, GFP_ATOMIC);
 }
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -766,6 +767,17 @@ void tcp_release_cb(struct sock *sk)
 	if (flags & (1UL << TCP_TSQ_DEFERRED))
 		tcp_tsq_handler(sk);
 
+	/* Here begins the tricky part :
+	 * We are called from release_sock() with :
+	 * 1) BH disabled
+	 * 2) sk_lock.slock spinlock held
+	 * 3) socket owned by us (sk->sk_lock.owned == 1)
+	 *
+	 * But following code is meant to be called from BH handlers,
+	 * so we should keep BH disabled, but early release socket ownership
+	 */
+	sock_release_ownership(sk);
+
 	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
 		tcp_write_timer_handler(sk);
 		__sock_put(sk);
@@ -863,8 +875,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
@@ -1904,7 +1916,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-			break;
+			/* It is possible TX completion already happened
+			 * before we set TSQ_THROTTLED, so we must
+			 * test again the condition.
+			 * We abuse smp_mb__after_clear_bit() because
+			 * there is no smp_mb__after_set_bit() yet
+			 */
+			smp_mb__after_clear_bit();
+			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+				break;
 		}
 
 		limit = mss_now;
@@ -1977,7 +1997,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -2328,6 +2348,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int cur_mss;
+	int err;
 
 	/* Inconslusive MTU probe */
 	if (icsk->icsk_mtup.probe_size) {
@@ -2391,11 +2412,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
 						   GFP_ATOMIC);
-		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			      -ENOBUFS;
+		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			     -ENOBUFS;
 	} else {
-		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
+
+	if (likely(!err))
+		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+	return err;
 }
 
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2899,7 +2924,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 
 	space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
 		MAX_TCP_OPTION_SPACE;
-	syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+	space = min_t(size_t, space, fo->size);
+
+	/* limit to order-0 allocations */
+	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+	syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
 				   sk->sk_allocation);
 	if (syn_data == NULL)
 		goto fallback;
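The tcp_write_xmit() hunk above relies on a classic "set flag, full barrier, re-check" pattern: the sender publishes TSQ_THROTTLED, then re-reads sk_wmem_alloc, so a TX completion that raced with the flag update cannot leave the flow stalled with nothing scheduled to wake it. The following is a minimal userspace sketch of that pattern using C11 atomics; it is an analogy, not kernel code, and the names (queued_bytes, throttled, resume) are invented for illustration.

```c
/* Userspace analogy of the TSQ "set bit, barrier, re-check" pattern.
 * seq_cst atomics stand in for set_bit() followed by the full barrier
 * the patch gets from smp_mb__after_clear_bit().
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_long queued_bytes;	/* plays the role of sk->sk_wmem_alloc */
static atomic_bool throttled;		/* plays the role of TSQ_THROTTLED     */

/* Sender path: decide whether to stop queuing more packets. */
static bool should_stop_sending(long limit)
{
	if (atomic_load(&queued_bytes) <= limit)
		return false;

	atomic_store(&throttled, true);

	/* Re-check after publishing the flag: if a completion drained
	 * the queue before it could observe "throttled", we see the
	 * lower byte count here and keep sending instead of stalling.
	 */
	return atomic_load(&queued_bytes) > limit;
}

/* Completion path: release bytes, then wake the sender if it throttled. */
static void on_tx_completion(long bytes, void (*resume)(void))
{
	atomic_fetch_sub(&queued_bytes, bytes);
	if (atomic_exchange(&throttled, false))
		resume();	/* analogue of scheduling the TSQ tasklet */
}
```

With sequentially consistent ordering the store to throttled and the reload of queued_bytes cannot be reordered, which is what the patch buys by reusing smp_mb__after_clear_bit() after set_bit(). A completion may still observe the flag and issue a spurious resume(), which is harmless; the case being closed is the opposite one, where the sender throttles itself and is never woken.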