Diffstat (limited to 'net/ipv4/tcp_input.c')
 -rw-r--r--  net/ipv4/tcp_input.c | 273
 1 files changed, 159 insertions, 114 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53c8ce4046b2..9944c1d9a218 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -61,6 +61,8 @@
  *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -472,8 +474,11 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		if (!win_dep) {
 			m -= (new_sample >> 3);
 			new_sample += m;
-		} else if (m < new_sample)
-			new_sample = m << 3;
+		} else {
+			m <<= 3;
+			if (m < new_sample)
+				new_sample = m;
+		}
 	} else {
 		/* No previous measure. */
 		new_sample = m << 3;
@@ -1403,8 +1408,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
-	if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
+	/* Adjust counters and hints for the newly sacked sequence
+	 * range but discard the return value since prev is already
+	 * marked. We must tag the range first because the seq
+	 * advancement below implicitly advances
+	 * tcp_highest_sack_seq() when skb is highest_sack.
+	 */
+	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+			start_seq, end_seq, dup_sack, pcount);
+
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1430,12 +1443,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		skb_shinfo(skb)->gso_type = 0;
 	}
 
-	/* Adjust counters and hints for the newly sacked sequence range but
-	 * discard the return value since prev is already marked.
-	 */
-	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
-
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
@@ -1583,6 +1590,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+		goto fallback;
+
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
@@ -2567,6 +2578,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
 		if (cnt > packets) {
 			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			    (oldcnt >= packets))
 				break;
 
@@ -3860,9 +3872,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
 						if (net_ratelimit())
-							printk(KERN_INFO "tcp_parse_options: Illegal window "
-							       "scaling value %d >14 received.\n",
-							       snd_wscale);
+							pr_info("%s: Illegal window scaling value %d >14 received\n",
+								__func__,
+								snd_wscale);
 						snd_wscale = 14;
 					}
 					opt_rx->snd_wscale = snd_wscale;
@@ -4184,7 +4196,7 @@ static void tcp_fin(struct sock *sk)
 		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
 		 * cases we should never reach this piece of code.
 		 */
-		printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+		pr_err("%s: Impossible, sk->sk_state=%d\n",
 		       __func__, sk->sk_state);
 		break;
 	}
@@ -4437,6 +4449,137 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 	return 0;
 }
 
+static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb1;
+	u32 seq, end_seq;
+
+	TCP_ECN_check_ce(tp, skb);
+
+	if (tcp_try_rmem_schedule(sk, skb->truesize)) {
+		/* TODO: should increment a counter */
+		__kfree_skb(skb);
+		return;
+	}
+
+	/* Disable header prediction. */
+	tp->pred_flags = 0;
+	inet_csk_schedule_ack(sk);
+
+	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
+		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+
+	skb1 = skb_peek_tail(&tp->out_of_order_queue);
+	if (!skb1) {
+		/* Initial out of order segment, build 1 SACK. */
+		if (tcp_is_sack(tp)) {
+			tp->rx_opt.num_sacks = 1;
+			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
+			tp->selective_acks[0].end_seq =
+						TCP_SKB_CB(skb)->end_seq;
+		}
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+		goto end;
+	}
+
+	seq = TCP_SKB_CB(skb)->seq;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (seq == TCP_SKB_CB(skb1)->end_seq) {
+		/* Packets in ofo can stay in queue a long time.
+		 * Better try to coalesce them right now
+		 * to avoid future tcp_collapse_ofo_queue(),
+		 * probably the most expensive function in tcp stack.
+		 */
+		if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) {
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPRCVCOALESCE);
+			BUG_ON(skb_copy_bits(skb, 0,
+					     skb_put(skb1, skb->len),
+					     skb->len));
+			TCP_SKB_CB(skb1)->end_seq = end_seq;
+			TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+			__kfree_skb(skb);
+			skb = NULL;
+		} else {
+			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+		}
+
+		if (!tp->rx_opt.num_sacks ||
+		    tp->selective_acks[0].end_seq != seq)
+			goto add_sack;
+
+		/* Common case: data arrive in order after hole. */
+		tp->selective_acks[0].end_seq = end_seq;
+		goto end;
+	}
+
+	/* Find place to insert this segment. */
+	while (1) {
+		if (!after(TCP_SKB_CB(skb1)->seq, seq))
+			break;
+		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+			skb1 = NULL;
+			break;
+		}
+		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+	}
+
+	/* Do skb overlap to previous one? */
+	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			/* All the bits are present. Drop. */
+			__kfree_skb(skb);
+			skb = NULL;
+			tcp_dsack_set(sk, seq, end_seq);
+			goto add_sack;
+		}
+		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+			/* Partial overlap. */
+			tcp_dsack_set(sk, seq,
+				      TCP_SKB_CB(skb1)->end_seq);
+		} else {
+			if (skb_queue_is_first(&tp->out_of_order_queue,
+					       skb1))
+				skb1 = NULL;
+			else
+				skb1 = skb_queue_prev(
+					&tp->out_of_order_queue,
+					skb1);
+		}
+	}
+	if (!skb1)
+		__skb_queue_head(&tp->out_of_order_queue, skb);
+	else
+		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+
+	/* And clean segments covered by new one as whole. */
+	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+			break;
+		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+					 end_seq);
+			break;
+		}
+		__skb_unlink(skb1, &tp->out_of_order_queue);
+		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
+				 TCP_SKB_CB(skb1)->end_seq);
+		__kfree_skb(skb1);
+	}
+
+add_sack:
+	if (tcp_is_sack(tp))
+		tcp_sack_new_ofo_skb(sk, seq, end_seq);
+end:
+	if (skb)
+		skb_set_owner_r(skb, sk);
+}
+
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -4552,105 +4695,7 @@ drop:
 		goto queue_and_out;
 	}
 
-	TCP_ECN_check_ce(tp, skb);
-
-	if (tcp_try_rmem_schedule(sk, skb->truesize))
-		goto drop;
-
-	/* Disable header prediction. */
-	tp->pred_flags = 0;
-	inet_csk_schedule_ack(sk);
-
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
-
-	skb_set_owner_r(skb, sk);
-
-	if (!skb_peek(&tp->out_of_order_queue)) {
-		/* Initial out of order segment, build 1 SACK. */
-		if (tcp_is_sack(tp)) {
-			tp->rx_opt.num_sacks = 1;
-			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-			tp->selective_acks[0].end_seq =
-						TCP_SKB_CB(skb)->end_seq;
-		}
-		__skb_queue_head(&tp->out_of_order_queue, skb);
-	} else {
-		struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
-		u32 seq = TCP_SKB_CB(skb)->seq;
-		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
-
-		if (seq == TCP_SKB_CB(skb1)->end_seq) {
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-			if (!tp->rx_opt.num_sacks ||
-			    tp->selective_acks[0].end_seq != seq)
-				goto add_sack;
-
-			/* Common case: data arrive in order after hole. */
-			tp->selective_acks[0].end_seq = end_seq;
-			return;
-		}
-
-		/* Find place to insert this segment. */
-		while (1) {
-			if (!after(TCP_SKB_CB(skb1)->seq, seq))
-				break;
-			if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-				skb1 = NULL;
-				break;
-			}
-			skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-		}
-
-		/* Do skb overlap to previous one? */
-		if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				/* All the bits are present. Drop. */
-				__kfree_skb(skb);
-				tcp_dsack_set(sk, seq, end_seq);
-				goto add_sack;
-			}
-			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-				/* Partial overlap. */
-				tcp_dsack_set(sk, seq,
-					      TCP_SKB_CB(skb1)->end_seq);
-			} else {
-				if (skb_queue_is_first(&tp->out_of_order_queue,
-						       skb1))
-					skb1 = NULL;
-				else
-					skb1 = skb_queue_prev(
-						&tp->out_of_order_queue,
-						skb1);
-			}
-		}
-		if (!skb1)
-			__skb_queue_head(&tp->out_of_order_queue, skb);
-		else
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-		/* And clean segments covered by new one as whole. */
-		while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-			skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
-
-			if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
-				break;
-			if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-				tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-						 end_seq);
-				break;
-			}
-			__skb_unlink(skb1, &tp->out_of_order_queue);
-			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
-					 TCP_SKB_CB(skb1)->end_seq);
-			__kfree_skb(skb1);
-		}
-
-add_sack:
-		if (tcp_is_sack(tp))
-			tcp_sack_new_ofo_skb(sk, seq, end_seq);
-	}
+	tcp_data_queue_ofo(sk, skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -5183,7 +5228,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
 		return 0;
 
 	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-		tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
+		tp->ucopy.dma_chan = net_dma_find_channel();
 
 	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
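A note on the tcp_rcv_rtt_update() hunk above: as the rest of the function shows, new_sample is stored left-shifted by 3 (in 1/8 units), while the old window-dependent branch compared the raw sample m against it, so the two operands were in different scales; the replacement shifts m first and only then keeps the smaller value. The standalone userspace sketch below illustrates the corrected update. It is not kernel code: the function name rcv_rtt_update and its variables merely mirror the kernel routine, and the sample values in main() are made up for illustration.

/*
 * Standalone illustration (not kernel code) of the tcp_rcv_rtt_update()
 * change above.  The estimate is stored left-shifted by 3, so a new
 * measurement must be shifted into the same 1/8 units before it is
 * compared against the stored value.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t rcv_rtt_update(uint32_t new_sample, uint32_t m, int win_dep)
{
	if (m == 0)
		m = 1;

	if (new_sample != 0) {
		if (!win_dep) {
			/* Smoothed estimate: new_sample += m - new_sample/8 */
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			/* Window-dependent case: keep the smallest sample,
			 * comparing in the same scaled units. */
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		/* First measurement. */
		new_sample = m << 3;
	}

	return new_sample;
}

int main(void)
{
	uint32_t s = 0;

	s = rcv_rtt_update(s, 100, 1);	/* first sample: 100 << 3 = 800 */
	s = rcv_rtt_update(s, 90, 1);	/* 90 << 3 = 720 < 800, take it */
	s = rcv_rtt_update(s, 95, 1);	/* 95 << 3 = 760 >= 720, keep 720 */
	printf("scaled sample %u, rtt ~ %u\n", s, s >> 3);
	return 0;
}

The program prints a scaled sample of 720, i.e. an estimate of 90: with the shift applied before the comparison, a later, larger sample no longer overwrites the tracked minimum.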