aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c152
1 files changed, 77 insertions, 75 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2868ef28ce52..4eb0c8ca3c60 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_incr_quickack(sk, max_quickacks);
- icsk->icsk_ack.pingpong = 0;
+ inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);
@@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk);
return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
- (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
+ (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -1572,9 +1574,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
return skb;
}
-static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
- struct tcp_sacktag_state *state,
- u32 seq)
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
{
struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
struct sk_buff *skb;
@@ -1596,13 +1596,12 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
}
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
- struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
return skb;
- return tcp_sacktag_bsearch(sk, state, skip_to_seq);
+ return tcp_sacktag_bsearch(sk, skip_to_seq);
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1615,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
return skb;
if (before(next_dup->start_seq, skip_to_seq)) {
- skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
+ skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
skb = tcp_sacktag_walk(skb, sk, NULL, state,
next_dup->start_seq, next_dup->end_seq,
1);
@@ -1756,8 +1755,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, state,
- start_seq);
+ skb = tcp_sacktag_skip(skb, sk, start_seq);
skb = tcp_sacktag_walk(skb, sk, next_dup,
state,
start_seq,
@@ -1783,7 +1781,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
+ skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
@@ -1794,7 +1792,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (!skb)
break;
}
- skb = tcp_sacktag_skip(skb, sk, state, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, start_seq);
walk:
skb = tcp_sacktag_walk(skb, sk, next_dup, state,
@@ -1863,16 +1861,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
/* Emulate SACKs for SACKless connection: account for a new dupack. */
-static void tcp_add_reno_sack(struct sock *sk)
+static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 prior_sacked = tp->sacked_out;
+ if (num_dupack) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+ s32 delivered;
- tp->sacked_out++;
- tcp_check_reno_reordering(sk, 0);
- if (tp->sacked_out > prior_sacked)
- tp->delivered++; /* Some out-of-order packet is delivered */
- tcp_verify_left_out(tp);
+ tp->sacked_out += num_dupack;
+ tcp_check_reno_reordering(sk, 0);
+ delivered = tp->sacked_out - prior_sacked;
+ if (delivered > 0)
+ tp->delivered += delivered;
+ tcp_verify_left_out(tp);
+ }
}
/* Account for ACK, ACKing some data in Reno Recovery phase. */
@@ -2457,8 +2459,8 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
- !(flag & FLAG_LOST_RETRANS)) {
+ } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
+ FLAG_RETRANS_DATA_ACKED) {
sndcnt = min_t(int, delta,
max_t(int, tp->prr_delivered - tp->prr_out,
newly_acked_sacked) + 1);
@@ -2634,7 +2636,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
int *rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2653,7 +2655,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
return;
if (after(tp->snd_nxt, tp->high_seq)) {
- if (flag & FLAG_DATA_SACKED || is_dupack)
+ if (flag & FLAG_DATA_SACKED || num_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
@@ -2679,8 +2681,8 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
*/
- if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
- tcp_add_reno_sack(sk);
+ if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
+ tcp_add_reno_sack(sk, num_dupack);
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
@@ -2757,13 +2759,13 @@ static bool tcp_force_fast_retransmit(struct sock *sk)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
- bool is_dupack, int *ack_flag, int *rexmit)
+ int num_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
- bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- tcp_force_fast_retransmit(sk));
+ bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
+ tcp_force_fast_retransmit(sk));
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
@@ -2810,8 +2812,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
- if (tcp_is_reno(tp) && is_dupack)
- tcp_add_reno_sack(sk);
+ if (tcp_is_reno(tp))
+ tcp_add_reno_sack(sk, num_dupack);
} else {
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
@@ -2826,7 +2828,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tcp_identify_packet_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
- tcp_process_loss(sk, flag, is_dupack, rexmit);
+ tcp_process_loss(sk, flag, num_dupack, rexmit);
tcp_identify_packet_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
@@ -2837,8 +2839,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
- if (is_dupack)
- tcp_add_reno_sack(sk);
+ tcp_add_reno_sack(sk, num_dupack);
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@@ -2910,9 +2911,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -3558,7 +3561,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
- bool is_dupack = false;
+ int num_dupack = 0;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
@@ -3588,7 +3591,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* this segment (RFC793 Section 3.9).
*/
if (after(ack, tp->snd_nxt))
- goto invalid_ack;
+ return -1;
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
@@ -3610,7 +3613,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (flag & FLAG_UPDATE_TS_RECENT)
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
- if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+ if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
+ FLAG_SND_UNA_ADVANCED) {
/* Window is constant, pure forward advance.
* No more checks are required.
* Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3668,8 +3672,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_set_xmit_timer(sk);
if (tcp_ack_is_dubious(sk, flag)) {
- is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ num_dupack = 1;
+ /* Consider if pure acks were aggregated in tcp_add_backlog() */
+ if (!(flag & FLAG_DATA))
+ num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ }
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
}
@@ -3687,7 +3696,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
tcp_newly_delivered(sk, delivered, flag);
}
@@ -3701,10 +3710,6 @@ no_queue:
tcp_process_tlp_ack(sk, ack, flag);
return 1;
-invalid_ack:
- SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
- return -1;
-
old_ack:
/* If data was SACKed, tag it and see if we should send more data.
* If data was DSACKed, see if we can undo a cwnd reduction.
@@ -3712,13 +3717,12 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
tcp_newly_delivered(sk, delivered, flag);
tcp_xmit_recovery(sk, rexmit);
}
- SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
return 0;
}
@@ -4081,7 +4085,7 @@ void tcp_fin(struct sock *sk)
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
- inet_csk(sk)->icsk_ack.pingpong = 1;
+ inet_csk_enter_pingpong_mode(sk);
break;
case TCP_CLOSE_WAIT:
@@ -4268,7 +4272,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -4363,6 +4367,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
+ skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
}
return true;
@@ -4418,13 +4423,9 @@ static void tcp_ofo_queue(struct sock *sk)
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
- SOCK_DEBUG(sk, "ofo packet was already received\n");
tcp_drop(sk, skb);
continue;
}
- SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq);
tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
@@ -4488,8 +4489,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
seq = TCP_SKB_CB(skb)->seq;
end_seq = TCP_SKB_CB(skb)->end_seq;
- SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, seq, end_seq);
p = &tp->out_of_order_queue.rb_node;
if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
@@ -4601,13 +4600,12 @@ end:
}
}
-static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
- bool *fragstolen)
+static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+ bool *fragstolen)
{
int eaten;
struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
- __skb_pull(skb, hdrlen);
eaten = (tail &&
tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
@@ -4658,7 +4656,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
- if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
+ if (tcp_queue_rcv(sk, skb, &fragstolen)) {
WARN_ON_ONCE(fragstolen); /* should not happen */
__kfree_skb(skb);
}
@@ -4718,7 +4716,7 @@ queue_and_out:
goto drop;
}
- eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4766,10 +4764,6 @@ drop:
if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
/* Partial packet, seq < rcv_next < end_seq */
- SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq);
-
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
/* If window is closed, drop tail of packet. But after
@@ -5048,8 +5042,6 @@ static int tcp_prune_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
-
NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
@@ -5188,7 +5180,17 @@ send_now:
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
@@ -5584,8 +5586,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
- &fragstolen);
+ __skb_pull(skb, tcp_header_len);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
tcp_event_data_recv(sk, skb);
@@ -5866,7 +5868,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
return -1;
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
- icsk->icsk_ack.pingpong) {
+ inet_csk_in_pingpong_mode(sk)) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*