diff options
author | David S. Miller <davem@davemloft.net> | 2018-11-30 13:26:54 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-11-30 13:26:54 -0800 |
commit | 2f69555315ad7dc1ac37366b2ac2429e2d24d444 (patch) | |
tree | 3b70704dbf9f7cf27ec16d9321f7c1956b81c3a5 /net/ipv4/tcp_input.c | |
parent | b0e3f1bdf9e7140fd1151af575f468b5827a61e1 (diff) | |
parent | 4f693b55c3d2d2239b8a0094b518a1e533cf75d5 (diff) |
Merge branch 'tcp-take-a-bit-more-care-of-backlog-stress'
Eric Dumazet says:
====================
tcp: take a bit more care of backlog stress
While working on the SACK compression issue Jean-Louis Dupond
reported, we found that his linux box was suffering very hard
from tail drops on the socket backlog queue.
First patch hints the compiler about sack flows being the norm.
Second patch changes non-sack code in preparation of the ack
compression.
Third patch fixes tcp_space() to take backlog into account.
Fourth patch is attempting coalescing when a new packet must
be added to the backlog queue. Cooking bigger skbs helps
to keep backlog list smaller and speeds its handling when
user thread finally releases the socket lock.
v3: Neal/Yuchung feedback addressed :
Do not aggregate if any skb has URG bit set.
Do not aggregate if the skbs have different ECE/CWR bits
v2: added feedback from Neal : tcp: take care of compressed acks in tcp_add_reno_sack()
added : tcp: hint compiler about sack flows
added : tcp: make tcp_space() aware of socket backlog
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 78752746a6e2..76858b14ebe9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1865,16 +1865,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) /* Emulate SACKs for SACKless connection: account for a new dupack. */ -static void tcp_add_reno_sack(struct sock *sk) +static void tcp_add_reno_sack(struct sock *sk, int num_dupack) { - struct tcp_sock *tp = tcp_sk(sk); - u32 prior_sacked = tp->sacked_out; + if (num_dupack) { + struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + s32 delivered; - tp->sacked_out++; - tcp_check_reno_reordering(sk, 0); - if (tp->sacked_out > prior_sacked) - tp->delivered++; /* Some out-of-order packet is delivered */ - tcp_verify_left_out(tp); + tp->sacked_out += num_dupack; + tcp_check_reno_reordering(sk, 0); + delivered = tp->sacked_out - prior_sacked; + if (delivered > 0) + tp->delivered += delivered; + tcp_verify_left_out(tp); + } } /* Account for ACK, ACKing some data in Reno Recovery phase. */ @@ -2636,7 +2640,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ -static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, +static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, int *rexmit) { struct tcp_sock *tp = tcp_sk(sk); @@ -2655,7 +2659,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, return; if (after(tp->snd_nxt, tp->high_seq)) { - if (flag & FLAG_DATA_SACKED || is_dupack) + if (flag & FLAG_DATA_SACKED || num_dupack) tp->frto = 0; /* Step 3.a. loss was real */ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { tp->high_seq = tp->snd_nxt; @@ -2681,8 +2685,8 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, /* A Reno DUPACK means new data in F-RTO step 2.b above are * delivered. Lower inflight to clock out (re)tranmissions. */ - if (after(tp->snd_nxt, tp->high_seq) && is_dupack) - tcp_add_reno_sack(sk); + if (after(tp->snd_nxt, tp->high_seq) && num_dupack) + tcp_add_reno_sack(sk, num_dupack); else if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); } @@ -2759,13 +2763,13 @@ static bool tcp_force_fast_retransmit(struct sock *sk) * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, - bool is_dupack, int *ack_flag, int *rexmit) + int num_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int fast_rexmit = 0, flag = *ack_flag; - bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && - tcp_force_fast_retransmit(sk)); + bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) && + tcp_force_fast_retransmit(sk)); if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; @@ -2812,8 +2816,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { - if (tcp_is_reno(tp) && is_dupack) - tcp_add_reno_sack(sk); + if (tcp_is_reno(tp)) + tcp_add_reno_sack(sk, num_dupack); } else { if (tcp_try_undo_partial(sk, prior_snd_una)) return; @@ -2828,7 +2832,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tcp_identify_packet_loss(sk, ack_flag); break; case TCP_CA_Loss: - tcp_process_loss(sk, flag, is_dupack, rexmit); + tcp_process_loss(sk, flag, num_dupack, rexmit); tcp_identify_packet_loss(sk, ack_flag); if (!(icsk->icsk_ca_state == TCP_CA_Open || (*ack_flag & FLAG_LOST_RETRANS))) @@ -2839,8 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); - if (is_dupack) - tcp_add_reno_sack(sk); + tcp_add_reno_sack(sk, num_dupack); } if (icsk->icsk_ca_state <= TCP_CA_Disorder) @@ -3562,7 +3565,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) bool is_sack_reneg = tp->is_sack_reneg; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; - bool is_dupack = false; + int num_dupack = 0; int prior_packets = tp->packets_out; u32 delivered = tp->delivered; u32 lost = tp->lost; @@ -3673,8 +3676,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_set_xmit_timer(sk); if (tcp_ack_is_dubious(sk, flag)) { - is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { + num_dupack = 1; + /* Consider if pure acks were aggregated in tcp_add_backlog() */ + if (!(flag & FLAG_DATA)) + num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + } + tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag, &rexmit); } @@ -3692,7 +3700,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) { - tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag, &rexmit); tcp_newly_delivered(sk, delivered, flag); } @@ -3717,7 +3725,7 @@ old_ack: if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag, &rexmit); tcp_newly_delivered(sk, delivered, flag); tcp_xmit_recovery(sk, rexmit); |