aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c128
1 files changed, 61 insertions, 67 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b55f60f6fcbe..65f6c0406245 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -182,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk)
icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}
-void tcp_enter_quickack_mode(struct sock *sk)
+static void tcp_enter_quickack_mode(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_incr_quickack(sk);
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
sizeof(struct sk_buff);
- if (sk->sk_sndbuf < 3 * sndmem)
- sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+ if (sk->sk_sndbuf < 3 * sndmem) {
+ sk->sk_sndbuf = 3 * sndmem;
+ if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+ sk->sk_sndbuf = sysctl_tcp_wmem[2];
+ }
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -428,10 +431,10 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*
* The algorithm for RTT estimation w/o timestamps is based on
* Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
- * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ * <http://public.lanl.gov/radiant/pubs.html#DRS>
*
* More detail on this code can be found at
- * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * <http://staff.psc.edu/jheffner/>,
* though this reference is out of date. A new paper
* is pending.
*/
@@ -731,7 +734,7 @@ void tcp_update_metrics(struct sock *sk)
* Reset our results.
*/
if (!(dst_metric_locked(dst, RTAX_RTT)))
- dst->metrics[RTAX_RTT - 1] = 0;
+ dst_metric_set(dst, RTAX_RTT, 0);
return;
}
@@ -773,57 +776,48 @@ void tcp_update_metrics(struct sock *sk)
if (dst_metric(dst, RTAX_SSTHRESH) &&
!dst_metric_locked(dst, RTAX_SSTHRESH) &&
(tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
- dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+ dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
if (!dst_metric_locked(dst, RTAX_CWND) &&
tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
+ dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
} else if (tp->snd_cwnd > tp->snd_ssthresh &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
if (!dst_metric_locked(dst, RTAX_SSTHRESH))
- dst->metrics[RTAX_SSTHRESH-1] =
- max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+ dst_metric_set(dst, RTAX_SSTHRESH,
+ max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
if (!dst_metric_locked(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
+ dst_metric_set(dst, RTAX_CWND,
+ (dst_metric(dst, RTAX_CWND) +
+ tp->snd_cwnd) >> 1);
} else {
/* Else slow start did not finish, cwnd is non-sense,
ssthresh may be also invalid.
*/
if (!dst_metric_locked(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+ dst_metric_set(dst, RTAX_CWND,
+ (dst_metric(dst, RTAX_CWND) +
+ tp->snd_ssthresh) >> 1);
if (dst_metric(dst, RTAX_SSTHRESH) &&
!dst_metric_locked(dst, RTAX_SSTHRESH) &&
tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
- dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+ dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
}
if (!dst_metric_locked(dst, RTAX_REORDERING)) {
if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
tp->reordering != sysctl_tcp_reordering)
- dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+ dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
}
}
}
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- * The RFC specifies a window of no more than 4380 bytes
- * unless 2*MSS > 4380. Reading the pseudocode in the RFC
- * is a bit misleading because they use a clamp at 4380 bytes
- * rather than use a multiplier in the relevant range.
- */
__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
{
__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
- if (!cwnd) {
- if (tp->mss_cache > 1460)
- cwnd = 2;
- else
- cwnd = (tp->mss_cache > 1095) ? 3 : 4;
- }
+ if (!cwnd)
+ cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
@@ -922,25 +916,20 @@ static void tcp_init_metrics(struct sock *sk)
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
- if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
- goto reset;
-
-cwnd:
- tp->snd_cwnd = tcp_init_cwnd(tp, dst);
- tp->snd_cwnd_stamp = tcp_time_stamp;
- return;
-
+ if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
reset:
- /* Play conservative. If timestamps are not
- * supported, TCP will fail to recalculate correct
- * rtt, if initial rto is too small. FORGET ALL AND RESET!
- */
- if (!tp->rx_opt.saw_tstamp && tp->srtt) {
- tp->srtt = 0;
- tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
- inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+ /* Play conservative. If timestamps are not
+ * supported, TCP will fail to recalculate correct
+ * rtt, if initial rto is too small. FORGET ALL AND RESET!
+ */
+ if (!tp->rx_opt.saw_tstamp && tp->srtt) {
+ tp->srtt = 0;
+ tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
+ inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+ }
}
- goto cwnd;
+ tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -1233,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1310,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+ if (tp->undo_marker && tp->undo_retrans &&
+ after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
state->reord = min(fack_count, state->reord);
@@ -2314,7 +2304,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
{
- return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
+ return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
}
static inline int tcp_head_timedout(struct sock *sk)
@@ -2508,7 +2498,7 @@ static void tcp_timeout_skbs(struct sock *sk)
/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
* is against sacked "cnt", otherwise it's against facked "cnt"
*/
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -2516,13 +2506,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
int err;
unsigned int mss;
- if (packets == 0)
- return;
-
WARN_ON(packets > tp->packets_out);
if (tp->lost_skb_hint) {
skb = tp->lost_skb_hint;
cnt = tp->lost_cnt_hint;
+ /* Head already handled? */
+ if (mark_head && skb != tcp_write_queue_head(sk))
+ return;
} else {
skb = tcp_write_queue_head(sk);
cnt = 0;
@@ -2557,6 +2547,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
}
tcp_skb_mark_lost(tp, skb);
+
+ if (mark_head)
+ break;
}
tcp_verify_left_out(tp);
}
@@ -2568,17 +2561,18 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_is_reno(tp)) {
- tcp_mark_head_lost(sk, 1);
+ tcp_mark_head_lost(sk, 1, 1);
} else if (tcp_is_fack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, lost);
+ tcp_mark_head_lost(sk, lost, 0);
} else {
int sacked_upto = tp->sacked_out - tp->reordering;
- if (sacked_upto < fast_rexmit)
- sacked_upto = fast_rexmit;
- tcp_mark_head_lost(sk, sacked_upto);
+ if (sacked_upto >= 0)
+ tcp_mark_head_lost(sk, sacked_upto, 0);
+ else if (fast_rexmit)
+ tcp_mark_head_lost(sk, 1, 1);
}
tcp_timeout_skbs(sk);
@@ -2887,7 +2881,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
icsk->icsk_mtup.probe_size;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
- tp->rcv_ssthresh = tcp_current_ssthresh(sk);
+ tp->snd_ssthresh = tcp_current_ssthresh(sk);
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
icsk->icsk_mtup.probe_size = 0;
@@ -2984,7 +2978,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+ tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
}
@@ -3412,8 +3406,8 @@ static void tcp_ack_probe(struct sock *sk)
static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
{
- return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
- inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
+ return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+ inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
}
static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3430,9 +3424,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
const u32 ack, const u32 ack_seq,
const u32 nwin)
{
- return (after(ack, tp->snd_una) ||
+ return after(ack, tp->snd_una) ||
after(ack_seq, tp->snd_wl1) ||
- (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
+ (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
}
/* Update our send window.
@@ -4406,7 +4400,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
- eaten = (chunk == skb->len && !th->fin);
+ eaten = (chunk == skb->len);
tcp_rcv_space_adjust(sk);
}
local_bh_disable();
@@ -4870,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */