 Documentation/networking/ip-sysctl.rst |  75 +++++++++++++++
 include/linux/tcp.h                    |   1 +
 include/net/netns/ipv4.h               |   5 +
 include/net/tcp.h                      |  28 ++++++
 include/uapi/linux/snmp.h              |   1 +
 include/uapi/linux/tcp.h               |   6 +
 net/ipv4/Makefile                      |   2 +-
 net/ipv4/proc.c                        |   1 +
 net/ipv4/sysctl_net_ipv4.c             |  43 +++++++++
 net/ipv4/tcp.c                         |   5 +
 net/ipv4/tcp_dctcp.c                   |  23 ++++-
 net/ipv4/tcp_ipv4.c                    |   8 ++
 net/ipv4/tcp_plb.c                     | 109 ++++++++++++++++++
 13 files changed, 305 insertions(+), 2 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index e7b3fa7bb3f7..815efc89ad73 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1069,6 +1069,81 @@ tcp_child_ehash_entries - INTEGER
Default: 0
+tcp_plb_enabled - BOOLEAN
+ If set and the underlying congestion control (e.g. DCTCP) supports
+ and enables the PLB feature, TCP PLB (Protective Load Balancing) is
+ enabled. PLB is described in the following paper:
+ https://doi.org/10.1145/3544216.3544226. Based on PLB parameters,
+ upon sensing sustained congestion, TCP triggers a change in the
+ flow label field of outgoing IPv6 packets. A change in the flow
+ label field potentially changes the path of outgoing packets for
+ switches that use ECMP/WCMP for routing.
+
+ PLB changes the socket txhash, which results in a change in the IPv6
+ Flow Label field; it is currently a no-op for IPv4 headers. It is
+ possible to apply PLB to IPv4 via other network header fields (e.g.
+ TCP or IPv4 options) or via encapsulation, where the outer header is
+ used by switches to determine the next hop. In either case, further
+ host- and switch-side changes would be needed.
+
+ When set, PLB assumes that a congestion signal (e.g. ECN) is made
+ available to, and used by, the congestion control module to estimate
+ a congestion measure (e.g. ce_ratio). PLB needs a congestion measure
+ to make repathing decisions.
+
+ Default: FALSE
+
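
As a sketch of why flipping the flow label moves traffic: switches typically pick an ECMP next hop by hashing invariant header fields, and for IPv6 the flow label is commonly part of that hash. The toy model below is illustrative only; the hash function and port count are assumptions, not how any particular switch works.

#include <stdint.h>
#include <stdio.h>

/* Toy ECMP: pick one of n_ports uplinks by hashing invariant header
 * fields plus the IPv6 flow label. Any change in flow_label can land
 * the flow on a different port, i.e. a different path.
 */
static unsigned int ecmp_next_hop(uint32_t src, uint32_t dst,
				  uint16_t sport, uint16_t dport,
				  uint32_t flow_label, unsigned int n_ports)
{
	uint32_t h = src ^ dst ^ ((uint32_t)sport << 16 | dport) ^ flow_label;

	h ^= h >> 16; /* cheap avalanche; real switches use CRC-like hashes */
	h *= 0x45d9f3bU;
	h ^= h >> 16;
	return h % n_ports;
}

int main(void)
{
	/* Same 4-tuple, two flow labels: likely two different uplinks. */
	printf("%u %u\n",
	       ecmp_next_hop(1, 2, 3, 4, 0x11111, 8),
	       ecmp_next_hop(1, 2, 3, 4, 0x22222, 8));
	return 0;
}
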
+tcp_plb_idle_rehash_rounds - INTEGER
+ Number of consecutive congested rounds (RTT) seen after which
+ a rehash can be performed, given there are no packets in flight.
+ This is referred to as M in the PLB paper:
+ https://doi.org/10.1145/3544216.3544226.
+
+ Possible Values: 0 - 31
+
+ Default: 3
+
+tcp_plb_rehash_rounds - INTEGER
+ Number of consecutive congested rounds (RTT) seen after which
+ a forced rehash can be performed. Be careful when setting this
+ parameter, as a small value increases the risk of retransmissions.
+ This is referred to as N in the PLB paper:
+ https://doi.org/10.1145/3544216.3544226.
+
+ Possible Values: 0 - 31
+
+ Default: 12
+
+tcp_plb_suspend_rto_sec - INTEGER
+ Time, in seconds, to suspend PLB in the event of an RTO. To avoid
+ having PLB repath onto a connectivity "black hole", after an RTO a TCP
+ connection suspends PLB repathing for a random duration between 1x and
+ 2x of this parameter. Randomness is added to avoid concurrent rehashing
+ of multiple TCP connections. This should be set to roughly the
+ amount of time it takes to repair a failed link.
+
+ Possible Values: 0 - 255
+
+ Default: 60
+
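
To make the randomized suspension window concrete, here is a minimal userspace sketch that mirrors the arithmetic the patch applies in tcp_plb_update_state_upon_rto(). Names, HZ value, and the use of rand() are hypothetical stand-ins; the kernel code uses prandom_u32_max() and tcp_jiffies32.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HZ 1000 /* assumed tick rate for the example */

static uint32_t pick_pause_jiffies(uint32_t suspend_rto_sec)
{
	uint32_t pause = suspend_rto_sec * HZ;

	/* Random duration in [1x, 2x) of tcp_plb_suspend_rto_sec,
	 * like pause += prandom_u32_max(pause) in the patch.
	 */
	return pause + (uint32_t)(rand() % pause);
}

int main(void)
{
	/* With the default of 60s, PLB stays suspended for 60-120s. */
	printf("pause = %u jiffies\n", pick_pause_jiffies(60));
	return 0;
}
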
+tcp_plb_cong_thresh - INTEGER
+ Fraction of packets marked with congestion over a round (RTT) to
+ tag that round as congested. This is referred to as K in the PLB paper:
+ https://doi.org/10.1145/3544216.3544226.
+
+ The 0-1 fraction range is mapped to the 0-256 range to avoid floating
+ point operations. For example, 128 means that if at least 50% of
+ the packets in a round were marked as congested then the round
+ will be tagged as congested.
+
+ Setting the threshold to 0 means that PLB repaths every RTT regardless
+ of congestion. This is not the intended behavior for PLB and should
+ be used only for experimentation.
+
+ Possible Values: 0 - 256
+
+ Default: 128
+
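
A worked example of the fixed-point mapping: with TCP_PLB_SCALE = 8, a fraction f of CE-marked packets maps to f * 256. The sketch below mirrors the ce_ratio computation this patch adds to tcp_dctcp.c; the function name is a hypothetical userspace stand-in.

#include <stdint.h>
#include <stdio.h>

#define TCP_PLB_SCALE 8 /* fractions are scaled by 1 << 8 = 256 */

/* Returns nonzero if the round should be tagged as congested.
 * Caller guarantees delivered > 0, as the dctcp hunk in this
 * patch does before computing ce_ratio.
 */
static int round_is_congested(uint32_t delivered, uint32_t delivered_ce,
			      uint32_t cong_thresh)
{
	uint32_t ce_ratio = 0;

	if (delivered_ce > 0)
		ce_ratio = (delivered_ce << TCP_PLB_SCALE) / delivered;
	return ce_ratio >= cong_thresh;
}

int main(void)
{
	/* 60 of 100 packets CE-marked: ce_ratio = 153, above the
	 * default threshold of 128 (50%), so the round is congested.
	 */
	printf("%d\n", round_is_congested(100, 60, 128));
	return 0;
}
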
UDP variables
=============
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 41b1da621a45..ca7f05a130d2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -423,6 +423,7 @@ struct tcp_sock {
u32 probe_seq_start;
u32 probe_seq_end;
} mtu_probe;
+ u32 plb_rehash; /* PLB-triggered rehash attempts */
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
* while socket was owned by user.
*/
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1b8004679445..25f90bba4889 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -183,6 +183,11 @@ struct netns_ipv4 {
unsigned long tfo_active_disable_stamp;
u32 tcp_challenge_timestamp;
u32 tcp_challenge_count;
+ u8 sysctl_tcp_plb_enabled;
+ u8 sysctl_tcp_plb_idle_rehash_rounds;
+ u8 sysctl_tcp_plb_rehash_rounds;
+ u8 sysctl_tcp_plb_suspend_rto_sec;
+ int sysctl_tcp_plb_cong_thresh;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 14d45661a84d..6b814e788f00 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2140,6 +2140,34 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
+/* tcp_plb.c */
+
+/*
+ * Scaling factor for fractions in PLB. For example, tcp_plb_update_state
+ * expects cong_ratio, which represents the fraction of traffic that
+ * experienced congestion over a single RTT. In order to avoid floating
+ * point operations, this fraction should be mapped to (1 << TCP_PLB_SCALE)
+ * and passed in.
+ */
+#define TCP_PLB_SCALE 8
+
+/* State for PLB (Protective Load Balancing) for a single TCP connection. */
+struct tcp_plb_state {
+ u8 consec_cong_rounds:5, /* consecutive congested rounds */
+ unused:3;
+ u32 pause_until; /* jiffies32 when PLB can resume rerouting */
+};
+
+static inline void tcp_plb_init(const struct sock *sk,
+ struct tcp_plb_state *plb)
+{
+ plb->consec_cong_rounds = 0;
+ plb->pause_until = 0;
+}
+void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
+ const int cong_ratio);
+void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
+void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
+
/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 4d7470036a8b..6600cb0164c2 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -292,6 +292,7 @@ enum
LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */
LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */
LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */
+ LINUX_MIB_TCPPLBREHASH, /* TCPPLBRehash */
__LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 8fc09e8638b3..879eeb0a084b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -284,6 +284,11 @@ struct tcp_info {
__u32 tcpi_snd_wnd; /* peer's advertised receive window after
* scaling (bytes)
*/
+ __u32 tcpi_rcv_wnd; /* local advertised receive window after
+ * scaling (bytes)
+ */
+
+ __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -315,6 +320,7 @@ enum {
TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */
TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */
TCP_NLA_TTL, /* TTL or hop limit of a packet received */
+ TCP_NLA_REHASH, /* PLB and timeout triggered rehash attempts */
};
/* for TCP_MD5SIG socket option */
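
For consumers of the new tcpi_rehash counter, a minimal userspace sketch follows. It uses only the standard TCP_INFO getsockopt; the tcpi_rehash field exists only when building against uapi headers that include this patch, and older kernels return a shorter struct (handled by the length check).

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h> /* IPPROTO_TCP */
#include <linux/tcp.h>  /* uapi struct tcp_info with tcpi_rehash */

/* Print PLB/timeout rehash attempts for a connected TCP socket fd. */
static void print_rehash(int fd)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0 &&
	    len >= sizeof(info))
		printf("rehash attempts: %u\n", info.tcpi_rehash);
}
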
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bbdd9c44f14e..af7d2cf490fb 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
tcp_rate.o tcp_recovery.o tcp_ulp.o \
- tcp_offload.o datagram.o raw.o udp.o udplite.o \
+ tcp_offload.o tcp_plb.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 5386f460bd20..f88daace9de3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -297,6 +297,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
+ SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9b8a6db7a66b..0af28cedd071 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -40,6 +40,8 @@ static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
+static int tcp_plb_max_rounds = 31;
+static int tcp_plb_max_cong_thresh = 256;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1384,6 +1386,47 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
+ {
+ .procname = "tcp_plb_enabled",
+ .data = &init_net.ipv4.sysctl_tcp_plb_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "tcp_plb_idle_rehash_rounds",
+ .data = &init_net.ipv4.sysctl_tcp_plb_idle_rehash_rounds,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra2 = &tcp_plb_max_rounds,
+ },
+ {
+ .procname = "tcp_plb_rehash_rounds",
+ .data = &init_net.ipv4.sysctl_tcp_plb_rehash_rounds,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra2 = &tcp_plb_max_rounds,
+ },
+ {
+ .procname = "tcp_plb_suspend_rto_sec",
+ .data = &init_net.ipv4.sysctl_tcp_plb_suspend_rto_sec,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
+ {
+ .procname = "tcp_plb_cong_thresh",
+ .data = &init_net.ipv4.sysctl_tcp_plb_cong_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_plb_max_cong_thresh,
+ },
{ }
};
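
The new knobs land under /proc/sys/net/ipv4/ alongside the existing TCP sysctls. A hedged sketch of flipping them from userspace (requires privilege; one could equally use sysctl(8) or sysfs tooling):

#include <stdio.h>

/* Write a value to a sysctl file; returns 0 on success. */
static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* Enable PLB and keep the default 50% congestion threshold. */
	write_sysctl("/proc/sys/net/ipv4/tcp_plb_enabled", "1");
	write_sysctl("/proc/sys/net/ipv4/tcp_plb_cong_thresh", "128");
	return 0;
}
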
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef14efa1fb70..de8f0cd7cb32 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3176,6 +3176,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->sacked_out = 0;
tp->tlp_high_seq = 0;
tp->last_oow_ack_time = 0;
+ tp->plb_rehash = 0;
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
tp->rack.mstamp = 0;
@@ -3939,6 +3940,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_reord_seen = tp->reord_seen;
info->tcpi_rcv_ooopack = tp->rcv_ooopack;
info->tcpi_snd_wnd = tp->snd_wnd;
+ info->tcpi_rcv_wnd = tp->rcv_wnd;
+ info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
unlock_sock_fast(sk, slow);
}
@@ -3973,6 +3976,7 @@ static size_t tcp_opt_stats_get_size(void)
nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */
+ nla_total_size(sizeof(u32)) + /* TCP_NLA_REHASH */
0;
}
@@ -4049,6 +4053,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
nla_put_u8(stats, TCP_NLA_TTL,
tcp_skb_ttl_or_hop_limit(ack_skb));
+ nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash);
return stats;
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 2a6c0dd665a4..e0a2ca7456ff 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -54,6 +54,7 @@ struct dctcp {
u32 next_seq;
u32 ce_state;
u32 loss_cwnd;
+ struct tcp_plb_state plb;
};
static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
@@ -91,6 +92,8 @@ static void dctcp_init(struct sock *sk)
ca->ce_state = 0;
dctcp_reset(tp, ca);
+ tcp_plb_init(sk, &ca->plb);
+
return;
}
@@ -117,14 +120,28 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
+ u32 delivered = tp->delivered - ca->old_delivered;
u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
u32 alpha = ca->dctcp_alpha;
+ u32 ce_ratio = 0;
+
+ if (delivered > 0) {
+ /* dctcp_alpha keeps EWMA of fraction of ECN marked
+ * packets. Because of EWMA smoothing, PLB reaction can
+ * be slow so we use ce_ratio which is an instantaneous
+ * measure of congestion. ce_ratio is the fraction of
+ * ECN marked packets in the previous RTT.
+ */
+ if (delivered_ce > 0)
+ ce_ratio = (delivered_ce << TCP_PLB_SCALE) / delivered;
+ tcp_plb_update_state(sk, &ca->plb, (int)ce_ratio);
+ tcp_plb_check_rehash(sk, &ca->plb);
+ }
/* alpha = (1 - g) * alpha + g * F */
alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
if (delivered_ce) {
- u32 delivered = tp->delivered - ca->old_delivered;
/* If dctcp_shift_g == 1, a 32bit value would overflow
* after 8 M packets.
@@ -172,8 +189,12 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
break;
case CA_EVENT_LOSS:
+ tcp_plb_update_state_upon_rto(sk, &ca->plb);
dctcp_react_to_loss(sk);
break;
+ case CA_EVENT_TX_START:
+ tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */
+ break;
default:
/* Don't care for the rest. */
break;
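
The comment in the hunk above distinguishes DCTCP's smoothed alpha from the instantaneous ce_ratio that PLB consumes. A small simulation sketch (hypothetical userspace names; min_not_zero() simplified away) shows why the EWMA reacts more slowly:

#include <stdint.h>
#include <stdio.h>

#define TCP_PLB_SCALE 8
#define DCTCP_SHIFT_G 4 /* g = 1/16, the module default */

int main(void)
{
	uint32_t alpha = 0; /* EWMA of CE fraction, scaled to 1024 as in DCTCP */
	uint32_t i;

	/* Feed five fully CE-marked rounds (ce_ratio = 256 each). */
	for (i = 0; i < 5; i++) {
		/* alpha = (1 - g) * alpha + g * F, with F = 1 here */
		alpha -= alpha >> DCTCP_SHIFT_G;
		alpha += 1024U >> DCTCP_SHIFT_G;
		printf("round %u: alpha=%u/1024, ce_ratio=%u/256\n",
		       i + 1, alpha, 1U << TCP_PLB_SCALE);
	}
	/* After 5 rounds alpha is still only ~284/1024, while ce_ratio
	 * has been at its maximum since round 1 - hence PLB keys its
	 * repathing decision off ce_ratio.
	 */
	return 0;
}
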
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 87d440f47a70..ebab9e8b184c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3218,6 +3218,14 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+ /* Set default values for PLB */
+ net->ipv4.sysctl_tcp_plb_enabled = 0; /* Disabled by default */
+ net->ipv4.sysctl_tcp_plb_idle_rehash_rounds = 3;
+ net->ipv4.sysctl_tcp_plb_rehash_rounds = 12;
+ net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60;
+ /* Default congestion threshold for PLB to mark a round is 50% */
+ net->ipv4.sysctl_tcp_plb_cong_thresh = (1 << TCP_PLB_SCALE) / 2;
+
/* Reno is always built in */
if (!net_eq(net, &init_net) &&
bpf_try_module_get(init_net.ipv4.tcp_congestion_control,
diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c
new file mode 100644
index 000000000000..bb1a08fda113
--- /dev/null
+++ b/net/ipv4/tcp_plb.c
@@ -0,0 +1,109 @@
+/* Protective Load Balancing (PLB)
+ *
+ * PLB was designed to reduce link load imbalance across datacenter
+ * switches. PLB is a host-based optimization; it leverages congestion
+ * signals from the transport layer to randomly change the path of the
+ * connection experiencing sustained congestion. PLB prefers to repath
+ * after idle periods to minimize packet reordering. It repaths by
+ * changing the IPv6 Flow Label on the packets of a connection, which
+ * datacenter switches include as part of ECMP/WCMP hashing.
+ *
+ * PLB is described in detail in:
+ *
+ * Mubashir Adnan Qureshi, Yuchung Cheng, Qianwen Yin, Qiaobin Fu,
+ * Gautam Kumar, Masoud Moshref, Junhua Yan, Van Jacobson,
+ * David Wetherall, Abdul Kabbani:
+ * "PLB: Congestion Signals are Simple and Effective for
+ * Network Load Balancing"
+ * In ACM SIGCOMM 2022, Amsterdam, Netherlands.
+ *
+ */
+
+#include <net/tcp.h>
+
+/* Called once per round-trip to update PLB state for a connection. */
+void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
+ const int cong_ratio)
+{
+ struct net *net = sock_net(sk);
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))
+ return;
+
+ if (cong_ratio >= 0) {
+ if (cong_ratio < READ_ONCE(net->ipv4.sysctl_tcp_plb_cong_thresh))
+ plb->consec_cong_rounds = 0;
+ else if (plb->consec_cong_rounds <
+ READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds))
+ plb->consec_cong_rounds++;
+ }
+}
+EXPORT_SYMBOL_GPL(tcp_plb_update_state);
+
+/* Check whether recent congestion has been persistent enough to warrant
+ * a load balancing decision that switches the connection to another path.
+ */
+void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb)
+{
+ struct net *net = sock_net(sk);
+ u32 max_suspend;
+ bool forced_rehash = false, idle_rehash = false;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))
+ return;
+
+ forced_rehash = plb->consec_cong_rounds >=
+ READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds);
+ /* If sender goes idle then we check whether to rehash. */
+ idle_rehash = READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds) &&
+ !tcp_sk(sk)->packets_out &&
+ plb->consec_cong_rounds >=
+ READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds);
+
+ if (!forced_rehash && !idle_rehash)
+ return;
+
+ /* Note that tcp_jiffies32 can wrap; we detect wraps by checking for
+ * cases where the max suspension end is before the actual suspension
+ * end. We clear pause_until to 0 to indicate there is no recent
+ * RTO event that constrains PLB rehashing.
+ */
+ max_suspend = 2 * READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ;
+ if (plb->pause_until &&
+ (!before(tcp_jiffies32, plb->pause_until) ||
+ before(tcp_jiffies32 + max_suspend, plb->pause_until)))
+ plb->pause_until = 0;
+
+ if (plb->pause_until)
+ return;
+
+ sk_rethink_txhash(sk);
+ plb->consec_cong_rounds = 0;
+ tcp_sk(sk)->plb_rehash++;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH);
+}
+EXPORT_SYMBOL_GPL(tcp_plb_check_rehash);
+
+/* Upon RTO, disallow load balancing for a while, to avoid having load
+ * balancing decisions switch traffic to a black-holed path that was
+ * previously avoided with a sk_rethink_txhash() call at RTO time.
+ */
+void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb)
+{
+ struct net *net = sock_net(sk);
+ u32 pause;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))
+ return;
+
+ pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ;
+ pause += prandom_u32_max(pause);
+ plb->pause_until = tcp_jiffies32 + pause;
+
+ /* Reset PLB state upon RTO, since an RTO causes a sk_rethink_txhash() call
+ * that may switch this connection to a path with completely different
+ * congestion characteristics.
+ */
+ plb->consec_cong_rounds = 0;
+}
+EXPORT_SYMBOL_GPL(tcp_plb_update_state_upon_rto);
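
The pause_until bookkeeping in tcp_plb_check_rehash() above leans on the kernel's wrap-safe before() comparison. Below is a self-contained sketch of that check; before() is re-derived here for illustration, and the HZ value and function names are assumptions.

#include <stdint.h>
#include <stdio.h>

#define HZ 1000

/* Wrap-safe "a is before b" on 32-bit tick counters, as in the kernel. */
static int before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

/* Mirrors the pause_until validity check in tcp_plb_check_rehash(). */
static int pause_expired(uint32_t now, uint32_t pause_until,
			 uint32_t suspend_rto_sec)
{
	uint32_t max_suspend = 2 * suspend_rto_sec * HZ;

	/* Expired normally, or pause_until lies beyond the maximum
	 * possible suspension window - a stale pre-wrap value.
	 */
	return !before(now, pause_until) ||
	       before(now + max_suspend, pause_until);
}

int main(void)
{
	/* pause_until is 300s past now, but max_suspend is 120s, so
	 * the value must be stale; prints 1 (treat pause as expired).
	 */
	printf("%d\n", pause_expired(100, 100 + 300 * HZ, 60));
	return 0;
}
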