aboutsummaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/ctrl.c133
-rw-r--r--net/mptcp/mib.c3
-rw-r--r--net/mptcp/mib.h3
-rw-r--r--net/mptcp/protocol.c18
-rw-r--r--net/mptcp/protocol.h16
-rw-r--r--net/mptcp/subflow.c4
6 files changed, 166 insertions, 11 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 99382c317ebb..38d8121331d4 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -12,6 +12,7 @@
#include <net/netns/generic.h>
#include "protocol.h"
+#include "mib.h"
#define MPTCP_SYSCTL_PATH "net/mptcp"
@@ -27,8 +28,11 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int blackhole_timeout;
unsigned int close_timeout;
unsigned int stale_loss_cnt;
+ atomic_t active_disable_times;
+ unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
@@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->blackhole_timeout = 3600;
+ atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
@@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
return ret;
}
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+ int write, void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ atomic_set(&pernet->active_disable_times, 0);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "blackhole_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_blackhole_detect_timeout,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[6].data = &pernet->scheduler;
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
+ table[9].data = &pernet->blackhole_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
#endif /* CONFIG_SYSCTL */
+/* The following code block is to deal with middle box issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP globally when SYN+MPC are
+ * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
+ * disabled globally for 1hr at first. Then if it happens again, it is disabled
+ * for 2h, then 4h, 8h, ...
+ * The timeout is reset back to 1hr when a successful active MPTCP connection is
+ * fully established.
+ */
+
+/* Disable active MPTCP and record current jiffies and active_disable_times */
+void mptcp_active_disable(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct mptcp_pernet *pernet;
+
+ pernet = mptcp_get_pernet(net);
+
+ if (!READ_ONCE(pernet->blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in mptcp_active_should_disable() */
+ WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in mptcp_active_should_disable().
+ * We want pernet->active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&pernet->active_disable_times);
+
+ MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate timeout for MPTCP active disable
+ * Return true if we are still in the active MPTCP disable period
+ * Return false if timeout already expired and we should use active MPTCP
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+ struct net *net = sock_net(ssk);
+ unsigned int blackhole_timeout;
+ struct mptcp_pernet *pernet;
+ unsigned long timeout;
+ int disable_times;
+ int multiplier;
+
+ pernet = mptcp_get_pernet(net);
+ blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+ if (!blackhole_timeout)
+ return false;
+
+ disable_times = atomic_read(&pernet->active_disable_times);
+ if (!disable_times)
+ return false;
+
+ /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+ smp_rmb();
+
+ /* Limit timeout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(disable_times - 1, 6);
+
+ /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
+ timeout = READ_ONCE(pernet->active_disable_stamp) +
+ multiplier * blackhole_timeout * HZ;
+
+ return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed */
+void mptcp_active_enable(struct sock *sk)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+ if (atomic_read(&pernet->active_disable_times)) {
+ struct dst_entry *dst = sk_dst_get(sk);
+
+ if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+ atomic_set(&pernet->active_disable_times, 0);
+ }
+}
+
+/* Check the number of retransmissions, and fallback to TCP if needed */
+void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
+{
+ struct mptcp_subflow_context *subflow;
+ u32 timeouts;
+
+ if (!sk_is_mptcp(ssk))
+ return;
+
+ timeouts = inet_csk(ssk)->icsk_retransmits;
+ subflow = mptcp_subflow_ctx(ssk);
+
+ if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
+ if (timeouts == 2 || (timeouts < 2 && expired)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+ subflow->mpc_drop = 1;
+ mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ } else {
+ subflow->mpc_drop = 0;
+ }
+ }
+}
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index ec0d461cb921..38c2efc82b94 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+ SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
+ SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -73,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index d68136f93dac..c8ffe18a8722 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -10,6 +10,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -74,6 +76,7 @@ enum linux_mptcp_mib_field {
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 37ebcb7640eb..c2317919fc14 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3717,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
return 0;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
@@ -3744,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
+ if (subflow->request_mptcp) {
+ if (mptcp_active_should_disable(sk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ mptcp_subflow_early_fallback(msk, subflow);
+ } else if (mptcp_token_new_connect(ssk) < 0) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index bf03bff9ac44..74417aae08d0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -531,7 +531,8 @@ struct mptcp_subflow_context {
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
- __unused : 9;
+ mpc_drop : 1, /* the MPC option has been dropped in a rtx */
+ __unused : 8;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -697,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+
+void mptcp_active_disable(struct sock *sk);
+bool mptcp_active_should_disable(struct sock *ssk);
+void mptcp_active_enable(struct sock *sk);
+
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
@@ -1215,6 +1221,14 @@ static inline void mptcp_do_fallback(struct sock *ssk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
+static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ pr_fallback(msk);
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
struct mptcp_ext *mpext;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b9b14e75e8c2..1040b3b9696b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_capable = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_active_enable(parent);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
+ /* It looks like MPTCP is blocked, while TCP is not */
+ if (subflow->mpc_drop)
+ mptcp_active_disable(parent);
fallback:
mptcp_propagate_state(parent, sk, subflow, NULL);
}