aboutsummaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/ctrl.c133
-rw-r--r--net/mptcp/mib.c7
-rw-r--r--net/mptcp/mib.h7
-rw-r--r--net/mptcp/pm.c11
-rw-r--r--net/mptcp/pm_netlink.c78
-rw-r--r--net/mptcp/pm_userspace.c40
-rw-r--r--net/mptcp/protocol.c18
-rw-r--r--net/mptcp/protocol.h33
-rw-r--r--net/mptcp/subflow.c54
9 files changed, 260 insertions, 121 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 99382c317ebb..38d8121331d4 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -12,6 +12,7 @@
#include <net/netns/generic.h>
#include "protocol.h"
+#include "mib.h"
#define MPTCP_SYSCTL_PATH "net/mptcp"
@@ -27,8 +28,11 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int blackhole_timeout;
unsigned int close_timeout;
unsigned int stale_loss_cnt;
+ atomic_t active_disable_times;
+ unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
@@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->blackhole_timeout = 3600;
+ atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
@@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
return ret;
}
+/* Sysctl handler for "blackhole_timeout".
+ *
+ * Forwards the read/write to proc_dointvec_minmax() and, after a successful
+ * write, clears active_disable_times so that the new timeout starts from a
+ * clean state.
+ *
+ * The per-netns data is derived from table->data (which points to
+ * pernet->blackhole_timeout) rather than from current->nsproxy: the sysctl
+ * file may belong to a different netns than the calling task, and
+ * current->nsproxy can be NULL when the task is exiting.
+ *
+ * Returns the value returned by proc_dointvec_minmax().
+ */
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+					 int write, void *buffer, size_t *lenp,
+					 loff_t *ppos)
+{
+	struct mptcp_pernet *pernet = container_of(table->data,
+						   struct mptcp_pernet,
+						   blackhole_timeout);
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		atomic_set(&pernet->active_disable_times, 0);
+
+	return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "blackhole_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_blackhole_detect_timeout,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[6].data = &pernet->scheduler;
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
+ table[9].data = &pernet->blackhole_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
#endif /* CONFIG_SYSCTL */
+/* The following code block is to deal with middlebox issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP for the whole network
+ * namespace when SYN+MPC are dropped, while SYN without MPC aren't. In this
+ * case, active side MPTCP is disabled for 1h at first (the default value of
+ * the blackhole_timeout sysctl). Then if it happens again, it is disabled
+ * for 2h, then 4h, 8h, ... up to 2^6 times the initial timeout.
+ * The timeout is reset back to 1h when a successful active MPTCP connection is
+ * fully established.
+ */
+
+/* Disable active MPTCP and record current jiffies and active_disable_times.
+ *
+ * Does nothing when the per-netns blackhole_timeout is 0. Otherwise it stores
+ * the current jiffies in active_disable_stamp, increments
+ * active_disable_times and bumps the MPTCP_MIB_BLACKHOLE counter.
+ *
+ * The stamp is written before the counter is incremented (see the barrier
+ * below), so a reader that observes a non-zero counter also observes the
+ * matching stamp.
+ */
+void mptcp_active_disable(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct mptcp_pernet *pernet;
+
+ pernet = mptcp_get_pernet(net);
+
+ /* A zero timeout means the blackhole detection is turned off */
+ if (!READ_ONCE(pernet->blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in mptcp_active_should_disable() */
+ WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in mptcp_active_should_disable().
+ * We want pernet->active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&pernet->active_disable_times);
+
+ MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate timeout for MPTCP active disable
+ * Return true if we are still in the active MPTCP disable period
+ * Return false if timeout already expired and we should use active MPTCP
+ *
+ * Also returns false when the feature is off (blackhole_timeout == 0) or when
+ * no blackhole has been recorded (active_disable_times == 0).
+ *
+ * The disable period is blackhole_timeout seconds multiplied by
+ * 2^(active_disable_times - 1), with the exponent capped at 6, counted from
+ * active_disable_stamp.
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+	struct net *net = sock_net(ssk);
+	unsigned int blackhole_timeout;
+	struct mptcp_pernet *pernet;
+	unsigned long timeout;
+	int disable_times;
+	int multiplier;
+
+	pernet = mptcp_get_pernet(net);
+	blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+	if (!blackhole_timeout)
+		return false;
+
+	disable_times = atomic_read(&pernet->active_disable_times);
+	if (!disable_times)
+		return false;
+
+	/* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+	smp_rmb();
+
+	/* Limit timeout to max: 2^6 * initial timeout */
+	multiplier = 1 << min(disable_times - 1, 6);
+
+	/* Paired with the WRITE_ONCE() in mptcp_active_disable().
+	 * The product is computed as unsigned long: in unsigned int it could
+	 * wrap for large blackhole_timeout values before being added to the
+	 * jiffies stamp, silently shortening the disable period.
+	 */
+	timeout = READ_ONCE(pernet->active_disable_stamp) +
+		  (unsigned long)multiplier * blackhole_timeout * HZ;
+
+	return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed
+ *
+ * Called once an active MPTCP connection has been successfully established.
+ * If a blackhole had been recorded, the counter is cleared unless the
+ * connection went through a loopback device: a success over loopback says
+ * nothing about middleboxes on the real path, so it must not end the
+ * disable period.
+ */
+void mptcp_active_enable(struct sock *sk)
+{
+	struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+	if (atomic_read(&pernet->active_disable_times)) {
+		struct dst_entry *dst = sk_dst_get(sk);
+
+		if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
+			atomic_set(&pernet->active_disable_times, 0);
+
+		/* sk_dst_get() took a reference; dst_release() accepts NULL */
+		dst_release(dst);
+	}
+}
+
+/* Inspect the SYN retransmission count of @ssk and fall back to plain TCP
+ * when the MP_CAPABLE SYN looks like it is being dropped.
+ *
+ * Only acts on MPTCP subflows that requested MPTCP and are still in
+ * TCP_SYN_SENT. On the second retransmission, or earlier when @expired is
+ * set, the MPCapableSYNTXDrop counter is bumped, mpc_drop is set and the
+ * subflow falls back; otherwise mpc_drop is cleared.
+ */
+void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
+{
+	struct mptcp_subflow_context *ctx;
+	u32 rtx_count;
+
+	if (!sk_is_mptcp(ssk))
+		return;
+
+	ctx = mptcp_subflow_ctx(ssk);
+	if (!ctx->request_mptcp || ssk->sk_state != TCP_SYN_SENT)
+		return;
+
+	rtx_count = inet_csk(ssk)->icsk_retransmits;
+	if (rtx_count != 2 && !(rtx_count < 2 && expired)) {
+		/* Not (yet) considered as a drop of the MPC option */
+		ctx->mpc_drop = 0;
+		return;
+	}
+
+	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+	ctx->mpc_drop = 1;
+	mptcp_subflow_early_fallback(mptcp_sk(ctx->conn), ctx);
+}
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 7884217f33eb..38c2efc82b94 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+ SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
+ SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -25,6 +27,10 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX),
+ SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR),
+ SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR),
+ SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
@@ -69,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 66aa67f49d03..c8ffe18a8722 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -10,6 +10,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -20,6 +22,10 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
@@ -70,6 +76,7 @@ enum linux_mptcp_mib_field {
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 37f6dbcd8434..620264c75dc2 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -430,17 +430,6 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_is_backup(msk, &skc_local);
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- *flags = 0;
- *ifindex = 0;
-
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
- return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
-}
-
int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
if (info->attrs[MPTCP_PM_ATTR_TOKEN])
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index ad935d34c973..64fe0e7d87d7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -149,7 +149,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
static bool
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *new_entry)
+ struct mptcp_pm_local *new_local)
{
struct mptcp_pm_addr_entry *entry;
bool found = false;
@@ -164,7 +164,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- *new_entry = *entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
found = true;
break;
}
@@ -175,7 +177,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *new_entry)
+ struct mptcp_pm_local *new_local)
{
struct mptcp_pm_addr_entry *entry;
bool found = false;
@@ -193,7 +195,9 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
- *new_entry = *entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
found = true;
break;
}
@@ -530,11 +534,11 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- struct mptcp_pm_addr_entry local;
unsigned int add_addr_signal_max;
bool signal_and_subflow = false;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
+ struct mptcp_pm_local local;
unsigned int subflows_max;
pernet = pm_nl_get_pernet(sock_net(sk));
@@ -635,7 +639,7 @@ subflow:
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &local.addr, &addrs[i]);
+ __mptcp_subflow_connect(sk, &local, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
}
mptcp_pm_nl_check_work_pending(msk);
@@ -656,7 +660,7 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *remote,
- struct mptcp_addr_info *addrs)
+ struct mptcp_pm_local *locals)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
@@ -679,13 +683,15 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
continue;
if (msk->pm.subflows < subflows_max) {
- msk->pm.subflows++;
- addrs[i] = entry->addr;
+ locals[i].addr = entry->addr;
+ locals[i].flags = entry->flags;
+ locals[i].ifindex = entry->ifindex;
/* Special case for ID0: set the correct ID */
- if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port))
- addrs[i].id = 0;
+ if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port))
+ locals[i].addr.id = 0;
+ msk->pm.subflows++;
i++;
}
}
@@ -695,21 +701,19 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
- struct mptcp_addr_info local;
-
- memset(&local, 0, sizeof(local));
- local.family =
+ memset(&locals[i], 0, sizeof(locals[i]));
+ locals[i].addr.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
remote->family == AF_INET6 &&
ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
remote->family;
- if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote))
return 0;
msk->pm.subflows++;
- addrs[i++] = local;
+ i++;
}
return i;
@@ -717,7 +721,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
- struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
@@ -746,13 +750,13 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, &remote, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, locals);
if (nr == 0)
return;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0)
+ if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
sf_created = true;
spin_lock_bh(&msk->pm.lock);
@@ -1439,28 +1443,6 @@ out_free:
return ret;
}
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *entry;
- struct sock *sk = (struct sock *)msk;
- struct net *net = sock_net(sk);
-
- /* No entries with ID 0 */
- if (id == 0)
- return 0;
-
- rcu_read_lock();
- entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
- if (entry) {
- *flags = entry->flags;
- *ifindex = entry->ifindex;
- }
- rcu_read_unlock();
-
- return 0;
-}
-
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
@@ -1677,8 +1659,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
}
/* Called from the in-kernel PM only */
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1706,8 +1688,8 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
}
-static void mptcp_nl_remove_addrs_list(struct net *net,
- struct list_head *rm_list)
+static void mptcp_nl_flush_addrs_list(struct net *net,
+ struct list_head *rm_list)
{
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
@@ -1720,7 +1702,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
if (!mptcp_pm_is_userspace(msk)) {
lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
release_sock(sk);
}
@@ -1761,7 +1743,7 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
pernet->next_id = 1;
bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu();
__flush_addrs(&free_list);
return 0;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8eaa9fbe3e34..2cceded3a83a 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -119,23 +119,6 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
return NULL;
}
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *match;
-
- spin_lock_bh(&msk->pm.lock);
- match = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
- spin_unlock_bh(&msk->pm.lock);
- if (match) {
- *flags = match->flags;
- *ifindex = match->ifindex;
- }
-
- return 0;
-}
-
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
@@ -352,8 +335,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct mptcp_pm_addr_entry local = { 0 };
+ struct mptcp_pm_addr_entry entry = { 0 };
struct mptcp_addr_info addr_r;
+ struct mptcp_pm_local local;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
@@ -379,18 +363,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- err = mptcp_pm_parse_entry(laddr, info, true, &local);
+ err = mptcp_pm_parse_entry(laddr, info, true, &entry);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto create_err;
}
- if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
GENL_SET_ERR_MSG(info, "invalid addr flags");
err = -EINVAL;
goto create_err;
}
- local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
@@ -398,27 +382,29 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) {
+ if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
goto create_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
}
- lock_sock(sk);
-
- err = __mptcp_subflow_connect(sk, &local.addr, &addr_r);
+ local.addr = entry.addr;
+ local.flags = entry.flags;
+ local.ifindex = entry.ifindex;
+ lock_sock(sk);
+ err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (err)
- mptcp_userspace_pm_delete_local_addr(msk, &local);
+ mptcp_userspace_pm_delete_local_addr(msk, &entry);
else
msk->pm.subflows++;
spin_unlock_bh(&msk->pm.lock);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 37ebcb7640eb..c2317919fc14 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3717,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
return 0;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
@@ -3744,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
+ if (subflow->request_mptcp) {
+ if (mptcp_active_should_disable(sk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ mptcp_subflow_early_fallback(msk, subflow);
+ } else if (mptcp_token_new_connect(ssk) < 0) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3b22313d1b86..74417aae08d0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -236,6 +236,12 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+/* Description of the local endpoint handed to __mptcp_subflow_connect():
+ * the subset of a struct mptcp_pm_addr_entry needed to create a subflow.
+ */
+struct mptcp_pm_local {
+ /* local address; its id and family are read when creating the subflow */
+ struct mptcp_addr_info addr;
+ /* MPTCP_PM_ADDR_FLAG_* bits, e.g. MPTCP_PM_ADDR_FLAG_BACKUP */
+ u8 flags;
+ /* interface index, assigned to the subflow's sk_bound_dev_if */
+ int ifindex;
+};
+
struct mptcp_pm_addr_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -525,7 +531,8 @@ struct mptcp_subflow_context {
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
- __unused : 9;
+ mpc_drop : 1, /* the MPC option has been dropped in a rtx */
+ __unused : 8;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -691,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+
+void mptcp_active_disable(struct sock *sk);
+bool mptcp_active_should_disable(struct sock *ssk);
+void mptcp_active_enable(struct sock *sk);
+
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
@@ -719,7 +731,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr);
/* called with sk socket lock held */
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
struct socket **new_sock);
@@ -1014,14 +1026,6 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
@@ -1154,7 +1158,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
-void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
@@ -1218,6 +1221,14 @@ static inline void mptcp_do_fallback(struct sock *ssk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
+/* Give up on requesting MPTCP for @subflow: emit the pr_fallback() debug
+ * message for @msk, clear subflow->request_mptcp, then mark @msk as fallen
+ * back via __mptcp_do_fallback().
+ */
+static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ pr_fallback(msk);
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
struct mptcp_ext *mpext;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 064ab3235893..1040b3b9696b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_capable = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_active_enable(parent);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
+ /* It looks like MPTCP is blocked, while TCP is not */
+ if (subflow->mpc_drop)
+ mptcp_active_disable(parent);
fallback:
mptcp_propagate_state(parent, sk, subflow, NULL);
}
@@ -1565,28 +1569,31 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
#endif
}
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
+ int local_id = local->addr.id;
struct sockaddr_storage addr;
int remote_id = remote->id;
- int local_id = loc->id;
int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
- int ifindex;
- u8 flags;
+ /* The userspace PM sent the request too early? */
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, loc->family, &sf);
- if (err)
+ err = mptcp_subflow_create_socket(sk, local->addr.family, &sf);
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR);
+ pr_debug("msk=%p local=%d remote=%d create sock error: %d\n",
+ msk, local_id, remote_id, err);
goto err_out;
+ }
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1594,26 +1601,39 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (local_id)
+ /* if 'IPADDRANY', the ID will be set later, after the routing */
+ if (local->addr.family == AF_INET) {
+ if (!local->addr.addr.s_addr)
+ local_id = -1;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&local->addr.addr6))
+ local_id = -1;
+#endif
+ }
+
+ if (local_id >= 0)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
- &flags, &ifindex);
subflow->remote_key_valid = 1;
subflow->remote_key = READ_ONCE(msk->remote_key);
subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
- mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
+ mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family);
addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- ssk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = local->ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
- if (err)
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR);
+ pr_debug("msk=%p local=%d remote=%d bind error: %d\n",
+ msk, local_id, remote_id, err);
goto failed;
+ }
mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk,
@@ -1621,15 +1641,21 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_token = remote_token;
WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
- subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);
list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
- if (err && err != -EINPROGRESS)
+ if (err && err != -EINPROGRESS) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR);
+ pr_debug("msk=%p local=%d remote=%d connect error: %d\n",
+ msk, local_id, remote_id, err);
goto failed_unlink;
+ }
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX);
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);