diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/ctrl.c | 133 | ||||
-rw-r--r-- | net/mptcp/mib.c | 7 | ||||
-rw-r--r-- | net/mptcp/mib.h | 7 | ||||
-rw-r--r-- | net/mptcp/pm.c | 11 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 78 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 40 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 18 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 33 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 54 |
9 files changed, 260 insertions, 121 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 99382c317ebb..38d8121331d4 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -12,6 +12,7 @@ #include <net/netns/generic.h> #include "protocol.h" +#include "mib.h" #define MPTCP_SYSCTL_PATH "net/mptcp" @@ -27,8 +28,11 @@ struct mptcp_pernet { #endif unsigned int add_addr_timeout; + unsigned int blackhole_timeout; unsigned int close_timeout; unsigned int stale_loss_cnt; + atomic_t active_disable_times; + unsigned long active_disable_stamp; u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; @@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->blackhole_timeout = 3600; + atomic_set(&pernet->active_disable_times, 0); pernet->close_timeout = TCP_TIMEWAIT_LEN; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; @@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl, return ret; } +static int proc_blackhole_detect_timeout(const struct ctl_table *table, + int write, void *buffer, size_t *lenp, + loff_t *ppos) +{ + struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns); + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + atomic_set(&pernet->active_disable_times, 0); + + return ret; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "blackhole_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_blackhole_detect_timeout, + .extra1 = SYSCTL_ZERO, + }, }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[6].data = &pernet->scheduler; /* table[7] is for available_schedulers which is read-only info */ table[8].data = &pernet->close_timeout; + table[9].data = &pernet->blackhole_timeout; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); @@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {} #endif /* CONFIG_SYSCTL */ +/* The following code block is to deal with middle box issues with MPTCP, + * similar to what is done with TFO. + * The proposed solution is to disable active MPTCP globally when SYN+MPC are + * dropped, while SYN without MPC aren't. In this case, active side MPTCP is + * disabled globally for 1hr at first. Then if it happens again, it is disabled + * for 2h, then 4h, 8h, ... + * The timeout is reset back to 1hr when a successful active MPTCP connection is + * fully established. + */ + +/* Disable active MPTCP and record current jiffies and active_disable_times */ +void mptcp_active_disable(struct sock *sk) +{ + struct net *net = sock_net(sk); + struct mptcp_pernet *pernet; + + pernet = mptcp_get_pernet(net); + + if (!READ_ONCE(pernet->blackhole_timeout)) + return; + + /* Paired with READ_ONCE() in mptcp_active_should_disable() */ + WRITE_ONCE(pernet->active_disable_stamp, jiffies); + + /* Paired with smp_rmb() in mptcp_active_should_disable(). + * We want pernet->active_disable_stamp to be updated first. + */ + smp_mb__before_atomic(); + atomic_inc(&pernet->active_disable_times); + + MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE); +} + +/* Calculate timeout for MPTCP active disable + * Return true if we are still in the active MPTCP disable period + * Return false if timeout already expired and we should use active MPTCP + */ +bool mptcp_active_should_disable(struct sock *ssk) +{ + struct net *net = sock_net(ssk); + unsigned int blackhole_timeout; + struct mptcp_pernet *pernet; + unsigned long timeout; + int disable_times; + int multiplier; + + pernet = mptcp_get_pernet(net); + blackhole_timeout = READ_ONCE(pernet->blackhole_timeout); + + if (!blackhole_timeout) + return false; + + disable_times = atomic_read(&pernet->active_disable_times); + if (!disable_times) + return false; + + /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */ + smp_rmb(); + + /* Limit timeout to max: 2^6 * initial timeout */ + multiplier = 1 << min(disable_times - 1, 6); + + /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */ + timeout = READ_ONCE(pernet->active_disable_stamp) + + multiplier * blackhole_timeout * HZ; + + return time_before(jiffies, timeout); +} + +/* Enable active MPTCP and reset active_disable_times if needed */ +void mptcp_active_enable(struct sock *sk) +{ + struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk)); + + if (atomic_read(&pernet->active_disable_times)) { + struct dst_entry *dst = sk_dst_get(sk); + + if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) + atomic_set(&pernet->active_disable_times, 0); + } +} + +/* Check the number of retransmissions, and fallback to TCP if needed */ +void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) +{ + struct mptcp_subflow_context *subflow; + u32 timeouts; + + if (!sk_is_mptcp(ssk)) + return; + + timeouts = inet_csk(ssk)->icsk_retransmits; + subflow = mptcp_subflow_ctx(ssk); + + if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { + if (timeouts == 2 || (timeouts < 2 && expired)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP); + subflow->mpc_drop = 1; + mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); + } else { + subflow->mpc_drop = 0; + } + } +} + static int __net_init mptcp_net_init(struct net *net) { struct mptcp_pernet *pernet = mptcp_get_pernet(net); diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 7884217f33eb..38c2efc82b94 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), + SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), + SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), @@ -25,6 +27,10 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), + SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR), + SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), + SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), @@ -69,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), + SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 66aa67f49d03..c8ffe18a8722 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -10,6 +10,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ + MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */ + MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ @@ -20,6 +22,10 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ + MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ @@ -70,6 +76,7 @@ enum linux_mptcp_mib_field { */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ + MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 37f6dbcd8434..620264c75dc2 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -430,17 +430,6 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_is_backup(msk, &skc_local); } -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) -{ - *flags = 0; - *ifindex = 0; - - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); - return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); -} - int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) { if (info->attrs[MPTCP_PM_ATTR_TOKEN]) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ad935d34c973..64fe0e7d87d7 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -149,7 +149,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, static bool select_local_address(const struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *new_entry) + struct mptcp_pm_local *new_local) { struct mptcp_pm_addr_entry *entry; bool found = false; @@ -164,7 +164,9 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; - *new_entry = *entry; + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; found = true; break; } @@ -175,7 +177,7 @@ select_local_address(const struct pm_nl_pernet *pernet, static bool select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *new_entry) + struct mptcp_pm_local *new_local) { struct mptcp_pm_addr_entry *entry; bool found = false; @@ -193,7 +195,9 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; - *new_entry = *entry; + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; found = true; break; } @@ -530,11 +534,11 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry local; unsigned int add_addr_signal_max; bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; + struct mptcp_pm_local local; unsigned int subflows_max; pernet = pm_nl_get_pernet(sock_net(sk)); @@ -635,7 +639,7 @@ subflow: spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); + __mptcp_subflow_connect(sk, &local, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); @@ -656,7 +660,7 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) */ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, - struct mptcp_addr_info *addrs) + struct mptcp_pm_local *locals) { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; @@ -679,13 +683,15 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, continue; if (msk->pm.subflows < subflows_max) { - msk->pm.subflows++; - addrs[i] = entry->addr; + locals[i].addr = entry->addr; + locals[i].flags = entry->flags; + locals[i].ifindex = entry->ifindex; /* Special case for ID0: set the correct ID */ - if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) - addrs[i].id = 0; + if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) + locals[i].addr.id = 0; + msk->pm.subflows++; i++; } } @@ -695,21 +701,19 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, * 'IPADDRANY' local address */ if (!i) { - struct mptcp_addr_info local; - - memset(&local, 0, sizeof(local)); - local.family = + memset(&locals[i], 0, sizeof(locals[i])); + locals[i].addr.family = #if IS_ENABLED(CONFIG_MPTCP_IPV6) remote->family == AF_INET6 && ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : #endif remote->family; - if (!mptcp_pm_addr_families_match(sk, &local, remote)) + if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) return 0; msk->pm.subflows++; - addrs[i++] = local; + i++; } return i; @@ -717,7 +721,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { - struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; struct mptcp_addr_info remote; @@ -746,13 +750,13 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) /* connect to the specified remote address, using whatever * local address the routing configuration will pick. */ - nr = fill_local_addresses_vec(msk, &remote, addrs); + nr = fill_local_addresses_vec(msk, &remote, locals); if (nr == 0) return; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) + if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) sf_created = true; spin_lock_bh(&msk->pm.lock); @@ -1439,28 +1443,6 @@ out_free: return ret; } -int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) -{ - struct mptcp_pm_addr_entry *entry; - struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); - - /* No entries with ID 0 */ - if (id == 0) - return 0; - - rcu_read_lock(); - entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); - if (entry) { - *flags = entry->flags; - *ifindex = entry->ifindex; - } - rcu_read_unlock(); - - return 0; -} - static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { @@ -1677,8 +1659,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) } /* Called from the in-kernel PM only */ -static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -1706,8 +1688,8 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); } -static void mptcp_nl_remove_addrs_list(struct net *net, - struct list_head *rm_list) +static void mptcp_nl_flush_addrs_list(struct net *net, + struct list_head *rm_list) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; @@ -1720,7 +1702,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net, if (!mptcp_pm_is_userspace(msk)) { lock_sock(sk); - mptcp_pm_remove_addrs_and_subflows(msk, rm_list); + mptcp_pm_flush_addrs_and_subflows(msk, rm_list); release_sock(sk); } @@ -1761,7 +1743,7 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) pernet->next_id = 1; bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); - mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); + mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); synchronize_rcu(); __flush_addrs(&free_list); return 0; diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 8eaa9fbe3e34..2cceded3a83a 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -119,23 +119,6 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) return NULL; } -int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex) -{ - struct mptcp_pm_addr_entry *match; - - spin_lock_bh(&msk->pm.lock); - match = mptcp_userspace_pm_lookup_addr_by_id(msk, id); - spin_unlock_bh(&msk->pm.lock); - if (match) { - *flags = match->flags; - *ifindex = match->ifindex; - } - - return 0; -} - int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { @@ -352,8 +335,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct mptcp_pm_addr_entry local = { 0 }; + struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; + struct mptcp_pm_local local; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; @@ -379,18 +363,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - err = mptcp_pm_parse_entry(laddr, info, true, &local); + err = mptcp_pm_parse_entry(laddr, info, true, &entry); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); goto create_err; } - if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { GENL_SET_ERR_MSG(info, "invalid addr flags"); err = -EINVAL; goto create_err; } - local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) { @@ -398,27 +382,29 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) { + if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; goto create_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); + err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; } - lock_sock(sk); - - err = __mptcp_subflow_connect(sk, &local.addr, &addr_r); + local.addr = entry.addr; + local.flags = entry.flags; + local.ifindex = entry.ifindex; + lock_sock(sk); + err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); spin_lock_bh(&msk->pm.lock); if (err) - mptcp_userspace_pm_delete_local_addr(msk, &local); + mptcp_userspace_pm_delete_local_addr(msk, &entry); else msk->pm.subflows++; spin_unlock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 37ebcb7640eb..c2317919fc14 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3717,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) -{ - subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); -} - static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct mptcp_subflow_context *subflow; @@ -3744,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) mptcp_subflow_early_fallback(msk, subflow); #endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); - mptcp_subflow_early_fallback(msk, subflow); + if (subflow->request_mptcp) { + if (mptcp_active_should_disable(sk)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED); + mptcp_subflow_early_fallback(msk, subflow); + } else if (mptcp_token_new_connect(ssk) < 0) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); + mptcp_subflow_early_fallback(msk, subflow); + } } WRITE_ONCE(msk->write_seq, subflow->idsn); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3b22313d1b86..74417aae08d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -236,6 +236,12 @@ struct mptcp_pm_data { struct mptcp_rm_list rm_list_rx; }; +struct mptcp_pm_local { + struct mptcp_addr_info addr; + u8 flags; + int ifindex; +}; + struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; @@ -525,7 +531,8 @@ struct mptcp_subflow_context { valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ close_event_done : 1, /* has done the post-closed part */ - __unused : 9; + mpc_drop : 1, /* the MPC option has been dropped in a rtx */ + __unused : 8; bool data_avail; bool scheduled; u32 remote_nonce; @@ -691,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); + +void mptcp_active_disable(struct sock *sk); +bool mptcp_active_should_disable(struct sock *ssk); +void mptcp_active_enable(struct sock *sk); + void mptcp_get_available_schedulers(char *buf, size_t maxlen); void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, @@ -719,7 +731,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ -int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, +int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, struct socket **new_sock); @@ -1014,14 +1026,6 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex); -int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex); -int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex); int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); @@ -1154,7 +1158,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); } -void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) @@ -1218,6 +1221,14 @@ static inline void mptcp_do_fallback(struct sock *ssk) #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) +static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + pr_fallback(msk); + subflow->request_mptcp = 0; + __mptcp_do_fallback(msk); +} + static inline bool mptcp_check_infinite_map(struct sk_buff *skb) { struct mptcp_ext *mpext; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 064ab3235893..1040b3b9696b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_capable = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); + mptcp_active_enable(parent); mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX); } } else if (mptcp_check_fallback(sk)) { + /* It looks like MPTCP is blocked, while TCP is not */ + if (subflow->mpc_drop) + mptcp_active_disable(parent); fallback: mptcp_propagate_state(parent, sk, subflow, NULL); } @@ -1565,28 +1569,31 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, #endif } -int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, +int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; + int local_id = local->addr.id; struct sockaddr_storage addr; int remote_id = remote->id; - int local_id = loc->id; int err = -ENOTCONN; struct socket *sf; struct sock *ssk; u32 remote_token; int addrlen; - int ifindex; - u8 flags; + /* The userspace PM sent the request too early? */ if (!mptcp_is_fully_established(sk)) goto err_out; - err = mptcp_subflow_create_socket(sk, loc->family, &sf); - if (err) + err = mptcp_subflow_create_socket(sk, local->addr.family, &sf); + if (err) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR); + pr_debug("msk=%p local=%d remote=%d create sock error: %d\n", + msk, local_id, remote_id, err); goto err_out; + } ssk = sf->sk; subflow = mptcp_subflow_ctx(ssk); @@ -1594,26 +1601,39 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, get_random_bytes(&subflow->local_nonce, sizeof(u32)); } while (!subflow->local_nonce); - if (local_id) + /* if 'IPADDRANY', the ID will be set later, after the routing */ + if (local->addr.family == AF_INET) { + if (!local->addr.addr.s_addr) + local_id = -1; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + } else if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&local->addr.addr6)) + local_id = -1; +#endif + } + + if (local_id >= 0) subflow_set_local_id(subflow, local_id); - mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, - &flags, &ifindex); subflow->remote_key_valid = 1; subflow->remote_key = READ_ONCE(msk->remote_key); subflow->local_key = READ_ONCE(msk->local_key); subflow->token = msk->token; - mptcp_info2sockaddr(loc, &addr, ssk->sk_family); + mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family); addrlen = sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - ssk->sk_bound_dev_if = ifindex; + ssk->sk_bound_dev_if = local->ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); - if (err) + if (err) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR); + pr_debug("msk=%p local=%d remote=%d bind error: %d\n", + msk, local_id, remote_id, err); goto failed; + } mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk, @@ -1621,15 +1641,21 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_token = remote_token; WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; - subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); sock_hold(ssk); list_add_tail(&subflow->node, &msk->conn_list); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); - if (err && err != -EINPROGRESS) + if (err && err != -EINPROGRESS) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR); + pr_debug("msk=%p local=%d remote=%d connect error: %d\n", + msk, local_id, remote_id, err); goto failed_unlink; + } + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX); /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); |