diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/ctrl.c | 133 | ||||
-rw-r--r-- | net/mptcp/fastopen.c | 4 | ||||
-rw-r--r-- | net/mptcp/mib.c | 7 | ||||
-rw-r--r-- | net/mptcp/mib.h | 7 | ||||
-rw-r--r-- | net/mptcp/options.c | 50 | ||||
-rw-r--r-- | net/mptcp/pm.c | 43 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 198 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 40 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 83 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 40 | ||||
-rw-r--r-- | net/mptcp/sched.c | 4 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 4 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 110 |
13 files changed, 463 insertions, 260 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 99382c317ebb..38d8121331d4 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -12,6 +12,7 @@ #include <net/netns/generic.h> #include "protocol.h" +#include "mib.h" #define MPTCP_SYSCTL_PATH "net/mptcp" @@ -27,8 +28,11 @@ struct mptcp_pernet { #endif unsigned int add_addr_timeout; + unsigned int blackhole_timeout; unsigned int close_timeout; unsigned int stale_loss_cnt; + atomic_t active_disable_times; + unsigned long active_disable_stamp; u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; @@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->blackhole_timeout = 3600; + atomic_set(&pernet->active_disable_times, 0); pernet->close_timeout = TCP_TIMEWAIT_LEN; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; @@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl, return ret; } +static int proc_blackhole_detect_timeout(const struct ctl_table *table, + int write, void *buffer, size_t *lenp, + loff_t *ppos) +{ + struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns); + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + atomic_set(&pernet->active_disable_times, 0); + + return ret; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "blackhole_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_blackhole_detect_timeout, + .extra1 = SYSCTL_ZERO, + }, }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[6].data = &pernet->scheduler; /* table[7] is for available_schedulers which is read-only info */ table[8].data = &pernet->close_timeout; + table[9].data = &pernet->blackhole_timeout; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); @@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {} #endif /* CONFIG_SYSCTL */ +/* The following code block is to deal with middle box issues with MPTCP, + * similar to what is done with TFO. + * The proposed solution is to disable active MPTCP globally when SYN+MPC are + * dropped, while SYN without MPC aren't. In this case, active side MPTCP is + * disabled globally for 1hr at first. Then if it happens again, it is disabled + * for 2h, then 4h, 8h, ... + * The timeout is reset back to 1hr when a successful active MPTCP connection is + * fully established. + */ + +/* Disable active MPTCP and record current jiffies and active_disable_times */ +void mptcp_active_disable(struct sock *sk) +{ + struct net *net = sock_net(sk); + struct mptcp_pernet *pernet; + + pernet = mptcp_get_pernet(net); + + if (!READ_ONCE(pernet->blackhole_timeout)) + return; + + /* Paired with READ_ONCE() in mptcp_active_should_disable() */ + WRITE_ONCE(pernet->active_disable_stamp, jiffies); + + /* Paired with smp_rmb() in mptcp_active_should_disable(). + * We want pernet->active_disable_stamp to be updated first. + */ + smp_mb__before_atomic(); + atomic_inc(&pernet->active_disable_times); + + MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE); +} + +/* Calculate timeout for MPTCP active disable + * Return true if we are still in the active MPTCP disable period + * Return false if timeout already expired and we should use active MPTCP + */ +bool mptcp_active_should_disable(struct sock *ssk) +{ + struct net *net = sock_net(ssk); + unsigned int blackhole_timeout; + struct mptcp_pernet *pernet; + unsigned long timeout; + int disable_times; + int multiplier; + + pernet = mptcp_get_pernet(net); + blackhole_timeout = READ_ONCE(pernet->blackhole_timeout); + + if (!blackhole_timeout) + return false; + + disable_times = atomic_read(&pernet->active_disable_times); + if (!disable_times) + return false; + + /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */ + smp_rmb(); + + /* Limit timeout to max: 2^6 * initial timeout */ + multiplier = 1 << min(disable_times - 1, 6); + + /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */ + timeout = READ_ONCE(pernet->active_disable_stamp) + + multiplier * blackhole_timeout * HZ; + + return time_before(jiffies, timeout); +} + +/* Enable active MPTCP and reset active_disable_times if needed */ +void mptcp_active_enable(struct sock *sk) +{ + struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk)); + + if (atomic_read(&pernet->active_disable_times)) { + struct dst_entry *dst = sk_dst_get(sk); + + if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) + atomic_set(&pernet->active_disable_times, 0); + } +} + +/* Check the number of retransmissions, and fallback to TCP if needed */ +void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) +{ + struct mptcp_subflow_context *subflow; + u32 timeouts; + + if (!sk_is_mptcp(ssk)) + return; + + timeouts = inet_csk(ssk)->icsk_retransmits; + subflow = mptcp_subflow_ctx(ssk); + + if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { + if (timeouts == 2 || (timeouts < 2 && expired)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP); + subflow->mpc_drop = 1; + mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); + } else { + subflow->mpc_drop = 0; + } + } +} + static int __net_init mptcp_net_init(struct net *net) { struct mptcp_pernet *pernet = mptcp_get_pernet(net); diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index ad28da655f8b..a29ff901df75 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -68,12 +68,12 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo skb = skb_peek_tail(&sk->sk_receive_queue); if (skb) { WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); - pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk, + pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk, MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq, MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq); MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq; MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq; } - pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq); + pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq); } diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 7884217f33eb..38c2efc82b94 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), + SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), + SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), @@ -25,6 +27,10 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), + SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR), + SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), + SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), @@ -69,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), + SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 66aa67f49d03..c8ffe18a8722 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -10,6 +10,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ + MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */ + MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ @@ -20,6 +22,10 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ + MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ @@ -70,6 +76,7 @@ enum linux_mptcp_mib_field { */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ + MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ac2f1a54cc43..370c3836b771 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -117,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u\n", version, flags, opsize, mp_opt->sndr_key, mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; @@ -131,7 +131,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 4; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; - pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u", + pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->token, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) { @@ -142,19 +142,19 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 8; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; - pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u", + pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->thmac, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) { mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK; ptr += 2; memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); - pr_debug("MP_JOIN hmac"); + pr_debug("MP_JOIN hmac\n"); } break; case MPTCPOPT_DSS: - pr_debug("DSS"); + pr_debug("DSS\n"); ptr++; /* we must clear 'mpc_map' be able to detect MP_CAPABLE @@ -169,7 +169,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0; mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK); - pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d", + pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d\n", mp_opt->data_fin, mp_opt->dsn64, mp_opt->use_map, mp_opt->ack64, mp_opt->use_ack); @@ -207,7 +207,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 4; } - pr_debug("data_ack=%llu", mp_opt->data_ack); + pr_debug("data_ack=%llu\n", mp_opt->data_ack); } if (mp_opt->use_map) { @@ -231,7 +231,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; } - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", mp_opt->data_seq, mp_opt->subflow_seq, mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD), mp_opt->csum); @@ -293,7 +293,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ahmac = get_unaligned_be64(ptr); ptr += 8; } - pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d", + pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d\n", (mp_opt->addr.family == AF_INET6) ? "6" : "", mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port)); break; @@ -309,7 +309,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; for (i = 0; i < mp_opt->rm_list.nr; i++) mp_opt->rm_list.ids[i] = *ptr++; - pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr); + pr_debug("RM_ADDR: rm_list_nr=%d\n", mp_opt->rm_list.nr); break; case MPTCPOPT_MP_PRIO: @@ -318,7 +318,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_PRIO; mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; - pr_debug("MP_PRIO: prio=%d", mp_opt->backup); + pr_debug("MP_PRIO: prio=%d\n", mp_opt->backup); break; case MPTCPOPT_MP_FASTCLOSE: @@ -329,7 +329,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE; - pr_debug("MP_FASTCLOSE: recv_key=%llu", mp_opt->rcvr_key); + pr_debug("MP_FASTCLOSE: recv_key=%llu\n", mp_opt->rcvr_key); break; case MPTCPOPT_RST: @@ -343,7 +343,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; - pr_debug("MP_RST: transient=%u reason=%u", + pr_debug("MP_RST: transient=%u reason=%u\n", mp_opt->reset_transient, mp_opt->reset_reason); break; @@ -354,7 +354,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; mp_opt->suboptions |= OPTION_MPTCP_FAIL; mp_opt->fail_seq = get_unaligned_be64(ptr); - pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq); + pr_debug("MP_FAIL: data_seq=%llu\n", mp_opt->fail_seq); break; default: @@ -417,7 +417,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { - pr_debug("remote_token=%u, nonce=%u", subflow->remote_token, + pr_debug("remote_token=%u, nonce=%u\n", subflow->remote_token, subflow->local_nonce); opts->suboptions = OPTION_MPTCP_MPJ_SYN; opts->join_id = subflow->local_id; @@ -500,7 +500,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, *size = TCPOLEN_MPTCP_MPC_ACK; } - pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", + pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d\n", subflow, subflow->local_key, subflow->remote_key, data_len); @@ -509,7 +509,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPJ_ACK; memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN); *size = TCPOLEN_MPTCP_MPJ_ACK; - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); /* we can use the full delegate action helper only from BH context * If we are in process context - sk is flushing the backlog at @@ -675,7 +675,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * *size = len; if (drop_other_suboptions) { - pr_debug("drop other suboptions"); + pr_debug("drop other suboptions\n"); opts->suboptions = 0; /* note that e.g. DSS could have written into the memory @@ -695,7 +695,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } - pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", + pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; @@ -726,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, opts->rm_list = rm_list; for (i = 0; i < opts->rm_list.nr; i++) - pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); + pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]); MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } @@ -752,7 +752,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, opts->suboptions |= OPTION_MPTCP_PRIO; opts->backup = subflow->request_bkup; - pr_debug("prio=%d", opts->backup); + pr_debug("prio=%d\n", opts->backup); return true; } @@ -794,7 +794,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk, opts->suboptions |= OPTION_MPTCP_FASTCLOSE; opts->rcvr_key = READ_ONCE(msk->remote_key); - pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); + pr_debug("FASTCLOSE key=%llu\n", opts->rcvr_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } @@ -816,7 +816,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk, opts->suboptions |= OPTION_MPTCP_FAIL; opts->fail_seq = subflow->map_seq; - pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + pr_debug("MP_FAIL fail_seq=%llu\n", opts->fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; @@ -904,7 +904,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, opts->csum_reqd = subflow_req->csum_reqd; opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; - pr_debug("subflow_req=%p, local_key=%llu", + pr_debug("subflow_req=%p, local_key=%llu\n", subflow_req, subflow_req->local_key); return true; } else if (subflow_req->mp_join) { @@ -913,7 +913,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; - pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u", + pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u\n", subflow_req, opts->backup, opts->join_id, opts->thmac, opts->nonce); *size = TCPOLEN_MPTCP_MPJ_SYNACK; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3e6e0f5510bb..620264c75dc2 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -19,7 +19,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, { u8 add_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); + pr_debug("msk=%p, local_id=%d, echo=%d\n", msk, addr->id, echo); lockdep_assert_held(&msk->pm.lock); @@ -45,7 +45,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ { u8 rm_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); + pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr); if (rm_addr) { MPTCP_ADD_STATS(sock_net((struct sock *)msk), @@ -66,7 +66,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side); + pr_debug("msk=%p, token=%u side=%d\n", msk, READ_ONCE(msk->token), server_side); WRITE_ONCE(pm->server_side, server_side); mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); @@ -90,7 +90,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) subflows_max = mptcp_pm_get_subflows_max(msk); - pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, + pr_debug("msk=%p subflows=%d max=%d allow=%d\n", msk, pm->subflows, subflows_max, READ_ONCE(pm->accept_subflow)); /* try to avoid acquiring the lock below */ @@ -114,7 +114,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, enum mptcp_pm_status new_status) { - pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, + pr_debug("msk=%p status=%x new=%lx\n", msk, msk->pm.status, BIT(new_status)); if (msk->pm.status & BIT(new_status)) return false; @@ -129,7 +129,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) struct mptcp_pm_data *pm = &msk->pm; bool announce = false; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); spin_lock_bh(&pm->lock); @@ -153,14 +153,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) void mptcp_pm_connection_closed(struct mptcp_sock *msk) { - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); } void mptcp_pm_subflow_established(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); if (!READ_ONCE(pm->work_pending)) return; @@ -212,7 +212,7 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, + pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id, READ_ONCE(pm->accept_addr)); mptcp_event_addr_announced(ssk, addr); @@ -226,7 +226,9 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, } else { __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } - } else if (!READ_ONCE(pm->accept_addr)) { + /* id0 should not have a different address */ + } else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) || + (addr->id > 0 && !READ_ONCE(pm->accept_addr))) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { @@ -243,7 +245,7 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); spin_lock_bh(&pm->lock); @@ -267,7 +269,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, struct mptcp_pm_data *pm = &msk->pm; u8 i; - pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); + pr_debug("msk=%p remote_ids_nr=%d\n", msk, rm_list->nr); for (i = 0; i < rm_list->nr; i++) mptcp_event_addr_removed(msk, rm_list->ids[i]); @@ -299,19 +301,19 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - pr_debug("fail_seq=%llu", fail_seq); + pr_debug("fail_seq=%llu\n", fail_seq); if (!READ_ONCE(msk->allow_infinite_fallback)) return; if (!subflow->fail_tout) { - pr_debug("send MP_FAIL response and infinite map"); + pr_debug("send MP_FAIL response and infinite map\n"); subflow->send_mp_fail = 1; subflow->send_infinite_map = 1; tcp_send_ack(sk); } else { - pr_debug("MP_FAIL response received"); + pr_debug("MP_FAIL response received\n"); WRITE_ONCE(subflow->fail_tout, 0); } } @@ -428,17 +430,6 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_is_backup(msk, &skc_local); } -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) -{ - *flags = 0; - *ifindex = 0; - - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); - return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); -} - int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) { if (info->attrs[MPTCP_PM_ATTR_TOKEN]) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3e4ad801786f..64fe0e7d87d7 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -130,12 +130,15 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, { struct mptcp_subflow_context *subflow; struct mptcp_addr_info cur; - struct sock_common *skc; list_for_each_entry(subflow, list, node) { - skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - remote_address(skc, &cur); + if (!((1 << inet_sk_state_load(ssk)) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) + continue; + + remote_address((struct sock_common *)ssk, &cur); if (mptcp_addresses_equal(&cur, daddr, daddr->port)) return true; } @@ -146,7 +149,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, static bool select_local_address(const struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *new_entry) + struct mptcp_pm_local *new_local) { struct mptcp_pm_addr_entry *entry; bool found = false; @@ -161,7 +164,9 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; - *new_entry = *entry; + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; found = true; break; } @@ -172,7 +177,7 @@ select_local_address(const struct pm_nl_pernet *pernet, static bool select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *new_entry) + struct mptcp_pm_local *new_local) { struct mptcp_pm_addr_entry *entry; bool found = false; @@ -190,7 +195,9 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; - *new_entry = *entry; + new_local->addr = entry->addr; + new_local->flags = entry->flags; + new_local->ifindex = entry->ifindex; found = true; break; } @@ -287,7 +294,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); if (!msk) return; @@ -306,7 +313,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_lock_bh(&msk->pm.lock); if (!mptcp_pm_should_add_signal_addr(msk)) { - pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); + pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; @@ -331,15 +338,21 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *entry; struct sock *sk = (struct sock *)msk; + struct timer_list *add_timer = NULL; spin_lock_bh(&msk->pm.lock); entry = mptcp_lookup_anno_list_by_saddr(msk, addr); - if (entry && (!check_id || entry->addr.id == addr->id)) + if (entry && (!check_id || entry->addr.id == addr->id)) { entry->retrans_times = ADD_ADDR_RETRANS_MAX; + add_timer = &entry->add_timer; + } + if (!check_id && entry) + list_del(&entry->list); spin_unlock_bh(&msk->pm.lock); - if (entry && (!check_id || entry->addr.id == addr->id)) - sk_stop_timer_sync(sk, &entry->add_timer); + /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ + if (add_timer) + sk_stop_timer_sync(sk, add_timer); return entry; } @@ -387,7 +400,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.anno_list, &free_list); @@ -473,7 +486,7 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow; - pr_debug("send ack for %s", + pr_debug("send ack for %s\n", prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); slow = lock_sock_fast(ssk); @@ -521,11 +534,11 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry local; unsigned int add_addr_signal_max; bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; + struct mptcp_pm_local local; unsigned int subflows_max; pernet = pm_nl_get_pernet(sock_net(sk)); @@ -585,6 +598,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + mptcp_pm_announce_addr(msk, &local.addr, false); mptcp_pm_nl_addr_send_ack(msk); @@ -607,15 +625,21 @@ subflow: fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); - msk->pm.local_addr_used++; __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + + /* Special case for ID0: set the correct ID */ + if (local.addr.id == msk->mpc_endpoint_id) + local.addr.id = 0; + else /* local_addr_used is not decr for ID 0 */ + msk->pm.local_addr_used++; + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); + __mptcp_subflow_connect(sk, &local, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); @@ -636,7 +660,7 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) */ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, - struct mptcp_addr_info *addrs) + struct mptcp_pm_local *locals) { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; @@ -659,13 +683,15 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, continue; if (msk->pm.subflows < subflows_max) { - msk->pm.subflows++; - addrs[i] = entry->addr; + locals[i].addr = entry->addr; + locals[i].flags = entry->flags; + locals[i].ifindex = entry->ifindex; /* Special case for ID0: set the correct ID */ - if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) - addrs[i].id = 0; + if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) + locals[i].addr.id = 0; + msk->pm.subflows++; i++; } } @@ -675,21 +701,19 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, * 'IPADDRANY' local address */ if (!i) { - struct mptcp_addr_info local; - - memset(&local, 0, sizeof(local)); - local.family = + memset(&locals[i], 0, sizeof(locals[i])); + locals[i].addr.family = #if IS_ENABLED(CONFIG_MPTCP_IPV6) remote->family == AF_INET6 && ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : #endif remote->family; - if (!mptcp_pm_addr_families_match(sk, &local, remote)) + if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) return 0; msk->pm.subflows++; - addrs[i++] = local; + i++; } return i; @@ -697,7 +721,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { - struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; struct mptcp_addr_info remote; @@ -708,7 +732,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); - pr_debug("accepted %d:%d remote family %d", + pr_debug("accepted %d:%d remote family %d\n", msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); @@ -726,24 +750,35 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) /* connect to the specified remote address, using whatever * local address the routing configuration will pick. */ - nr = fill_local_addresses_vec(msk, &remote, addrs); + nr = fill_local_addresses_vec(msk, &remote, locals); if (nr == 0) return; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) + if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) sf_created = true; spin_lock_bh(&msk->pm.lock); if (sf_created) { - msk->pm.add_addr_accepted++; + /* add_addr_accepted is not decr for ID 0 */ + if (remote.id) + msk->pm.add_addr_accepted++; if (msk->pm.add_addr_accepted >= add_addr_accept_max || msk->pm.subflows >= subflows_max) WRITE_ONCE(msk->pm.accept_addr, false); } } +bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote) +{ + struct mptcp_addr_info mpc_remote; + + remote_address((struct sock_common *)msk, &mpc_remote); + return mptcp_addresses_equal(&mpc_remote, remote, remote->port); +} + void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -755,9 +790,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) !mptcp_pm_should_rm_signal(msk)) return; - subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); - if (subflow) - mptcp_pm_send_ack(msk, subflow, false, false); + mptcp_for_each_subflow(msk, subflow) { + if (__mptcp_subflow_active(subflow)) { + mptcp_pm_send_ack(msk, subflow, false, false); + break; + } + } } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, @@ -767,7 +805,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, { struct mptcp_subflow_context *subflow; - pr_debug("bkup=%d", bkup); + pr_debug("bkup=%d\n", bkup); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -790,11 +828,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id) -{ - return local_id == id || (!local_id && msk->mpc_endpoint_id == id); -} - static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list, enum linux_mptcp_mib_field rm_type) @@ -803,7 +836,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; u8 i; - pr_debug("%s rm_list_nr %d", + pr_debug("%s rm_list_nr %d\n", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); msk_owned_by_me(msk); @@ -827,12 +860,14 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); + if (inet_sk_state_load(ssk) == TCP_CLOSE) + continue; if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; - if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) + if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) continue; - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); @@ -889,7 +924,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) spin_lock_bh(&msk->pm.lock); - pr_debug("msk=%p status=%x", msk, pm->status); + pr_debug("msk=%p status=%x\n", msk, pm->status); if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); mptcp_pm_nl_add_addr_received(msk); @@ -1307,20 +1342,27 @@ static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) return pm_nl_get_pernet(genl_info_net(info)); } -static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) +static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, + struct mptcp_addr_info *addr) { struct mptcp_sock *msk; long s_slot = 0, s_num = 0; while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info mpc_addr; if (!READ_ONCE(msk->fully_established) || mptcp_pm_is_userspace(msk)) goto next; + /* if the endp linked to the init sf is re-added with a != ID */ + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + lock_sock(sk); spin_lock_bh(&msk->pm.lock); + if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) + msk->mpc_endpoint_id = addr->id; mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1393,7 +1435,7 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) goto out_free; } - mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk)); + mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); return 0; out_free: @@ -1401,28 +1443,6 @@ out_free: return ret; } -int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) -{ - struct mptcp_pm_addr_entry *entry; - struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); - - /* No entries with ID 0 */ - if (id == 0) - return 0; - - rcu_read_lock(); - entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); - if (entry) { - *flags = entry->flags; - *ifindex = entry->ifindex; - } - rcu_read_unlock(); - - return 0; -} - static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { @@ -1430,7 +1450,6 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, entry = mptcp_pm_del_add_timer(msk, addr, false); if (entry) { - list_del(&entry->list); kfree(entry); return true; } @@ -1438,6 +1457,12 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, return false; } +static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; +} + static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool force) @@ -1445,7 +1470,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, struct mptcp_rm_list list = { .nr = 0 }; bool ret; - list.ids[list.nr++] = addr->id; + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { @@ -1472,13 +1497,11 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { const struct mptcp_addr_info *addr = &entry->addr; - struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_rm_list list = { .nr = 1 }; long s_slot = 0, s_num = 0; struct mptcp_sock *msk; - pr_debug("remove_id=%d", addr->id); - - list.ids[list.nr++] = addr->id; + pr_debug("remove_id=%d\n", addr->id); while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { struct sock *sk = (struct sock *)msk; @@ -1497,6 +1520,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); + list.ids[0] = mptcp_endp_get_local_id(msk, addr); if (remove_subflow) { spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); @@ -1509,6 +1533,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, spin_unlock_bh(&msk->pm.lock); } + if (msk->mpc_endpoint_id == entry->addr.id) + msk->mpc_endpoint_id = 0; release_sock(sk); next: @@ -1603,6 +1629,7 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) return ret; } +/* Called from the userspace PM only */ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; @@ -1631,8 +1658,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) } } -static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +/* Called from the in-kernel PM only */ +static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -1640,11 +1668,11 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, list_for_each_entry(entry, rm_list, list) { if (slist.nr < MPTCP_RM_IDS_MAX && lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) - slist.ids[slist.nr++] = entry->addr.id; + slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); if (alist.nr < MPTCP_RM_IDS_MAX && remove_anno_list_by_saddr(msk, &entry->addr)) - alist.ids[alist.nr++] = entry->addr.id; + alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); } spin_lock_bh(&msk->pm.lock); @@ -1660,8 +1688,8 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); } -static void mptcp_nl_remove_addrs_list(struct net *net, - struct list_head *rm_list) +static void mptcp_nl_flush_addrs_list(struct net *net, + struct list_head *rm_list) { long s_slot = 0, s_num = 0; struct mptcp_sock *msk; @@ -1674,7 +1702,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net, if (!mptcp_pm_is_userspace(msk)) { lock_sock(sk); - mptcp_pm_remove_addrs_and_subflows(msk, rm_list); + mptcp_pm_flush_addrs_and_subflows(msk, rm_list); release_sock(sk); } @@ -1715,7 +1743,7 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) pernet->next_id = 1; bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); - mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); + mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); synchronize_rcu(); __flush_addrs(&free_list); return 0; @@ -1941,7 +1969,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, { struct mptcp_rm_list list = { .nr = 0 }; - list.ids[list.nr++] = addr->id; + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 8eaa9fbe3e34..2cceded3a83a 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -119,23 +119,6 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) return NULL; } -int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex) -{ - struct mptcp_pm_addr_entry *match; - - spin_lock_bh(&msk->pm.lock); - match = mptcp_userspace_pm_lookup_addr_by_id(msk, id); - spin_unlock_bh(&msk->pm.lock); - if (match) { - *flags = match->flags; - *ifindex = match->ifindex; - } - - return 0; -} - int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { @@ -352,8 +335,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct mptcp_pm_addr_entry local = { 0 }; + struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; + struct mptcp_pm_local local; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; @@ -379,18 +363,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - err = mptcp_pm_parse_entry(laddr, info, true, &local); + err = mptcp_pm_parse_entry(laddr, info, true, &entry); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); goto create_err; } - if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { GENL_SET_ERR_MSG(info, "invalid addr flags"); err = -EINVAL; goto create_err; } - local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) { @@ -398,27 +382,29 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) { + if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; goto create_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); + err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; } - lock_sock(sk); - - err = __mptcp_subflow_connect(sk, &local.addr, &addr_r); + local.addr = entry.addr; + local.flags = entry.flags; + local.ifindex = entry.ifindex; + lock_sock(sk); + err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); spin_lock_bh(&msk->pm.lock); if (err) - mptcp_userspace_pm_delete_local_addr(msk, &local); + mptcp_userspace_pm_delete_local_addr(msk, &entry); else msk->pm.subflows++; spin_unlock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0d536b183a6c..c2317919fc14 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -139,7 +139,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; - pr_debug("colesced seq %llx into %llx new len %d new end seq %llx", + pr_debug("colesced seq %llx into %llx new len %d new end seq %llx\n", MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq, to->len, MPTCP_SKB_CB(from)->end_seq); MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq; @@ -217,7 +217,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) end_seq = MPTCP_SKB_CB(skb)->end_seq; max_seq = atomic64_read(&msk->rcv_wnd_sent); - pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq, + pr_debug("msk=%p seq=%llx limit=%llx empty=%d\n", msk, seq, max_seq, RB_EMPTY_ROOT(&msk->out_of_order_queue)); if (after64(end_seq, max_seq)) { /* out of window */ @@ -643,7 +643,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } } - pr_debug("msk=%p ssk=%p", msk, ssk); + pr_debug("msk=%p ssk=%p\n", msk, ssk); tp = tcp_sk(ssk); do { u32 map_remaining, offset; @@ -724,7 +724,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) u64 end_seq; p = rb_first(&msk->out_of_order_queue); - pr_debug("msk=%p empty=%d", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue)); + pr_debug("msk=%p empty=%d\n", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue)); while (p) { skb = rb_to_skb(p); if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) @@ -746,7 +746,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) int delta = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; /* skip overlapping data, if any */ - pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d", + pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d\n", MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq, delta); MPTCP_SKB_CB(skb)->offset += delta; @@ -1240,7 +1240,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, size_t copy; int i; - pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u", + pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u\n", msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); if (WARN_ON_ONCE(info->sent > info->limit || @@ -1341,7 +1341,7 @@ alloc_skb: mpext->use_map = 1; mpext->dsn64 = 1; - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d", + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d\n", mpext->data_seq, mpext->subflow_seq, mpext->data_len, mpext->dsn64); @@ -1892,7 +1892,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!msk->first_pending) WRITE_ONCE(msk->first_pending, dfrag); } - pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk, + pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, !dfrag_collapsed); @@ -2248,7 +2248,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } - pr_debug("block timeout %ld", timeo); + pr_debug("block timeout %ld\n", timeo); sk_wait_data(sk, &timeo, NULL); } @@ -2264,7 +2264,7 @@ out_err: } } - pr_debug("msk=%p rx queue empty=%d:%d copied=%d", + pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", msk, skb_queue_empty_lockless(&sk->sk_receive_queue), skb_queue_empty(&msk->receive_queue), copied); if (!(flags & MSG_PEEK)) @@ -2326,7 +2326,7 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) continue; } - if (subflow->backup) { + if (subflow->backup || subflow->request_bkup) { if (!backup) backup = ssk; continue; @@ -2508,6 +2508,12 @@ out: void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + /* The first subflow can already be closed and still in the list */ + if (subflow->close_event_done) + return; + + subflow->close_event_done = true; + if (sk->sk_state == TCP_ESTABLISHED) mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL); @@ -2533,8 +2539,11 @@ static void __mptcp_close_subflow(struct sock *sk) mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int ssk_state = inet_sk_state_load(ssk); - if (inet_sk_state_load(ssk) != TCP_CLOSE) + if (ssk_state != TCP_CLOSE && + (ssk_state != TCP_CLOSE_WAIT || + inet_sk_state_load(sk) != TCP_ESTABLISHED)) continue; /* 'subflow_data_ready' will re-sched once rx queue is empty */ @@ -2714,7 +2723,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) if (!ssk) return; - pr_debug("MP_FAIL doesn't respond, reset the subflow"); + pr_debug("MP_FAIL doesn't respond, reset the subflow\n"); slow = lock_sock_fast(ssk); mptcp_subflow_reset(ssk); @@ -2888,7 +2897,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { - pr_debug("Fallback"); + pr_debug("Fallback\n"); ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); @@ -2898,7 +2907,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); mptcp_schedule_work(sk); } else { - pr_debug("Sending DATA_FIN on subflow %p", ssk); + pr_debug("Sending DATA_FIN on subflow %p\n", ssk); tcp_send_ack(ssk); if (!mptcp_rtx_timer_pending(sk)) mptcp_reset_rtx_timer(sk); @@ -2964,7 +2973,7 @@ static void mptcp_check_send_data_fin(struct sock *sk) struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu", + pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu\n", msk, msk->snd_data_fin_enable, !!mptcp_send_head(sk), msk->snd_nxt, msk->write_seq); @@ -2988,7 +2997,7 @@ static void __mptcp_wr_shutdown(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d", + pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d\n", msk, msk->snd_data_fin_enable, sk->sk_shutdown, sk->sk_state, !!mptcp_send_head(sk)); @@ -3003,7 +3012,7 @@ static void __mptcp_destroy_sock(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); might_sleep(); @@ -3111,7 +3120,7 @@ cleanup: mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); - pr_debug("msk=%p state=%d", sk, sk->sk_state); + pr_debug("msk=%p state=%d\n", sk, sk->sk_state); if (msk->token) mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); @@ -3543,7 +3552,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum) { struct mptcp_sock *msk = mptcp_sk(sk); - pr_debug("msk=%p, ssk=%p", msk, msk->first); + pr_debug("msk=%p, ssk=%p\n", msk, msk->first); if (WARN_ON_ONCE(!msk->first)) return -EINVAL; @@ -3560,7 +3569,7 @@ void mptcp_finish_connect(struct sock *ssk) sk = subflow->conn; msk = mptcp_sk(sk); - pr_debug("msk=%p, token=%u", sk, subflow->token); + pr_debug("msk=%p, token=%u\n", sk, subflow->token); subflow->map_seq = subflow->iasn; subflow->map_subflow_seq = 1; @@ -3589,7 +3598,7 @@ bool mptcp_finish_join(struct sock *ssk) struct sock *parent = (void *)msk; bool ret = true; - pr_debug("msk=%p, subflow=%p", msk, subflow); + pr_debug("msk=%p, subflow=%p\n", msk, subflow); /* mptcp socket already closing? */ if (!mptcp_is_fully_established(parent)) { @@ -3635,7 +3644,7 @@ err_prohibited: static void mptcp_shutdown(struct sock *sk, int how) { - pr_debug("sk=%p, how=%d", sk, how); + pr_debug("sk=%p, how=%d\n", sk, how); if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) __mptcp_wr_shutdown(sk); @@ -3708,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) -{ - subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); -} - static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct mptcp_subflow_context *subflow; @@ -3735,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) mptcp_subflow_early_fallback(msk, subflow); #endif - if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); - mptcp_subflow_early_fallback(msk, subflow); + if (subflow->request_mptcp) { + if (mptcp_active_should_disable(sk)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED); + mptcp_subflow_early_fallback(msk, subflow); + } else if (mptcp_token_new_connect(ssk) < 0) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); + mptcp_subflow_early_fallback(msk, subflow); + } } WRITE_ONCE(msk->write_seq, subflow->idsn); @@ -3856,7 +3863,7 @@ static int mptcp_listen(struct socket *sock, int backlog) struct sock *ssk; int err; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); lock_sock(sk); @@ -3895,7 +3902,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct mptcp_sock *msk = mptcp_sk(sock->sk); struct sock *ssk, *newsk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. @@ -3904,12 +3911,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk) return -EINVAL; - pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); + pr_debug("ssk=%p, listener=%p\n", ssk, mptcp_subflow_ctx(ssk)); newsk = inet_csk_accept(ssk, arg); if (!newsk) return arg->err; - pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); + pr_debug("newsk=%p, subflow is mptcp=%d\n", newsk, sk_is_mptcp(newsk)); if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; @@ -4002,7 +4009,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sock, wait); state = inet_sk_state_load(sk); - pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); + pr_debug("msk=%p state=%d flags=%lx\n", msk, state, msk->flags); if (state == TCP_LISTEN) { struct sock *ssk = READ_ONCE(msk->first); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a1c1b0ff1ce1..74417aae08d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -236,6 +236,12 @@ struct mptcp_pm_data { struct mptcp_rm_list rm_list_rx; }; +struct mptcp_pm_local { + struct mptcp_addr_info addr; + u8 flags; + int ifindex; +}; + struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; @@ -524,7 +530,9 @@ struct mptcp_subflow_context { stale : 1, /* unable to snd/rcv data, do not use for xmit */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 10; + close_event_done : 1, /* has done the post-closed part */ + mpc_drop : 1, /* the MPC option has been dropped in a rtx */ + __unused : 8; bool data_avail; bool scheduled; u32 remote_nonce; @@ -690,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); + +void mptcp_active_disable(struct sock *sk); +bool mptcp_active_should_disable(struct sock *ssk); +void mptcp_active_enable(struct sock *sk); + void mptcp_get_available_schedulers(char *buf, size_t maxlen); void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, @@ -718,7 +731,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ -int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, +int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, struct socket **new_sock); @@ -992,6 +1005,8 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); +bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote); void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); @@ -1011,14 +1026,6 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex); -int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex); -int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex); int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); @@ -1151,7 +1158,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); } -void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) @@ -1177,7 +1183,7 @@ static inline bool mptcp_check_fallback(const struct sock *sk) static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { - pr_debug("TCP fallback already done (msk=%p)", msk); + pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } set_bit(MPTCP_FALLBACK_DONE, &msk->flags); @@ -1213,7 +1219,15 @@ static inline void mptcp_do_fallback(struct sock *ssk) } } -#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) +#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) + +static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + pr_fallback(msk); + subflow->request_mptcp = 0; + __mptcp_do_fallback(msk); +} static inline bool mptcp_check_infinite_map(struct sk_buff *skb) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 4a7fd0508ad2..78ed508ebc1b 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -86,7 +86,7 @@ int mptcp_register_scheduler(struct mptcp_sched_ops *sched) list_add_tail_rcu(&sched->list, &mptcp_sched_list); spin_unlock(&mptcp_sched_list_lock); - pr_debug("%s registered", sched->name); + pr_debug("%s registered\n", sched->name); return 0; } @@ -118,7 +118,7 @@ int mptcp_init_sched(struct mptcp_sock *msk, if (msk->sched->init) msk->sched->init(msk); - pr_debug("sched=%s", msk->sched->name); + pr_debug("sched=%s\n", msk->sched->name); return 0; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 2026a9a36f80..505445a9598f 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -873,7 +873,7 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname, struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); if (level == SOL_SOCKET) return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); @@ -1453,7 +1453,7 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname, struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; - pr_debug("msk=%p", msk); + pr_debug("msk=%p\n", msk); /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a21c712350c3..1040b3b9696b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -39,7 +39,7 @@ static void subflow_req_destructor(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - pr_debug("subflow_req=%p", subflow_req); + pr_debug("subflow_req=%p\n", subflow_req); if (subflow_req->msk) sock_put((struct sock *)subflow_req->msk); @@ -146,7 +146,7 @@ static int subflow_check_req(struct request_sock *req, struct mptcp_options_received mp_opt; bool opt_mp_capable, opt_mp_join; - pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); + pr_debug("subflow_req=%p, listener=%p\n", subflow_req, listener); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -221,7 +221,7 @@ again: } if (subflow_use_different_sport(subflow_req->msk, sk_listener)) { - pr_debug("syn inet_sport=%d %d", + pr_debug("syn inet_sport=%d %d\n", ntohs(inet_sk(sk_listener)->inet_sport), ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) { @@ -243,7 +243,7 @@ again: subflow_init_req_cookie_join_save(subflow_req, skb); } - pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, + pr_debug("token=%u, remote_nonce=%u msk=%p\n", subflow_req->token, subflow_req->remote_nonce, subflow_req->msk); } @@ -527,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; - pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); + pr_debug("subflow=%p synack seq=%x\n", subflow, subflow->ssn_offset); mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { @@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_capable = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); + mptcp_active_enable(parent); mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -559,7 +560,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; WRITE_ONCE(subflow->remote_id, mp_opt.join_id); - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d\n", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -585,12 +586,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); if (subflow_use_different_dport(msk, sk)) { - pr_debug("synack inet_dport=%d %d", + pr_debug("synack inet_dport=%d %d\n", ntohs(inet_sk(sk)->inet_dport), ntohs(inet_sk(parent)->inet_dport)); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX); } } else if (mptcp_check_fallback(sk)) { + /* It looks like MPTCP is blocked, while TCP is not */ + if (subflow->mpc_drop) + mptcp_active_disable(parent); fallback: mptcp_propagate_state(parent, sk, subflow, NULL); } @@ -655,7 +659,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); /* Never answer to SYNs sent to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -686,7 +690,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); if (skb->protocol == htons(ETH_P_IP)) return subflow_v4_conn_request(sk, skb); @@ -807,7 +811,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_sock *owner; struct sock *child; - pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + pr_debug("listener=%p, req=%p, conn=%p\n", listener, req, listener->conn); /* After child creation we must look for MPC even when options * are not parsed @@ -898,7 +902,7 @@ create_child: ctx->conn = (struct sock *)owner; if (subflow_use_different_sport(owner, sk)) { - pr_debug("ack inet_sport=%d %d", + pr_debug("ack inet_sport=%d %d\n", ntohs(inet_sk(sk)->inet_sport), ntohs(inet_sk((struct sock *)owner)->inet_sport)); if (!mptcp_pm_sport_in_anno_list(owner, sk)) { @@ -961,7 +965,7 @@ enum mapping_status { static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d\n", ssn, subflow->map_subflow_seq, subflow->map_data_len); } @@ -1121,7 +1125,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len = mpext->data_len; if (data_len == 0) { - pr_debug("infinite mapping received"); + pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; return MAPPING_INVALID; @@ -1133,7 +1137,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (data_len == 1) { bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, mpext->dsn64); - pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); + pr_debug("DATA_FIN with no payload seq=%llu\n", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS * option before the previous mapping @@ -1159,7 +1163,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_fin_seq &= GENMASK_ULL(31, 0); mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); - pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", + pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d\n", data_fin_seq, mpext->dsn64); /* Adjust for DATA_FIN using 1 byte of sequence space */ @@ -1205,7 +1209,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (unlikely(subflow->map_csum_reqd != csum_reqd)) return MAPPING_INVALID; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", subflow->map_seq, subflow->map_subflow_seq, subflow->map_data_len, subflow->map_csum_reqd, subflow->map_data_csum); @@ -1240,7 +1244,7 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, avail_len = skb->len - offset; incr = limit >= avail_len ? avail_len + fin : limit; - pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + pr_debug("discarding=%d len=%d offset=%d seq=%d\n", incr, skb->len, offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; @@ -1255,12 +1259,16 @@ out: /* sched mptcp worker to remove the subflow if no more data is pending */ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) { - if (likely(ssk->sk_state != TCP_CLOSE)) + struct sock *sk = (struct sock *)msk; + + if (likely(ssk->sk_state != TCP_CLOSE && + (ssk->sk_state != TCP_CLOSE_WAIT || + inet_sk_state_load(sk) != TCP_ESTABLISHED))) return; if (skb_queue_empty(&ssk->sk_receive_queue) && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - mptcp_schedule_work((struct sock *)msk); + mptcp_schedule_work(sk); } static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) @@ -1337,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk) old_ack = READ_ONCE(msk->ack_seq); ack_seq = mptcp_subflow_get_mapped_dsn(subflow); - pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, + pr_debug("msk ack_seq=%llx subflow ack_seq=%llx\n", old_ack, ack_seq); if (unlikely(before64(ack_seq, old_ack))) { mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); @@ -1409,7 +1417,7 @@ bool mptcp_subflow_data_available(struct sock *sk) subflow->map_valid = 0; WRITE_ONCE(subflow->data_avail, false); - pr_debug("Done with mapping: seq=%u data_len=%u", + pr_debug("Done with mapping: seq=%u data_len=%u\n", subflow->map_subflow_seq, subflow->map_data_len); } @@ -1519,7 +1527,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk); - pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d", + pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d\n", subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped); if (likely(icsk->icsk_af_ops == target)) @@ -1561,28 +1569,31 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, #endif } -int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, +int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; + int local_id = local->addr.id; struct sockaddr_storage addr; int remote_id = remote->id; - int local_id = loc->id; int err = -ENOTCONN; struct socket *sf; struct sock *ssk; u32 remote_token; int addrlen; - int ifindex; - u8 flags; + /* The userspace PM sent the request too early? */ if (!mptcp_is_fully_established(sk)) goto err_out; - err = mptcp_subflow_create_socket(sk, loc->family, &sf); - if (err) + err = mptcp_subflow_create_socket(sk, local->addr.family, &sf); + if (err) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR); + pr_debug("msk=%p local=%d remote=%d create sock error: %d\n", + msk, local_id, remote_id, err); goto err_out; + } ssk = sf->sk; subflow = mptcp_subflow_ctx(ssk); @@ -1590,42 +1601,61 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, get_random_bytes(&subflow->local_nonce, sizeof(u32)); } while (!subflow->local_nonce); - if (local_id) + /* if 'IPADDRANY', the ID will be set later, after the routing */ + if (local->addr.family == AF_INET) { + if (!local->addr.addr.s_addr) + local_id = -1; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + } else if (sk->sk_family == AF_INET6) { + if (ipv6_addr_any(&local->addr.addr6)) + local_id = -1; +#endif + } + + if (local_id >= 0) subflow_set_local_id(subflow, local_id); - mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, - &flags, &ifindex); subflow->remote_key_valid = 1; subflow->remote_key = READ_ONCE(msk->remote_key); subflow->local_key = READ_ONCE(msk->local_key); subflow->token = msk->token; - mptcp_info2sockaddr(loc, &addr, ssk->sk_family); + mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family); addrlen = sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - ssk->sk_bound_dev_if = ifindex; + ssk->sk_bound_dev_if = local->ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); - if (err) + if (err) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR); + pr_debug("msk=%p local=%d remote=%d bind error: %d\n", + msk, local_id, remote_id, err); goto failed; + } mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); - pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, + pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; - subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); sock_hold(ssk); list_add_tail(&subflow->node, &msk->conn_list); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); - if (err && err != -EINPROGRESS) + if (err && err != -EINPROGRESS) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR); + pr_debug("msk=%p local=%d remote=%d connect error: %d\n", + msk, local_id, remote_id, err); goto failed_unlink; + } + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX); /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); @@ -1747,7 +1777,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; subflow = mptcp_subflow_ctx(sf->sk); - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); *new_sock = sf; sock_hold(sk); @@ -1776,7 +1806,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, INIT_LIST_HEAD(&ctx->node); INIT_LIST_HEAD(&ctx->delegated_node); - pr_debug("subflow=%p", ctx); + pr_debug("subflow=%p\n", ctx); ctx->tcp_sock = sk; WRITE_ONCE(ctx->local_id, -1); @@ -1927,7 +1957,7 @@ static int subflow_ulp_init(struct sock *sk) goto out; } - pr_debug("subflow=%p, family=%d", ctx, sk->sk_family); + pr_debug("subflow=%p, family=%d\n", ctx, sk->sk_family); tp->is_mptcp = 1; ctx->icsk_af_ops = icsk->icsk_af_ops; |