diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/mib.c | 6 | ||||
-rw-r--r-- | net/mptcp/mib.h | 18 | ||||
-rw-r--r-- | net/mptcp/options.c | 19 | ||||
-rw-r--r-- | net/mptcp/pm.c | 70 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 161 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 53 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 225 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 26 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 153 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 25 |
10 files changed, 514 insertions, 242 deletions
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 0dac2863c6e1..a0990c365a2e 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -34,7 +34,11 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), + SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), + SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), + SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX), + SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), @@ -44,6 +48,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), + SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), + SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2be3596374f4..cae71d947252 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -27,7 +27,15 @@ enum linux_mptcp_mib_field { MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */ MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */ MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ADDADDRTX, /* Sent ADD_ADDR with echo-flag=0 */ + MPTCP_MIB_ADDADDRTXDROP, /* ADD_ADDR with echo-flag=0 not send due to + * resource exhaustion + */ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_ECHOADDTX, /* Send ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_ECHOADDTXDROP, /* ADD_ADDR with echo-flag=1 not send due + * to resource exhaustion + */ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */ MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */ @@ -37,6 +45,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */ + MPTCP_MIB_RMADDRTX, /* Sent RM_ADDR */ + MPTCP_MIB_RMADDRTXDROP, /* RM_ADDR not sent due to resource exhaustion */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ @@ -63,6 +73,14 @@ struct mptcp_mib { unsigned long mibs[LINUX_MIB_MPTCP_MAX]; }; +static inline void MPTCP_ADD_STATS(struct net *net, + enum linux_mptcp_mib_field field, + int val) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_ADD_STATS(net->mib.mptcp_statistics, field, val); +} + static inline void MPTCP_INC_STATS(struct net *net, enum linux_mptcp_mib_field field) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 19a01b6566f1..c254accb14de 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -687,9 +687,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * } opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, &opts->addr); + } else { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); @@ -723,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, for (i = 0; i < opts->rm_list.nr; i++) pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); - + MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } @@ -1023,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) return cur_seq; } +static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) +{ + msk->bytes_acked += new_snd_una - msk->snd_una; + msk->snd_una = new_snd_una; +} + static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) @@ -1054,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { - msk->snd_una = new_snd_una; + __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } mptcp_data_unlock(sk); @@ -1116,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); + + /* on fallback we just need to ignore the msk-level snd_una, as + * this is really plain TCP + */ + __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); + __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 78c924506e83..7dbbad1e4f55 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -26,7 +26,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, if (add_addr & (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { - pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); + MPTCP_INC_STATS(sock_net((struct sock *)msk), + echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP); return -EINVAL; } @@ -48,7 +49,8 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); if (rm_addr) { - pr_warn("addr_signal error, rm_addr=%d", rm_addr); + MPTCP_ADD_STATS(sock_net((struct sock *)msk), + MPTCP_MIB_RMADDRTXDROP, rm_list->nr); return -EINVAL; } @@ -87,8 +89,15 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) unsigned int subflows_max; int ret = 0; - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_active(msk); + if (mptcp_pm_is_userspace(msk)) { + if (mptcp_userspace_pm_active(msk)) { + spin_lock_bh(&pm->lock); + pm->subflows++; + spin_unlock_bh(&pm->lock); + return true; + } + return false; + } subflows_max = mptcp_pm_get_subflows_max(msk); @@ -181,8 +190,16 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, struct mptcp_pm_data *pm = &msk->pm; bool update_subflows; - update_subflows = (subflow->request_join || subflow->mp_join) && - mptcp_pm_is_kernel(msk); + update_subflows = subflow->request_join || subflow->mp_join; + if (mptcp_pm_is_userspace(msk)) { + if (update_subflows) { + spin_lock_bh(&pm->lock); + pm->subflows--; + spin_unlock_bh(&pm->lock); + } + return; + } + if (!READ_ONCE(pm->work_pending) && !update_subflows) return; @@ -398,7 +415,46 @@ out_unlock: int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { - return mptcp_pm_nl_get_local_id(msk, skc); + struct mptcp_addr_info skc_local; + struct mptcp_addr_info msk_local; + + if (WARN_ON_ONCE(!msk)) + return -1; + + /* The 0 ID mapping is defined by the first subflow, copied into the msk + * addr + */ + mptcp_local_address((struct sock_common *)msk, &msk_local); + mptcp_local_address((struct sock_common *)skc, &skc_local); + if (mptcp_addresses_equal(&msk_local, &skc_local, false)) + return 0; + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_local_id(msk, &skc_local); + return mptcp_pm_nl_get_local_id(msk, &skc_local); +} + +int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex) +{ + *flags = 0; + *ifindex = 0; + + if (!id) + return 0; + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); + return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); +} + +int mptcp_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup) +{ + if (token) + return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup); + return mptcp_pm_nl_set_flags(net, loc, bkup); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index bc343dab5e3f..5692daf57a4d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -25,9 +25,9 @@ static int pm_nl_pernet_id; struct mptcp_pm_add_entry { struct list_head list; struct mptcp_addr_info addr; + u8 retrans_times; struct timer_list add_timer; struct mptcp_sock *sock; - u8 retrans_times; }; struct pm_nl_pernet { @@ -86,8 +86,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, return a->port == b->port; } -static void local_address(const struct sock_common *skc, - struct mptcp_addr_info *addr) +void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { addr->family = skc->skc_family; addr->port = htons(skc->skc_num); @@ -122,7 +121,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, list_for_each_entry(subflow, list, node) { skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); - local_address(skc, &cur); + mptcp_local_address(skc, &cur); if (mptcp_addresses_equal(&cur, saddr, saddr->port)) return true; } @@ -263,7 +262,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) struct mptcp_addr_info saddr; bool ret = false; - local_address((struct sock_common *)sk, &saddr); + mptcp_local_address((struct sock_common *)sk, &saddr); spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.anno_list, list) { @@ -342,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, } bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry) + const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; @@ -350,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { if (mptcp_pm_is_kernel(msk)) @@ -367,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, list_add(&add_entry->list, &msk->pm.anno_list); - add_entry->addr = entry->addr; + add_entry->addr = *addr; add_entry->sock = msk; add_entry->retrans_times = 0; @@ -541,7 +540,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) struct mptcp_addr_info mpc_addr; bool backup = false; - local_address((struct sock_common *)msk->first, &mpc_addr); + mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); rcu_read_lock(); entry = __lookup_addr(pernet, &mpc_addr, false); if (entry) { @@ -577,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) return; if (local) { - if (mptcp_pm_alloc_anno_list(msk, local)) { + if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); @@ -752,7 +751,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; - local_address((struct sock_common *)ssk, &local); + mptcp_local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) continue; @@ -1047,6 +1046,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + inet_sk_state_store(newsk, TCP_LISTEN); err = kernel_listen(ssock, backlog); if (err) return err; @@ -1056,33 +1056,17 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return 0; } -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct mptcp_pm_addr_entry *entry; - struct mptcp_addr_info skc_local; - struct mptcp_addr_info msk_local; struct pm_nl_pernet *pernet; int ret = -1; - if (WARN_ON_ONCE(!msk)) - return -1; - - /* The 0 ID mapping is defined by the first subflow, copied into the msk - * addr - */ - local_address((struct sock_common *)msk, &msk_local); - local_address((struct sock_common *)skc, &skc_local); - if (mptcp_addresses_equal(&msk_local, &skc_local, false)) - return 0; - - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_local_id(msk, &skc_local); - pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { ret = entry->addr.id; break; } @@ -1096,7 +1080,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) if (!entry) return -ENOMEM; - entry->addr = skc_local; + entry->addr = *skc; entry->addr.id = 0; entry->addr.port = 0; entry->ifindex = 0; @@ -1373,31 +1357,20 @@ out_free: return ret; } -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex) +int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex) { struct mptcp_pm_addr_entry *entry; struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); - *flags = 0; - *ifindex = 0; - - if (id) { - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, - id, - flags, - ifindex); - - rcu_read_lock(); - entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); - if (entry) { - *flags = entry->flags; - *ifindex = entry->ifindex; - } - rcu_read_unlock(); + rcu_read_lock(); + entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); + if (entry) { + *flags = entry->flags; + *ifindex = entry->ifindex; } + rcu_read_unlock(); return 0; } @@ -1491,7 +1464,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next; - local_address((struct sock_common *)msk, &msk_local); + mptcp_local_address((struct sock_common *)msk, &msk_local); if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) goto next; @@ -1558,6 +1531,24 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, rm_list, list) { + remove_anno_list_by_saddr(msk, &entry->addr); + if (alist.nr < MPTCP_RM_IDS_MAX) + alist.ids[alist.nr++] = entry->addr.id; + } + + if (alist.nr) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &alist); + spin_unlock_bh(&msk->pm.lock); + } +} + void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list) { @@ -1892,18 +1883,50 @@ next: return ret; } +int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | + MPTCP_PM_ADDR_FLAG_FULLMESH; + struct mptcp_pm_addr_entry *entry; + u8 lookup_by_id = 0; + + if (addr->addr.family == AF_UNSPEC) { + lookup_by_id = 1; + if (!addr->addr.id) + return -EOPNOTSUPP; + } + + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr->addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; + } + + changed = (addr->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr->flags & mask); + *addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_set_flags(net, &addr->addr, bkup, changed); + return 0; +} + static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) { - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; + struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | - MPTCP_PM_ADDR_FLAG_FULLMESH; struct net *net = sock_net(skb->sk); - u8 bkup = 0, lookup_by_id = 0; + u8 bkup = 0; int ret; ret = mptcp_pm_parse_entry(attr, info, false, &addr); @@ -1918,34 +1941,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; - if (addr.addr.family == AF_UNSPEC) { - lookup_by_id = 1; - if (!addr.addr.id) - return -EOPNOTSUPP; - } - if (token) - return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup); - - spin_lock_bh(&pernet->lock); - entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); - if (!entry) { - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && - (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - spin_unlock_bh(&pernet->lock); - return -EINVAL; - } - - changed = (addr.flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr.flags & mask); - addr = *entry; - spin_unlock_bh(&pernet->lock); - - mptcp_nl_set_flags(net, &addr.addr, bkup, changed); - return 0; + return mptcp_pm_set_flags(net, token, &addr, &remote, bkup); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 27a275805c06..b5a8aa4c1ebd 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -69,6 +69,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, MPTCP_PM_MAX_ADDR_ID + 1, 1); list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); + msk->pm.local_addr_used++; ret = e->addr.id; } else if (match) { ret = entry->addr.id; @@ -79,15 +80,37 @@ append_err: return ret; } +/* If the subflow is closed from the other peer (not via a + * subflow destroy command then), we want to keep the entry + * not to assign the same ID to another address and to be + * able to send RM_ADDR after the removal of the subflow. + */ +static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *addr) +{ + struct mptcp_pm_addr_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { + /* TODO: a refcount is needed because the entry can + * be used multiple times (e.g. fullmesh mode). + */ + list_del_rcu(&entry->list); + kfree(entry); + msk->pm.local_addr_used--; + return 0; + } + } + + return -EINVAL; +} + int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { struct mptcp_pm_addr_entry *entry, *match = NULL; - *flags = 0; - *ifindex = 0; - spin_lock_bh(&msk->pm.lock); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { if (id == entry->addr.id) { @@ -170,7 +193,8 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) lock_sock((struct sock *)msk); spin_lock_bh(&msk->pm.lock); - if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { + msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); mptcp_pm_nl_addr_send_ack(msk); } @@ -232,7 +256,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) list_move(&match->list, &free_list); - mptcp_pm_remove_addrs_and_subflows(msk, &free_list); + mptcp_pm_remove_addrs(msk, &free_list); release_sock((struct sock *)msk); @@ -251,6 +275,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_pm_addr_entry local = { 0 }; struct mptcp_addr_info addr_r; struct mptcp_addr_info addr_l; struct mptcp_sock *msk; @@ -302,12 +327,26 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) goto create_err; } + local.addr = addr_l; + err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + if (err < 0) { + GENL_SET_ERR_MSG(info, "did not match address and id"); + goto create_err; + } + lock_sock(sk); err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); release_sock(sk); + spin_lock_bh(&msk->pm.lock); + if (err) + mptcp_userspace_pm_delete_local_addr(msk, &local); + else + msk->pm.subflows++; + spin_unlock_bh(&msk->pm.lock); + create_err: sock_put((struct sock *)msk); return err; @@ -420,7 +459,11 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); if (ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_pm_addr_entry entry = { .addr = addr_l }; + spin_lock_bh(&msk->pm.lock); + mptcp_userspace_pm_delete_local_addr(msk, &entry); + spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); mptcp_close_ssk(sk, ssk, subflow); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 67311e7d5b21..e892673deb73 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -44,7 +44,7 @@ enum { static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp; static void __mptcp_destroy_sock(struct sock *sk); -static void __mptcp_check_send_data_fin(struct sock *sk); +static void mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; @@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; + subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ subflow->local_id_valid = 1; @@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ + msk->bytes_received += copy_len; WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail = skb_peek_tail(&sk->sk_receive_queue); if (tail && mptcp_try_coalesce(sk, tail, skb)) @@ -424,8 +426,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - return !__mptcp_check_fallback(msk) && - ((1 << sk->sk_state) & + return ((1 << sk->sk_state) & (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && msk->write_seq == READ_ONCE(msk->snd_una); } @@ -583,9 +584,6 @@ static bool mptcp_check_data_fin(struct sock *sk) u64 rcv_data_fin_seq; bool ret = false; - if (__mptcp_check_fallback(msk)) - return ret; - /* Need to ack a DATA_FIN received from a peer while this side * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2. * msk->rcv_data_fin was set when parsing the incoming options @@ -623,7 +621,8 @@ static bool mptcp_check_data_fin(struct sock *sk) } ret = true; - mptcp_send_ack(msk); + if (!__mptcp_check_fallback(msk)) + mptcp_send_ack(msk); mptcp_close_wake_up(sk); } return ret; @@ -760,6 +759,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) MPTCP_SKB_CB(skb)->map_seq += delta; __skb_queue_tail(&sk->sk_receive_queue, skb); } + msk->bytes_received += end_seq - msk->ack_seq; msk->ack_seq = end_seq; moved = true; } @@ -845,17 +845,18 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); + mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); return true; } -static void __mptcp_flush_join_list(struct sock *sk) +static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list) { struct mptcp_subflow_context *tmp, *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) { + list_for_each_entry_safe(subflow, tmp, join_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); @@ -897,49 +898,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -void mptcp_subflow_eof(struct sock *sk) -{ - if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags)) - mptcp_schedule_work(sk); -} - -static void mptcp_check_for_eof(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int receivers = 0; - - mptcp_for_each_subflow(msk, subflow) - receivers += !subflow->rx_eof; - if (receivers) - return; - - if (!(sk->sk_shutdown & RCV_SHUTDOWN)) { - /* hopefully temporary hack: propagate shutdown status - * to msk, when all subflows agree on it - */ - WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); - - smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - sk->sk_data_ready(sk); - } - - switch (sk->sk_state) { - case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); - break; - case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); - break; - default: - return; - } - mptcp_close_wake_up(sk); -} - static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -1004,12 +962,6 @@ static void __mptcp_clean_una(struct sock *sk) struct mptcp_data_frag *dtmp, *dfrag; u64 snd_una; - /* on fallback we just need to ignore snd_una, as this is really - * plain TCP - */ - if (__mptcp_check_fallback(msk)) - msk->snd_una = READ_ONCE(msk->snd_nxt); - snd_una = msk->snd_una; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) @@ -1537,8 +1489,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, * that has been handed to the subflow for transmission * and skip update in case it was old dfrag. */ - if (likely(after64(snd_nxt_new, msk->snd_nxt))) + if (likely(after64(snd_nxt_new, msk->snd_nxt))) { + msk->bytes_sent += snd_nxt_new - msk->snd_nxt; msk->snd_nxt = snd_nxt_new; + } } void mptcp_check_and_set_pending(struct sock *sk) @@ -1609,7 +1563,7 @@ out: if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); if (do_check_data_fin) - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) @@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) *copied_syn = 0; } else if (ret && ret != -EINPROGRESS) { - mptcp_disconnect(sk, 0); + /* The disconnect() op called by tcp_sendmsg_fastopen()/ + * __inet_stream_connect() can fail, due to looking check, + * see mptcp_disconnect(). + * Attempt it again outside the problematic scope. + */ + if (!mptcp_disconnect(sk, 0)) + sk->sk_socket->state = SS_UNCONNECTED; } inet_sk(sk)->defer_connect = 0; @@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; } - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - if (sk->sk_shutdown & RCV_SHUTDOWN) { /* race breaker: the shutdown could be after the * previous receive queue check @@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { - tcp_disconnect(ssk, 0); + /* The MPTCP code never wait on the subflow sockets, TCP-level + * disconnect should never fail + */ + WARN_ON_ONCE(tcp_disconnect(ssk, 0)); msk->subflow->state = SS_UNCONNECTED; mptcp_subflow_ctx_reset(subflow); release_sock(ssk); @@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } - __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2596,6 +2549,7 @@ static void __mptcp_retrans(struct sock *sk) } if (copied) { dfrag->already_sent = max(dfrag->already_sent, info.sent); + msk->bytes_retrans += copied; tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); WRITE_ONCE(msk->allow_infinite_fallback, false); @@ -2654,6 +2608,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); + inet_sk_state_store(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2671,16 +2626,12 @@ static void mptcp_worker(struct work_struct *work) if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN))) goto unlock; - mptcp_check_data_fin_ack(sk); - mptcp_check_fastclose(msk); mptcp_pm_nl_work(msk); - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); + mptcp_check_data_fin_ack(sk); mptcp_check_data_fin(sk); if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2691,10 +2642,9 @@ static void mptcp_worker(struct work_struct *work) * even if it is orphaned and in FIN_WAIT2 state */ if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) { - inet_sk_state_store(sk, TCP_CLOSE); + if (mptcp_should_close(sk)) mptcp_do_fastclose(sk); - } + if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); goto unlock; @@ -2733,6 +2683,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery = false; + msk->subflow_id = 1; mptcp_pm_data_init(msk); @@ -2812,13 +2763,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) break; fallthrough; case TCP_SYN_SENT: - tcp_disconnect(ssk, O_NONBLOCK); + WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK)); break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { pr_debug("Fallback"); ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); + + /* simulate the data_fin ack reception to let the state + * machine move forward + */ + WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); + mptcp_schedule_work(sk); } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); @@ -2858,7 +2815,7 @@ static int mptcp_close_state(struct sock *sk) return next & TCP_ACTION_FIN; } -static void __mptcp_check_send_data_fin(struct sock *sk) +static void mptcp_check_send_data_fin(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); @@ -2876,19 +2833,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk) WRITE_ONCE(msk->snd_nxt, msk->write_seq); - /* fallback socket will not get data_fin/ack, can move to the next - * state now - */ - if (__mptcp_check_fallback(msk)) { - WRITE_ONCE(msk->snd_una, msk->write_seq); - if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { - inet_sk_state_store(sk, TCP_CLOSE); - mptcp_close_wake_up(sk); - } else if (sk->sk_state == TCP_FIN_WAIT1) { - inet_sk_state_store(sk, TCP_FIN_WAIT2); - } - } - mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); @@ -2908,7 +2852,7 @@ static void __mptcp_wr_shutdown(struct sock *sk) WRITE_ONCE(msk->write_seq, msk->write_seq + 1); WRITE_ONCE(msk->snd_data_fin_enable, 1); - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_destroy_sock(struct sock *sk) @@ -2936,7 +2880,6 @@ static void __mptcp_destroy_sock(struct sock *sk) void __mptcp_unaccepted_force_close(struct sock *sk) { sock_set_flag(sk, SOCK_DEAD); - inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); __mptcp_destroy_sock(sk); } @@ -2953,10 +2896,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; } -static void mptcp_listen_inuse_dec(struct sock *sk) +static void mptcp_check_listen_stop(struct sock *sk) { - if (inet_sk_state_load(sk) == TCP_LISTEN) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + struct sock *ssk; + + if (inet_sk_state_load(sk) != TCP_LISTEN) + return; + + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + ssk = mptcp_sk(sk)->first; + if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) + return; + + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); + tcp_set_state(ssk, TCP_CLOSE); + release_sock(ssk); } bool __mptcp_close(struct sock *sk, long timeout) @@ -2969,7 +2926,7 @@ bool __mptcp_close(struct sock *sk, long timeout) WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -2978,7 +2935,6 @@ bool __mptcp_close(struct sock *sk, long timeout) /* If the msk has read data, or the caller explicitly ask it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ - inet_sk_state_store(sk, TCP_CLOSE); mptcp_do_fastclose(sk); timeout = 0; } else if (mptcp_close_state(sk)) { @@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under - * msk->firstsocket lock). Do nothing and leave the cleanup to the - * caller. + * msk->firstsocket lock). */ if (msk->fastopening) - return 0; + return -EBUSY; - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); @@ -3108,6 +3069,10 @@ static int mptcp_disconnect(struct sock *sk, int flags) WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); + msk->bytes_acked = 0; + msk->bytes_received = 0; + msk->bytes_sent = 0; + msk->bytes_retrans = 0; WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); @@ -3140,6 +3105,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); @@ -3157,6 +3123,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; + /* passive msk is created after the first/MPC subflow */ + msk->subflow_id = 2; + sock_reset_flag(nsk, SOCK_RCU_FREE); security_inet_csk_clone(nsk, req); @@ -3327,9 +3296,14 @@ static void mptcp_release_cb(struct sock *sk) for (;;) { unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | msk->push_pending; + struct list_head join_list; + if (!flags) break; + INIT_LIST_HEAD(&join_list); + list_splice_init(&msk->join_list, &join_list); + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope @@ -3340,8 +3314,9 @@ static void mptcp_release_cb(struct sock *sk) msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); + if (flags & BIT(MPTCP_FLUSH_JOIN_LIST)) - __mptcp_flush_join_list(sk); + __mptcp_flush_join_list(sk, &join_list); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) @@ -3570,11 +3545,10 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) return (int)delta; } -static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int mptcp_ioctl(struct sock *sk, int cmd, int *karg) { struct mptcp_sock *msk = mptcp_sk(sk); bool slow; - int answ; switch (cmd) { case SIOCINQ: @@ -3583,24 +3557,24 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) lock_sock(sk); __mptcp_move_skbs(msk); - answ = mptcp_inq_hint(sk); + *karg = mptcp_inq_hint(sk); release_sock(sk); break; case SIOCOUTQ: slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); + *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); unlock_sock_fast(sk, slow); break; case SIOCOUTQNSD: slow = lock_sock_fast(sk); - answ = mptcp_ioctl_outq(msk, msk->snd_nxt); + *karg = mptcp_ioctl_outq(msk, msk->snd_nxt); unlock_sock_fast(sk, slow); break; default: return -ENOIOCTLCMD; } - return put_user(answ, (int __user *)arg); + return 0; } static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, @@ -3758,6 +3732,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct socket *ssock; + struct sock *newsk; int err; pr_debug("msk=%p", msk); @@ -3769,17 +3744,20 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) return -EINVAL; - err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsock->sk); + newsk = mptcp_accept(sock->sk, flags, &err, kern); + if (!newsk) + return err; + + lock_sock(newsk); + + __inet_accept(sock, newsock, newsk); + if (!mptcp_is_tcpsk(newsock->sk)) { + struct mptcp_sock *msk = mptcp_sk(newsk); struct mptcp_subflow_context *subflow; - struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); msk->in_accept_queue = 0; - lock_sock(newsk); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ @@ -3800,11 +3778,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (unlikely(list_empty(&msk->conn_list))) inet_sk_state_store(newsk, TCP_CLOSE); } - - release_sock(newsk); } + release_sock(newsk); - return err; + return 0; } static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) @@ -3889,7 +3866,6 @@ static const struct proto_ops mptcp_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, }; static struct inet_protosw mptcp_protosw = { @@ -3984,7 +3960,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { .sendmsg = inet6_sendmsg, .recvmsg = inet6_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c5255258bfb3..37fbe22e2433 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,7 +113,6 @@ /* MPTCP socket atomic flags */ #define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 -#define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 @@ -262,10 +261,13 @@ struct mptcp_sock { u64 local_key; u64 remote_key; u64 write_seq; + u64 bytes_sent; u64 snd_nxt; + u64 bytes_received; u64 ack_seq; atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; + u64 bytes_retrans; int rmem_fwd_alloc; struct sock *last_snd; int snd_burst; @@ -274,6 +276,7 @@ struct mptcp_sock { * recovery related fields are under data_lock * protection */ + u64 bytes_acked; u64 snd_una; u64 wnd_end; unsigned long timer_ival; @@ -319,7 +322,8 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; - u32 setsockopt_seq; + u32 subflow_id; + u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; struct mptcp_sock *dl_next; }; @@ -476,14 +480,13 @@ struct mptcp_subflow_context { send_mp_fail : 1, send_fastclose : 1, send_infinite_map : 1, - rx_eof : 1, remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 8; + __unused : 9; enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -500,6 +503,8 @@ struct mptcp_subflow_context { u8 reset_reason:4; u8 stale_count; + u32 subflow_id; + long delegated_status; unsigned long fail_tout; @@ -638,6 +643,7 @@ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); +void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -720,7 +726,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); -void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { @@ -809,7 +814,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *rem, u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry); + const struct mptcp_addr_info *addr); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -821,9 +826,15 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); +int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, + u8 *flags, int *ifindex); int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); +int mptcp_pm_set_flags(struct net *net, struct nlattr *token, + struct mptcp_pm_addr_entry *loc, + struct mptcp_pm_addr_entry *rem, u8 bkup); +int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup); int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, struct mptcp_pm_addr_entry *loc, struct mptcp_pm_addr_entry *rem, u8 bkup); @@ -832,6 +843,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); @@ -915,13 +927,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d4258869ac48..63f7a09335c5 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -14,7 +14,8 @@ #include <net/mptcp.h> #include "protocol.h" -#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_INFO_OPTLEN_SIZE 16 +#define MIN_FULL_INFO_OPTLEN_SIZE 40 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { @@ -355,6 +356,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, case SO_BROADCAST: case SO_BSDCOMPAT: case SO_PASSCRED: + case SO_PASSPIDFD: case SO_PASSSEC: case SO_RXQ_OVFL: case SO_WIFI_STATUS: @@ -888,7 +890,9 @@ out: void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { + struct sock *sk = (struct sock *)msk; u32 flags = 0; + bool slow; memset(info, 0, sizeof(*info)); @@ -897,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); + if (inet_sk_state_load(sk) == TCP_LISTEN) + return; + /* The following limits only make sense for the in-kernel PM */ if (mptcp_pm_is_kernel(msk)) { info->mptcpi_subflows_max = @@ -914,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags = flags; - info->mptcpi_token = READ_ONCE(msk->token); - info->mptcpi_write_seq = READ_ONCE(msk->write_seq); - info->mptcpi_snd_una = READ_ONCE(msk->snd_una); - info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); - info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); + mptcp_data_lock(sk); + info->mptcpi_snd_una = msk->snd_una; + info->mptcpi_rcv_nxt = msk->ack_seq; + info->mptcpi_bytes_acked = msk->bytes_acked; + mptcp_data_unlock(sk); + + slow = lock_sock_fast(sk); + info->mptcpi_csum_enabled = msk->csum_enabled; + info->mptcpi_token = msk->token; + info->mptcpi_write_seq = msk->write_seq; + info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; + info->mptcpi_bytes_sent = msk->bytes_sent; + info->mptcpi_bytes_received = msk->bytes_received; + info->mptcpi_bytes_retrans = msk->bytes_retrans; + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); @@ -965,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, } static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, - char __user *optval, int __user *optlen) + char __user *optval, + int __user *optlen) { int len, copylen; @@ -1146,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o return 0; } +static int mptcp_get_full_info(struct mptcp_full_info *mfi, + char __user *optval, + int __user *optlen) +{ + int len; + + BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != + MIN_FULL_INFO_OPTLEN_SIZE); + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < MIN_FULL_INFO_OPTLEN_SIZE) + return -EINVAL; + + memset(mfi, 0, sizeof(*mfi)); + if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) + return -EFAULT; + + if (mfi->size_tcpinfo_kernel || + mfi->size_sfinfo_kernel || + mfi->num_subflows) + return -EINVAL; + + if (mfi->size_sfinfo_user > INT_MAX || + mfi->size_tcpinfo_user > INT_MAX) + return -EINVAL; + + return len - MIN_FULL_INFO_OPTLEN_SIZE; +} + +static int mptcp_put_full_info(struct mptcp_full_info *mfi, + char __user *optval, + u32 copylen, + int __user *optlen) +{ + copylen += MIN_FULL_INFO_OPTLEN_SIZE; + if (put_user(copylen, optlen)) + return -EFAULT; + + if (copy_to_user(optval, mfi, copylen)) + return -EFAULT; + return 0; +} + +static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, + int __user *optlen) +{ + unsigned int sfcount = 0, copylen = 0; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + void __user *tcpinfoptr, *sfinfoptr; + struct mptcp_full_info mfi; + int len; + + len = mptcp_get_full_info(&mfi, optval, optlen); + if (len < 0) + return len; + + /* don't bother filling the mptcp info if there is not enough + * user-space-provided storage + */ + if (len > 0) { + mptcp_diag_fill_info(msk, &mfi.mptcp_info); + copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); + } + + mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); + mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, + sizeof(struct tcp_info)); + sfinfoptr = u64_to_user_ptr(mfi.subflow_info); + mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); + mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, + sizeof(struct mptcp_subflow_info)); + tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + struct mptcp_subflow_info sfinfo; + struct tcp_info tcp_info; + + if (sfcount++ >= mfi.size_arrays_user) + continue; + + /* fetch addr/tcp_info only if the user space buffers + * are wide enough + */ + memset(&sfinfo, 0, sizeof(sfinfo)); + sfinfo.id = subflow->subflow_id; + if (mfi.size_sfinfo_user > + offsetof(struct mptcp_subflow_info, addrs)) + mptcp_get_sub_addrs(ssk, &sfinfo.addrs); + if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) + goto fail_release; + + if (mfi.size_tcpinfo_user) { + tcp_get_info(ssk, &tcp_info); + if (copy_to_user(tcpinfoptr, &tcp_info, + mfi.size_tcpinfo_user)) + goto fail_release; + } + + tcpinfoptr += mfi.size_tcpinfo_user; + sfinfoptr += mfi.size_sfinfo_user; + } + release_sock(sk); + + mfi.num_subflows = sfcount; + if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) + return -EFAULT; + + return 0; + +fail_release: + release_sock(sk); + return -EFAULT; +} + static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, int __user *optlen, int val) { @@ -1219,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, switch (optname) { case MPTCP_INFO: return mptcp_getsockopt_info(msk, optval, optlen); + case MPTCP_FULL_INFO: + return mptcp_getsockopt_full_info(msk, optval, optlen); case MPTCP_TCPINFO: return mptcp_getsockopt_tcpinfo(msk, optval, optlen); case MPTCP_SUBFLOW_ADDRS: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4688daa6b38b..9ee3b7abbaf6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -819,6 +819,7 @@ create_child: if (!ctx->conn) goto fallback; + ctx->subflow_id = 1; owner = mptcp_sk(ctx->conn); mptcp_pm_new_connection(owner, child, 1); @@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); + subflow->subflow_id = msk->subflow_id++; mptcp_info2sockaddr(remote, &addr, ssk->sk_family); sock_hold(ssk); @@ -1668,6 +1670,10 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING); + err = security_mptcp_add_subflow(sk, sf->sk); + if (err) + goto release_ssk; + /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1680,6 +1686,8 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); err = tcp_set_ulp(sf->sk, "mptcp"); + +release_ssk: release_sock(sf->sk); if (err) { @@ -1749,14 +1757,16 @@ static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct mptcp_sock *msk; __subflow_state_change(sk); + msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); - mptcp_rcv_space_init(mptcp_sk(parent), sk); - pr_fallback(mptcp_sk(parent)); + mptcp_rcv_space_init(msk, sk); + pr_fallback(msk); subflow->conn_finished = 1; mptcp_set_connected(parent); } @@ -1772,11 +1782,12 @@ static void subflow_state_change(struct sock *sk) subflow_sched_work_if_closed(mptcp_sk(parent), sk); - if (__mptcp_check_fallback(mptcp_sk(parent)) && - !subflow->rx_eof && subflow_is_done(sk)) { - subflow->rx_eof = 1; - mptcp_subflow_eof(parent); - } + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) + mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) |