aboutsummaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/options.c3
-rw-r--r--net/mptcp/pm.c25
-rw-r--r--net/mptcp/pm_netlink.c73
-rw-r--r--net/mptcp/pm_userspace.c12
-rw-r--r--net/mptcp/protocol.c70
-rw-r--r--net/mptcp/protocol.h12
-rw-r--r--net/mptcp/sockopt.c14
-rw-r--r--net/mptcp/subflow.c43
-rw-r--r--net/mptcp/token.c14
-rw-r--r--net/mptcp/token_test.c3
10 files changed, 192 insertions, 77 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 5ded85e2c374..b30cea2fbf3f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1594,8 +1594,7 @@ mp_rst:
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
- MPTCP_INC_STATS(sock_net((const struct sock *)tp),
- MPTCP_MIB_MPPRIOTX);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 45e2a48397b9..70f0ced3ca86 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -420,6 +420,31 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
+/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
+ * otherwise allow any matching local/remote pair
+ */
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem)
+{
+ bool mptcp_is_v4 = sk->sk_family == AF_INET;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
+ bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);
+
+ if (mptcp_is_v4)
+ return loc_is_v4 && rem_is_v4;
+
+ if (ipv6_only_sock(sk))
+ return !loc_is_v4 && !rem_is_v4;
+
+ return loc_is_v4 == rem_is_v4;
+#else
+ return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
+#endif
+}
+
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2ea7eae43bdb..56628b52d100 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -152,7 +152,6 @@ static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk)
{
- const struct sock *sk = (const struct sock *)msk;
struct mptcp_pm_addr_entry *entry, *ret = NULL;
msk_owned_by_me(msk);
@@ -165,16 +164,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- if (entry->addr.family != sk->sk_family) {
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if ((entry->addr.family == AF_INET &&
- !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
- (sk->sk_family == AF_INET &&
- !ipv6_addr_v4mapped(&entry->addr.addr6)))
-#endif
- continue;
- }
-
ret = entry;
break;
}
@@ -423,7 +412,9 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
-static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *local,
+ bool fullmesh,
struct mptcp_addr_info *addrs)
{
bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
@@ -443,6 +434,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
if (deny_id0)
return 0;
+ if (!mptcp_pm_addr_families_match(sk, local, &remote))
+ return 0;
+
msk->pm.subflows++;
addrs[i++] = remote;
} else {
@@ -453,6 +447,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
if (deny_id0 && !addrs[i].id)
continue;
+ if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
+ continue;
+
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@@ -603,9 +600,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
msk->pm.local_addr_used++;
- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
- if (nr)
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
+ if (nr == 0)
+ continue;
+
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
@@ -628,11 +627,11 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
* and return the array size.
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *remote,
struct mptcp_addr_info *addrs)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
- struct mptcp_addr_info local;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
@@ -645,15 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
- if (entry->addr.family != sk->sk_family) {
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if ((entry->addr.family == AF_INET &&
- !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
- (sk->sk_family == AF_INET &&
- !ipv6_addr_v4mapped(&entry->addr.addr6)))
-#endif
- continue;
- }
+ if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
+ continue;
if (msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@@ -666,8 +658,18 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
+ struct mptcp_addr_info local;
+
memset(&local, 0, sizeof(local));
- local.family = msk->pm.remote.family;
+ local.family =
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ remote->family == AF_INET6 &&
+ ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
+#endif
+ remote->family;
+
+ if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ return 0;
msk->pm.subflows++;
addrs[i++] = local;
@@ -706,7 +708,9 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, addrs);
+ if (nr == 0)
+ return;
msk->pm.add_addr_accepted++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
@@ -998,8 +1002,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
{
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct mptcp_sock *msk;
struct socket *ssock;
+ struct sock *newsk;
int backlog = 1024;
int err;
@@ -1008,11 +1012,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
- msk = mptcp_sk(entry->lsk->sk);
- if (!msk)
+ newsk = entry->lsk->sk;
+ if (!newsk)
return -EINVAL;
- ssock = __mptcp_nmpc_socket(msk);
+ lock_sock(newsk);
+ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ release_sock(newsk);
if (!ssock)
return -EINVAL;
@@ -1143,7 +1149,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss
if (!tcp_rtx_and_write_queues_empty(ssk)) {
subflow->stale = 1;
__mptcp_retransmit_pending_data(sk);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+ MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE);
}
unlock_sock_fast(ssk, slow);
@@ -1903,8 +1909,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
}
if (token)
- return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
- token, &addr, &remote, bkup);
+ return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup);
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 65dcc55a8ad8..a02d3cbf2a1b 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -59,8 +59,8 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
*/
e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
if (!e) {
- spin_unlock_bh(&msk->pm.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto append_err;
}
*e = *entry;
@@ -74,6 +74,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
ret = entry->addr.id;
}
+append_err:
spin_unlock_bh(&msk->pm.lock);
return ret;
}
@@ -294,6 +295,13 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
}
sk = (struct sock *)msk;
+
+ if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
+ GENL_SET_ERR_MSG(info, "families mismatch");
+ err = -EINVAL;
+ goto create_err;
+ }
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f6f93957275b..3ad9c46202fc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -98,7 +98,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
struct socket *ssock;
int err;
- err = mptcp_subflow_create_socket(sk, &ssock);
+ err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock);
if (err)
return err;
@@ -923,9 +923,8 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
mptcp_for_each_subflow(msk, subflow) {
if (READ_ONCE(subflow->data_avail))
@@ -1408,7 +1407,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
@@ -1662,6 +1661,8 @@ static void mptcp_set_nospace(struct sock *sk)
set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}
+static int mptcp_disconnect(struct sock *sk, int flags);
+
static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
size_t len, int *copied_syn)
{
@@ -1672,9 +1673,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
- msk->is_sendmsg = 1;
+ msk->fastopening = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
- msk->is_sendmsg = 0;
+ msk->fastopening = 0;
msg->msg_flags = saved_flags;
release_sock(ssk);
@@ -1688,6 +1689,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
*/
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
+ } else if (ret && ret != -EINPROGRESS) {
+ mptcp_disconnect(sk, 0);
}
return ret;
@@ -1886,7 +1889,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
u32 time, advmss = 1;
u64 rtt_us, mstamp;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
if (copied <= 0)
return;
@@ -2213,7 +2216,7 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
- sock_owned_by_me((const struct sock *)msk);
+ msk_owned_by_me(msk);
if (__mptcp_check_fallback(msk))
return NULL;
@@ -2353,7 +2356,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(ssk);
+ mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
@@ -2720,8 +2723,8 @@ static int mptcp_init_sock(struct sock *sk)
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
- sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
@@ -2872,7 +2875,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
sock_put(sk);
}
@@ -2888,15 +2890,23 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
+static void mptcp_listen_inuse_dec(struct sock *sk)
+{
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+}
+
bool __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
+ int subflows_alive = 0;
sk->sk_shutdown = SHUTDOWN_MASK;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
+ mptcp_listen_inuse_dec(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -2918,6 +2928,8 @@ cleanup:
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ subflows_alive += ssk->sk_state != TCP_CLOSE;
+
/* since the close timeout takes precedence on the fail one,
* cancel the latter
*/
@@ -2933,6 +2945,12 @@ cleanup:
}
sock_orphan(sk);
+ /* all the subflows are closed, only timeout can change the msk
+ * state, let's not keep resources busy for no reasons
+ */
+ if (subflows_alive == 0)
+ inet_sk_state_store(sk, TCP_CLOSE);
+
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (msk->token)
@@ -2989,6 +3007,15 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* We are on the fastopen error path. We can't call straight into the
+ * subflows cleanup code due to lock nesting (we are already under
+ * msk->firstsocket lock). Do nothing and leave the cleanup to the
+ * caller.
+ */
+ if (msk->fastopening)
+ return 0;
+
+ mptcp_listen_inuse_dec(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3532,7 +3559,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->is_sendmsg)
+ if (msk->fastopening)
err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
else
err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
@@ -3627,12 +3654,13 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct sock *sk = sock->sk;
struct socket *ssock;
int err;
pr_debug("msk=%p", msk);
- lock_sock(sock->sk);
+ lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
err = -EINVAL;
@@ -3640,18 +3668,20 @@ static int mptcp_listen(struct socket *sock, int backlog)
}
mptcp_token_destroy(msk);
- inet_sk_state_store(sock->sk, TCP_LISTEN);
- sock_set_flag(sock->sk, SOCK_RCU_FREE);
+ inet_sk_state_store(sk, TCP_LISTEN);
+ sock_set_flag(sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog);
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (!err) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ mptcp_copy_inaddrs(sk, ssock->sk);
+ }
mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 955fb3d88eb3..61fd8eabfca2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,7 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- is_sendmsg:1;
+ fastopening:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -628,7 +628,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
-void mptcp_subflow_queue_clean(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
@@ -641,7 +641,8 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
@@ -754,7 +755,7 @@ static inline void mptcp_token_init_request(struct request_sock *req)
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(struct request_sock *req);
-int mptcp_token_new_connect(struct sock *sk);
+int mptcp_token_new_connect(struct sock *ssk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
@@ -776,6 +777,9 @@ int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
bool require_family,
struct mptcp_pm_addr_entry *entry);
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4b1e6ec1b36..8a9656248b0f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -18,7 +18,7 @@
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
- sock_owned_by_me((const struct sock *)msk);
+ msk_owned_by_me(msk);
if (likely(!__mptcp_check_fallback(msk)))
return NULL;
@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct sock *sk = (struct sock *)msk;
struct socket *sock;
+ int ret = -EINVAL;
/* Limit to first subflow, before the connection establishment */
+ lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
if (!sock)
- return -EINVAL;
+ goto unlock;
+
+ ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
- return tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+unlock:
+ release_sock(sk);
+ return ret;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
@@ -1255,6 +1262,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
+ ssk->sk_ipv6only = sk->sk_ipv6only;
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d1d32a66ae3f..4ae1a7304cf0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -26,6 +26,7 @@
#include "mib.h"
#include <trace/events/mptcp.h>
+#include <trace/events/sock.h>
static void mptcp_subflow_ops_undo_override(struct sock *ssk);
@@ -1399,6 +1400,7 @@ void __mptcp_error_report(struct sock *sk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int err = sock_error(ssk);
+ int ssk_state;
if (!err)
continue;
@@ -1409,7 +1411,14 @@ void __mptcp_error_report(struct sock *sk)
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue;
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
sk->sk_err = -err;
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1438,6 +1447,8 @@ static void subflow_data_ready(struct sock *sk)
struct sock *parent = subflow->conn;
struct mptcp_sock *msk;
+ trace_sk_data_ready(sk);
+
msk = mptcp_sk(parent);
if (state & TCPF_LISTEN) {
/* MPJ subflow are removed from accept queue before reaching here,
@@ -1547,7 +1558,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, &sf);
+ err = mptcp_subflow_create_socket(sk, loc->family, &sf);
if (err)
goto err_out;
@@ -1660,7 +1671,9 @@ static void mptcp_subflow_ops_undo_override(struct sock *ssk)
#endif
ssk->sk_prot = &tcp_prot;
}
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
+
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
struct net *net = sock_net(sk);
@@ -1673,12 +1686,11 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
if (unlikely(!sk->sk_socket))
return -EINVAL;
- err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
- &sf);
+ err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
if (err)
return err;
- lock_sock(sf->sk);
+ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1791,7 +1803,7 @@ static void subflow_state_change(struct sock *sk)
}
}
-void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
@@ -1840,8 +1852,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
do_cancel_work = __mptcp_close(sk, 0);
release_sock(sk);
- if (do_cancel_work)
+ if (do_cancel_work) {
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map,
+ SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+ }
sock_put(sk);
}
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 65430f314a68..5bb924534387 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -134,7 +134,7 @@ int mptcp_token_new_request(struct request_sock *req)
/**
* mptcp_token_new_connect - create new key/idsn/token for subflow
- * @sk: the socket that will initiate a connection
+ * @ssk: the socket that will initiate a connection
*
* This function is called when a new outgoing mptcp connection is
* initiated.
@@ -148,11 +148,12 @@ int mptcp_token_new_request(struct request_sock *req)
*
* returns 0 on success.
*/
-int mptcp_token_new_connect(struct sock *sk)
+int mptcp_token_new_connect(struct sock *ssk)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int retries = MPTCP_TOKEN_MAX_RETRIES;
+ struct sock *sk = subflow->conn;
struct token_bucket *bucket;
again:
@@ -169,12 +170,13 @@ again:
}
pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
+ ssk, subflow->local_key, subflow->token, subflow->idsn);
WRITE_ONCE(msk->token, subflow->token);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
bucket->chain_len++;
spin_unlock_bh(&bucket->lock);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
}
@@ -190,8 +192,10 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
struct mptcp_sock *msk)
{
struct mptcp_subflow_request_sock *pos;
+ struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
bucket = token_bucket(req->token);
spin_lock_bh(&bucket->lock);
@@ -370,12 +374,14 @@ void mptcp_token_destroy_request(struct request_sock *req)
*/
void mptcp_token_destroy(struct mptcp_sock *msk)
{
+ struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
struct mptcp_sock *pos;
if (sk_unhashed((struct sock *)msk))
return;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
bucket = token_bucket(msk->token);
spin_lock_bh(&bucket->lock);
pos = __token_lookup_msk(bucket, msk->token);
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index 5d984bec1cd8..0758865ab658 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -57,6 +57,9 @@ static struct mptcp_sock *build_msk(struct kunit *test)
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
sock_net_set((struct sock *)msk, &init_net);
+
+ /* be sure the token helpers can dereference sk->sk_prot */
+ ((struct sock *)msk)->sk_prot = &tcp_prot;
return msk;
}