Diffstat (limited to 'net/mptcp')
-rw-r--r--  net/mptcp/fastopen.c        11
-rw-r--r--  net/mptcp/options.c         14
-rw-r--r--  net/mptcp/pm.c               4
-rw-r--r--  net/mptcp/pm_netlink.c      22
-rw-r--r--  net/mptcp/pm_userspace.c     4
-rw-r--r--  net/mptcp/protocol.c       197
-rw-r--r--  net/mptcp/protocol.h        21
-rw-r--r--  net/mptcp/sockopt.c         46
-rw-r--r--  net/mptcp/subflow.c        167
9 files changed, 289 insertions, 197 deletions
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index d237d142171c..bceaab8dd8e4 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -9,11 +9,18 @@
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req)
{
- struct sock *ssk = subflow->tcp_sock;
- struct sock *sk = subflow->conn;
+ struct sock *sk, *ssk;
struct sk_buff *skb;
struct tcp_sock *tp;
+ /* on early fallback the subflow context is deleted by
+ * subflow_syn_recv_sock()
+ */
+ if (!subflow)
+ return;
+
+ ssk = subflow->tcp_sock;
+ sk = subflow->conn;
tp = tcp_sk(ssk);
subflow->is_mptfo = 1;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b30cea2fbf3f..19a01b6566f1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -442,7 +442,6 @@ static void clear_3rdack_retransmission(struct sock *sk)
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
- unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -556,7 +555,6 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool snd_data_fin_enable,
unsigned int *size,
- unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -580,7 +578,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy = *mpext;
}
- remaining -= map_size;
dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
@@ -851,9 +848,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
}
snd_data_fin = mptcp_data_fin_enabled(msk);
- if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, opts)) {
unsigned int mp_fail_size;
ret = true;
@@ -1001,7 +998,7 @@ check_notify:
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk);
} else {
- mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
+ mptcp_pm_fully_established(msk, ssk);
}
return true;
@@ -1192,9 +1189,8 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
*/
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
if (mp_opt.data_fin && mp_opt.data_len == 1 &&
- mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
- schedule_work(&msk->work))
- sock_hold(subflow->conn);
+ mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64))
+ mptcp_schedule_work((struct sock *)msk);
return true;
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 70f0ced3ca86..78c924506e83 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -126,7 +126,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
-void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
@@ -150,7 +150,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
if (announce)
- mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
+ mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56628b52d100..bc343dab5e3f 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -997,9 +997,13 @@ out:
return ret;
}
+static struct lock_class_key mptcp_slock_keys[2];
+static struct lock_class_key mptcp_keys[2];
+
static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
struct mptcp_pm_addr_entry *entry)
{
+ bool is_ipv6 = sk->sk_family == AF_INET6;
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
struct socket *ssock;
@@ -1016,11 +1020,23 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (!newsk)
return -EINVAL;
+ /* The subflow socket lock is acquired nested inside the msk one
+ * in several places, even by the TCP stack, and this msk is a kernel
+ * socket: lockdep complains. Instead of propagating the _nested
+ * modifiers in several places, re-init the lock class for the msk
+ * socket to an mptcp specific one.
+ */
+ sock_lock_init_class_and_name(newsk,
+ is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
+ &mptcp_slock_keys[is_ipv6],
+ is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
+ &mptcp_keys[is_ipv6]);
+
lock_sock(newsk);
ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
release_sock(newsk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -2019,7 +2035,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
return -EMSGSIZE;
- sk_err = ssk->sk_err;
+ sk_err = READ_ONCE(ssk->sk_err);
if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
return -EMSGSIZE;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index a02d3cbf2a1b..27a275805c06 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -25,8 +25,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
}
-int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3ad9c46202fc..08dc53f56bc2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
-/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
- * completed yet or has failed, return the subflow socket.
- * Otherwise return NULL.
- */
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
-{
- if (!msk->subflow || READ_ONCE(msk->can_ack))
- return NULL;
-
- return msk->subflow;
-}
-
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
{
@@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
return 0;
}
+/* If the MPC handshake is not started, returns the first subflow,
+ * eventually allocating it.
+ */
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+{
+ struct sock *sk = (struct sock *)msk;
+ int ret;
+
+ if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ return ERR_PTR(-EINVAL);
+
+ if (!msk->subflow) {
+ if (msk->first)
+ return ERR_PTR(-EINVAL);
+
+ ret = __mptcp_socket_create(msk);
+ if (ret)
+ return ERR_PTR(ret);
+
+ mptcp_sockopt_sync(msk, msk->first);
+ }
+
+ return msk->subflow;
+}
+
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
@@ -459,7 +472,7 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
return false;
}
-static void mptcp_set_datafin_timeout(const struct sock *sk)
+static void mptcp_set_datafin_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 retransmits;
@@ -825,7 +838,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);
- mptcp_propagate_sndbuf((struct sock *)msk, ssk);
mptcp_sockopt_sync_locked(msk, ssk);
return true;
}
@@ -1663,13 +1675,31 @@ static void mptcp_set_nospace(struct sock *sk)
static int mptcp_disconnect(struct sock *sk, int flags);
-static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
size_t len, int *copied_syn)
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct socket *ssock;
+ struct sock *ssk;
int ret;
+ /* on flags based fastopen the mptcp is supposed to create the
+ * first subflow right now. Otherwise we are in the defer_connect
+ * path, and the first subflow must be already present.
+ * Since the defer_connect flag is cleared after the first successful
+ * fastopen attempt, no need to check for additional subflow status.
+ */
+ if (msg->msg_flags & MSG_FASTOPEN) {
+ ssock = __mptcp_nmpc_socket(msk);
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
+ }
+ if (!msk->first)
+ return -EINVAL;
+
+ ssk = msk->first;
+
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
@@ -1692,6 +1722,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
} else if (ret && ret != -EINPROGRESS) {
mptcp_disconnect(sk, 0);
}
+ inet_sk(sk)->defer_connect = 0;
return ret;
}
@@ -1700,7 +1731,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
- struct socket *ssock;
size_t copied = 0;
int ret = 0;
long timeo;
@@ -1710,12 +1740,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
- msg->msg_flags & MSG_FASTOPEN))) {
+ if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
- ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
+ ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
copied += copied_syn;
if (ret == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2316,7 +2344,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- bool need_push, dispose_it;
+ bool dispose_it, need_push = false;
+
+ /* If the first subflow moved to a close state before accept, e.g. due
+ * to an incoming reset, mptcp either:
+ * - if either the subflow or the msk are dead, destroy the context
+ * (the subflow socket is deleted by inet_child_forget) and the msk
+ * - otherwise do nothing at the moment and take action at accept and/or
+ * listener shutdown - user-space must be able to accept() the closed
+ * socket.
+ */
+ if (msk->in_accept_queue && msk->first == ssk) {
+ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+ return;
+
+ /* ensure later check in mptcp_worker() will dispose the msk */
+ sock_set_flag(sk, SOCK_DEAD);
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_drop_ctx(ssk);
+ goto out_release;
+ }
dispose_it = !msk->subflow || ssk != msk->subflow->sk;
if (dispose_it)
@@ -2343,7 +2390,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}
- sock_orphan(ssk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2351,6 +2397,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* reference owned by msk;
*/
if (!inet_csk(ssk)->icsk_ulp_ops) {
+ WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
@@ -2360,11 +2407,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
+
__tcp_close(ssk, 0);
/* close acquired an extra ref */
__sock_put(ssk);
}
+
+out_release:
release_sock(ssk);
sock_put(ssk);
@@ -2399,9 +2449,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void __mptcp_close_subflow(struct mptcp_sock *msk)
+static void __mptcp_close_subflow(struct sock *sk)
{
struct mptcp_subflow_context *subflow, *tmp;
+ struct mptcp_sock *msk = mptcp_sk(sk);
might_sleep();
@@ -2415,16 +2466,17 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;
- mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
}
+
}
-static bool mptcp_check_close_timeout(const struct sock *sk)
+static bool mptcp_should_close(const struct sock *sk)
{
s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
struct mptcp_subflow_context *subflow;
- if (delta >= TCP_TIMEWAIT_LEN)
+ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
return true;
/* if all subflows are in closed status don't bother with additional
@@ -2463,15 +2515,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
/* Mirror the tcp_reset() error propagation */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- sk->sk_err = ECONNREFUSED;
+ WRITE_ONCE(sk->sk_err, ECONNREFUSED);
break;
case TCP_CLOSE_WAIT:
- sk->sk_err = EPIPE;
+ WRITE_ONCE(sk->sk_err, EPIPE);
break;
case TCP_CLOSE:
return;
default:
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
}
inet_sk_state_store(sk, TCP_CLOSE);
@@ -2609,7 +2661,7 @@ static void mptcp_worker(struct work_struct *work)
lock_sock(sk);
state = sk->sk_state;
- if (unlikely(state == TCP_CLOSE))
+ if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
mptcp_check_data_fin_ack(sk);
@@ -2624,12 +2676,15 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_check_send_data_fin(sk);
mptcp_check_data_fin(sk);
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(sk);
+
/* There is no point in keeping around an orphaned sk timedout or
* closed, but we need the msk around to reply to incoming DATA_FIN,
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_check_close_timeout(sk)) {
+ if (mptcp_should_close(sk)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
}
@@ -2639,9 +2694,6 @@ static void mptcp_worker(struct work_struct *work)
}
}
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
@@ -2711,10 +2763,6 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
- ret = __mptcp_socket_create(mptcp_sk(sk));
- if (ret)
- return ret;
-
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2878,6 +2926,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
sock_put(sk);
}
+void __mptcp_unaccepted_force_close(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ __mptcp_destroy_sock(sk);
+}
+
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
/* Concurrent splices from sk_receive_queue into receive_queue will
@@ -2911,10 +2967,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
goto cleanup;
}
- if (mptcp_check_readable(msk)) {
- /* the msk has read data, do the MPTCP equivalent of TCP reset */
+ if (mptcp_check_readable(msk) || timeout < 0) {
+ /* If the msk has read data, or the caller explicitly asks for it,
+ * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
+ */
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
+ timeout = 0;
} else if (mptcp_close_state(sk)) {
__mptcp_wr_shutdown(sk);
}
@@ -3079,6 +3138,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
msk->subflow = NULL;
+ msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
WRITE_ONCE(msk->csum_enabled, true);
@@ -3096,8 +3156,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
security_inet_csk_clone(nsk, req);
bh_unlock_sock(nsk);
- /* keep a single reference */
- __sock_put(nsk);
+ /* note: the newly allocated socket refcount is 2 now */
return nsk;
}
@@ -3126,7 +3185,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
- listener = __mptcp_nmpc_socket(msk);
+ listener = msk->subflow;
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
@@ -3153,8 +3212,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
goto out;
}
- /* acquire the 2nd reference for the owning socket */
- sock_hold(new_mptcp_sock);
newsk = new_mptcp_sock;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
@@ -3348,7 +3405,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
- ssock = __mptcp_nmpc_socket(msk);
+ ssock = msk->subflow;
pr_debug("msk=%p, subflow=%p", msk, ssock);
if (WARN_ON_ONCE(!ssock))
return -EINVAL;
@@ -3536,8 +3593,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int err = -EINVAL;
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
- return -EINVAL;
+ if (IS_ERR(ssock))
+ return PTR_ERR(ssock);
mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
@@ -3625,8 +3682,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
lock_sock(sock->sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3662,8 +3719,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
- err = -EINVAL;
+ if (IS_ERR(ssock)) {
+ err = PTR_ERR(ssock);
goto unlock;
}
@@ -3694,7 +3751,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- ssock = __mptcp_nmpc_socket(msk);
+ /* buggy applications can call accept on socket states other than LISTEN
+ * but no need to allocate the first subflow just to error out.
+ */
+ ssock = msk->subflow;
if (!ssock)
return -EINVAL;
@@ -3705,25 +3765,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct sock *newsk = newsock->sk;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk->in_accept_queue = 0;
lock_sock(newsk);
- /* PM/worker can now acquire the first subflow socket
- * lock without racing with listener queue cleanup,
- * we can notify it, if needed.
- *
- * Even if remote has reset the initial subflow by now
- * the refcnt is still at least one.
- */
- subflow = mptcp_subflow_ctx(msk->first);
- list_add(&subflow->node, &msk->conn_list);
- sock_hold(msk->first);
- if (mptcp_is_fully_established(newsk))
- mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
-
- mptcp_rcv_space_init(msk, msk->first);
- mptcp_propagate_sndbuf(newsk, msk->first);
-
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
@@ -3733,6 +3778,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
+
+ /* Do late cleanup for the first subflow as necessary. Also
+ * deal with bad peers not doing a complete shutdown.
+ */
+ if (msk->first &&
+ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ __mptcp_close_ssk(newsk, msk->first,
+ mptcp_subflow_ctx(msk->first), 0);
+ if (unlikely(list_empty(&msk->conn_list)))
+ inet_sk_state_store(newsk, TCP_CLOSE);
+ }
+
release_sock(newsk);
}
@@ -3791,7 +3848,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR;
return mask;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 61fd8eabfca2..2d7b2c80a164 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,8 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- fastopening:1;
+ fastopening:1,
+ in_accept_queue:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -333,10 +334,7 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk)
sock_owned_by_me((const struct sock *)msk);
}
-static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
-{
- return (struct mptcp_sock *)sk;
-}
+#define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk)
/* the msk socket don't use the backlog, also account for the bulk
* free memory
@@ -370,7 +368,7 @@ static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
+ const struct mptcp_sock *msk = mptcp_sk(sk);
if (!msk->first_pending)
return NULL;
@@ -381,7 +379,7 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
-static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
+static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -630,9 +628,10 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
+void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
@@ -666,6 +665,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+void mptcp_subflow_drop_ctx(struct sock *ssk);
+
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
@@ -783,7 +784,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk,
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
-void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk);
@@ -831,8 +832,6 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
-int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 8a9656248b0f..d4258869ac48 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -301,9 +301,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTOIFINDEX:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
@@ -396,9 +396,9 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
case IPV6_FREEBIND:
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
@@ -693,9 +693,9 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock) {
+ if (IS_ERR(ssock)) {
release_sock(sk);
- return -EINVAL;
+ return PTR_ERR(ssock);
}
issk = inet_sk(ssock->sk);
@@ -762,13 +762,15 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *sock;
- int ret = -EINVAL;
+ int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
- if (!sock)
+ if (IS_ERR(sock)) {
+ ret = PTR_ERR(sock);
goto unlock;
+ }
ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
@@ -861,7 +863,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
{
struct sock *sk = (struct sock *)msk;
struct socket *ssock;
- int ret = -EINVAL;
+ int ret;
struct sock *ssk;
lock_sock(sk);
@@ -872,8 +874,10 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
}
ssock = __mptcp_nmpc_socket(msk);
- if (!ssock)
+ if (IS_ERR(ssock)) {
+ ret = PTR_ERR(ssock);
goto out;
+ }
ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
@@ -885,7 +889,6 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
u32 flags = 0;
- u8 val;
memset(info, 0, sizeof(*info));
@@ -893,12 +896,19 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
- info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
- val = mptcp_pm_get_add_addr_signal_max(msk);
- info->mptcpi_add_addr_signal_max = val;
- val = mptcp_pm_get_add_addr_accept_max(msk);
- info->mptcpi_add_addr_accepted_max = val;
- info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
+
+ /* The following limits only make sense for the in-kernel PM */
+ if (mptcp_pm_is_kernel(msk)) {
+ info->mptcpi_subflows_max =
+ mptcp_pm_get_subflows_max(msk);
+ info->mptcpi_add_addr_signal_max =
+ mptcp_pm_get_add_addr_signal_max(msk);
+ info->mptcpi_add_addr_accepted_max =
+ mptcp_pm_get_add_addr_accept_max(msk);
+ info->mptcpi_local_addr_max =
+ mptcp_pm_get_local_addr_max(msk);
+ }
+
if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
flags |= MPTCP_INFO_FLAG_FALLBACK;
if (READ_ONCE(msk->can_ack))
@@ -1046,7 +1056,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
memset(a, 0, sizeof(*a));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 4ae1a7304cf0..ba065b66551a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -397,15 +397,19 @@ void mptcp_subflow_reset(struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
+ /* mptcp_mp_fail_no_response() can reach here on an already closed
+ * socket
+ */
+ if (ssk->sk_state == TCP_CLOSE)
+ return;
+
/* must hold: tcp_done() could drop last reference on parent */
sock_hold(sk);
- tcp_set_state(ssk, TCP_CLOSE);
tcp_send_active_reset(ssk, GFP_ATOMIC);
tcp_done(ssk);
- if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
- schedule_work(&mptcp_sk(sk)->work))
- return; /* worker will put sk for us */
+ if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags))
+ mptcp_schedule_work(sk);
sock_put(sk);
}
@@ -622,7 +626,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
-static struct proto tcpv6_prot_override;
+static struct proto tcpv6_prot_override __ro_after_init;
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
@@ -691,13 +695,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
-static void mptcp_force_close(struct sock *sk)
-{
- /* the msk is not yet exposed to user-space */
- inet_sk_state_store(sk, TCP_CLOSE);
- sk_common_release(sk);
-}
-
static void subflow_ulp_fallback(struct sock *sk,
struct mptcp_subflow_context *old_ctx)
{
@@ -711,16 +708,19 @@ static void subflow_ulp_fallback(struct sock *sk,
mptcp_subflow_ops_undo_override(sk);
}
-static void subflow_drop_ctx(struct sock *ssk)
+void mptcp_subflow_drop_ctx(struct sock *ssk)
{
struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
if (!ctx)
return;
- subflow_ulp_fallback(ssk, ctx);
- if (ctx->conn)
- sock_put(ctx->conn);
+ list_del(&mptcp_subflow_ctx(ssk)->node);
+ if (inet_csk(ssk)->icsk_ulp_ops) {
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+ }
kfree_rcu(ctx, rcu);
}
@@ -749,7 +749,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
bool fallback, fallback_is_fatal;
- struct sock *new_msk = NULL;
+ struct mptcp_sock *owner;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -777,14 +777,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC))
fallback = true;
- goto create_child;
- }
- new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
- if (!new_msk)
- fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
@@ -813,49 +808,55 @@ create_child:
subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
goto dispose_child;
}
-
- if (new_msk)
- mptcp_copy_inaddrs(new_msk, child);
- subflow_drop_ctx(child);
- goto out;
+ goto fallback;
}
/* ssk inherits options of listener sk */
ctx->setsockopt_seq = listener->setsockopt_seq;
if (ctx->mp_capable) {
+ ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req);
+ if (!ctx->conn)
+ goto fallback;
+
+ owner = mptcp_sk(ctx->conn);
+
/* this can't race with mptcp_close(), as the msk is
* not yet exposed to user-space
*/
- inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
+ inet_sk_state_store(ctx->conn, TCP_ESTABLISHED);
/* record the newly created socket as the first msk
* subflow, but don't link it yet into conn_list
*/
- WRITE_ONCE(mptcp_sk(new_msk)->first, child);
+ WRITE_ONCE(owner->first, child);
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
- mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
- mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
- mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
- ctx->conn = new_msk;
- new_msk = NULL;
+ owner->setsockopt_seq = ctx->setsockopt_seq;
+ mptcp_pm_new_connection(owner, child, 1);
+ mptcp_token_accept(subflow_req, owner);
/* set msk addresses early to ensure mptcp_pm_get_local_id()
* uses the correct data
*/
mptcp_copy_inaddrs(ctx->conn, child);
+ mptcp_propagate_sndbuf(ctx->conn, child);
+
+ mptcp_rcv_space_init(owner, child);
+ list_add(&ctx->node, &owner->conn_list);
+ sock_hold(child);
/* with OoO packets we can reach here without ingress
* mpc option
*/
- if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
+ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
mptcp_subflow_fully_established(ctx, &mp_opt);
+ mptcp_pm_fully_established(owner, child);
+ ctx->pm_notified = 1;
+ }
} else if (ctx->mp_join) {
- struct mptcp_sock *owner;
-
owner = subflow_req->msk;
if (!owner) {
subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
@@ -885,11 +886,6 @@ create_child:
}
}
-out:
- /* dispose of the left over mptcp master, if any */
- if (unlikely(new_msk))
- mptcp_force_close(new_msk);
-
/* check for expected invariant - should never trigger, just help
* catching earlier subtle bugs
*/
@@ -899,7 +895,7 @@ out:
return child;
dispose_child:
- subflow_drop_ctx(child);
+ mptcp_subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
inet_csk_prepare_for_destroy_sock(child);
tcp_done(child);
@@ -907,10 +903,14 @@ dispose_child:
/* The last child reference will be released by the caller */
return child;
+
+fallback:
+ mptcp_subflow_drop_ctx(child);
+ return child;
}
static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
-static struct proto tcp_prot_override;
+static struct proto tcp_prot_override __ro_after_init;
enum mapping_status {
MAPPING_OK,
@@ -1103,8 +1103,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
skb_ext_del(skb, SKB_EXT_MPTCP);
return MAPPING_OK;
} else {
- if (updated && schedule_work(&msk->work))
- sock_hold((struct sock *)msk);
+ if (updated)
+ mptcp_schedule_work((struct sock *)msk);
return MAPPING_DATA_FIN;
}
@@ -1207,17 +1207,12 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
- struct sock *sk = (struct sock *)msk;
-
if (likely(ssk->sk_state != TCP_CLOSE))
return;
if (skb_queue_empty(&ssk->sk_receive_queue) &&
- !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
- sock_hold(sk);
- if (!schedule_work(&msk->work))
- sock_put(sk);
- }
+ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ mptcp_schedule_work((struct sock *)msk);
}
static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
@@ -1335,7 +1330,7 @@ fallback:
subflow->reset_reason = MPTCP_RST_EMPTCP;
reset:
- ssk->sk_err = EBADMSG;
+ WRITE_ONCE(ssk->sk_err, EBADMSG);
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
@@ -1419,7 +1414,7 @@ void __mptcp_error_report(struct sock *sk)
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
inet_sk_state_store(sk, ssk_state);
- sk->sk_err = -err;
+ WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
smp_wmb();
@@ -1432,6 +1427,13 @@ static void subflow_error_report(struct sock *ssk)
{
struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+ /* bail early if this is a no-op, so that we avoid introducing a
+ * problematic lockdep dependency between TCP accept queue lock
+ * and msk socket spinlock
+ */
+ if (!sk->sk_socket)
+ return;
+
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
@@ -1808,13 +1810,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
struct request_sock *req;
+ struct sock *sk;
/* build a list of all unaccepted mptcp sockets */
spin_lock_bh(&queue->rskq_lock);
for (req = queue->rskq_accept_head; req; req = req->dl_next) {
struct mptcp_subflow_context *subflow;
struct sock *ssk = req->sk;
- struct mptcp_sock *msk;
if (!sk_is_mptcp(ssk))
continue;
@@ -1824,10 +1826,12 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
continue;
/* skip if already in list */
- msk = mptcp_sk(subflow->conn);
+ sk = subflow->conn;
+ msk = mptcp_sk(sk);
if (msk->dl_next || msk == head)
continue;
+ sock_hold(sk);
msk->dl_next = head;
head = msk;
}
@@ -1841,34 +1845,30 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
release_sock(listener_ssk);
for (msk = head; msk; msk = next) {
- struct sock *sk = (struct sock *)msk;
- bool do_cancel_work;
+ sk = (struct sock *)msk;
- sock_hold(sk);
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
next = msk->dl_next;
- msk->first = NULL;
msk->dl_next = NULL;
- do_cancel_work = __mptcp_close(sk, 0);
+ __mptcp_unaccepted_force_close(sk);
release_sock(sk);
- if (do_cancel_work) {
- /* lockdep will report a false positive ABBA deadlock
- * between cancel_work_sync and the listener socket.
- * The involved locks belong to different sockets WRT
- * the existing AB chain.
- * Using a per socket key is problematic as key
- * deregistration requires process context and must be
- * performed at socket disposal time, in atomic
- * context.
- * Just tell lockdep to consider the listener socket
- * released here.
- */
- mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
- mptcp_cancel_work(sk);
- mutex_acquire(&listener_sk->sk_lock.dep_map,
- SINGLE_DEPTH_NESTING, 0, _RET_IP_);
- }
+
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+ * Using a per socket key is problematic as key
+ * deregistration requires process context and must be
+ * performed at socket disposal time, in atomic
+ * context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
+ mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
sock_put(sk);
}
@@ -1932,6 +1932,13 @@ static void subflow_ulp_release(struct sock *ssk)
* when the subflow is still unaccepted
*/
release = ctx->disposable || list_empty(&ctx->node);
+
+ /* inet_child_forget() does not call sk_state_change(),
+ * explicitly trigger the socket close machinery
+ */
+ if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW,
+ &mptcp_sk(sk)->flags))
+ mptcp_schedule_work(sk);
sock_put(sk);
}