Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--	net/mptcp/protocol.c | 183
1 file changed, 120 insertions(+), 63 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 60b23b2716c4..08dc53f56bc2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -49,18 +49,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk);
 DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 static struct net_device mptcp_napi_dev;
 
-/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
- * completed yet or has failed, return the subflow socket.
- * Otherwise return NULL.
- */
-struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
-{
-	if (!msk->subflow || READ_ONCE(msk->can_ack))
-		return NULL;
-
-	return msk->subflow;
-}
-
 /* Returns end sequence number of the receiver's advertised window */
 static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
 {
@@ -116,6 +104,31 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
 	return 0;
 }
 
+/* If the MPC handshake is not started, returns the first subflow,
+ * eventually allocating it.
+ */
+struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+{
+	struct sock *sk = (struct sock *)msk;
+	int ret;
+
+	if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+		return ERR_PTR(-EINVAL);
+
+	if (!msk->subflow) {
+		if (msk->first)
+			return ERR_PTR(-EINVAL);
+
+		ret = __mptcp_socket_create(msk);
+		if (ret)
+			return ERR_PTR(ret);
+
+		mptcp_sockopt_sync(msk, msk->first);
+	}
+
+	return msk->subflow;
+}
+
 static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
@@ -459,7 +472,7 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
 	return false;
 }
 
-static void mptcp_set_datafin_timeout(const struct sock *sk)
+static void mptcp_set_datafin_timeout(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 retransmits;
@@ -1662,13 +1675,31 @@ static void mptcp_set_nospace(struct sock *sk)
 
 static int mptcp_disconnect(struct sock *sk, int flags);
 
-static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
+static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 				  size_t len, int *copied_syn)
 {
 	unsigned int saved_flags = msg->msg_flags;
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct socket *ssock;
+	struct sock *ssk;
 	int ret;
 
+	/* on flags based fastopen the mptcp is supposed to create the
+	 * first subflow right now. Otherwise we are in the defer_connect
+	 * path, and the first subflow must be already present.
+	 * Since the defer_connect flag is cleared after the first succsful
+	 * fastopen attempt, no need to check for additional subflow status.
+	 */
+	if (msg->msg_flags & MSG_FASTOPEN) {
+		ssock = __mptcp_nmpc_socket(msk);
+		if (IS_ERR(ssock))
+			return PTR_ERR(ssock);
+	}
+	if (!msk->first)
+		return -EINVAL;
+
+	ssk = msk->first;
+
 	lock_sock(ssk);
 	msg->msg_flags |= MSG_DONTWAIT;
 	msk->connect_flags = O_NONBLOCK;
@@ -1691,6 +1722,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
 	} else if (ret && ret != -EINPROGRESS) {
 		mptcp_disconnect(sk, 0);
 	}
+	inet_sk(sk)->defer_connect = 0;
 
 	return ret;
 }
@@ -1699,7 +1731,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct page_frag *pfrag;
-	struct socket *ssock;
 	size_t copied = 0;
 	int ret = 0;
 	long timeo;
@@ -1709,12 +1740,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	lock_sock(sk);
 
-	ssock = __mptcp_nmpc_socket(msk);
-	if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
-			       msg->msg_flags & MSG_FASTOPEN))) {
+	if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
 		int copied_syn = 0;
 
-		ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
+		ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
 		copied += copied_syn;
 		if (ret == -EINPROGRESS && copied_syn > 0)
 			goto out;
@@ -2315,7 +2344,26 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 			      unsigned int flags)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	bool need_push, dispose_it;
+	bool dispose_it, need_push = false;
+
+	/* If the first subflow moved to a close state before accept, e.g. due
+	 * to an incoming reset, mptcp either:
+	 * - if either the subflow or the msk are dead, destroy the context
+	 *   (the subflow socket is deleted by inet_child_forget) and the msk
+	 * - otherwise do nothing at the moment and take action at accept and/or
+	 *   listener shutdown - user-space must be able to accept() the closed
+	 *   socket.
+	 */
+	if (msk->in_accept_queue && msk->first == ssk) {
+		if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+			return;
+
+		/* ensure later check in mptcp_worker() will dispose the msk */
+		sock_set_flag(sk, SOCK_DEAD);
+		lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+		mptcp_subflow_drop_ctx(ssk);
+		goto out_release;
+	}
 
 	dispose_it = !msk->subflow || ssk != msk->subflow->sk;
 	if (dispose_it)
@@ -2351,28 +2399,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	if (!inet_csk(ssk)->icsk_ulp_ops) {
 		WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
 		kfree_rcu(subflow, rcu);
-	} else if (msk->in_accept_queue && msk->first == ssk) {
-		/* if the first subflow moved to a close state, e.g. due to
-		 * incoming reset and we reach here before inet_child_forget()
-		 * the TCP stack could later try to close it via
-		 * inet_csk_listen_stop(), or deliver it to the user space via
-		 * accept().
-		 * We can't delete the subflow - or risk a double free - nor let
-		 * the msk survive - or will be leaked in the non accept scenario:
-		 * fallback and let TCP cope with the subflow cleanup.
-		 */
-		WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
-		mptcp_subflow_drop_ctx(ssk);
 	} else {
 		/* otherwise tcp will dispose of the ssk and subflow ctx */
-		if (ssk->sk_state == TCP_LISTEN)
+		if (ssk->sk_state == TCP_LISTEN) {
+			tcp_set_state(ssk, TCP_CLOSE);
+			mptcp_subflow_queue_clean(sk, ssk);
+			inet_csk_listen_stop(ssk);
 			mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+		}
 
 		__tcp_close(ssk, 0);
 
 		/* close acquired an extra ref */
 		__sock_put(ssk);
 	}
+
+out_release:
 	release_sock(ssk);
 
 	sock_put(ssk);
@@ -2427,21 +2469,14 @@ static void __mptcp_close_subflow(struct sock *sk)
 		mptcp_close_ssk(sk, ssk, subflow);
 	}
 
-	/* if the MPC subflow has been closed before the msk is accepted,
-	 * msk will never be accept-ed, close it now
-	 */
-	if (!msk->first && msk->in_accept_queue) {
-		sock_set_flag(sk, SOCK_DEAD);
-		inet_sk_state_store(sk, TCP_CLOSE);
-	}
 }
 
-static bool mptcp_check_close_timeout(const struct sock *sk)
+static bool mptcp_should_close(const struct sock *sk)
 {
 	s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
 	struct mptcp_subflow_context *subflow;
 
-	if (delta >= TCP_TIMEWAIT_LEN)
+	if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
 		return true;
 
 	/* if all subflows are in closed status don't bother with additional
@@ -2480,15 +2515,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 	/* Mirror the tcp_reset() error propagation */
 	switch (sk->sk_state) {
 	case TCP_SYN_SENT:
-		sk->sk_err = ECONNREFUSED;
+		WRITE_ONCE(sk->sk_err, ECONNREFUSED);
 		break;
 	case TCP_CLOSE_WAIT:
-		sk->sk_err = EPIPE;
+		WRITE_ONCE(sk->sk_err, EPIPE);
 		break;
 	case TCP_CLOSE:
 		return;
 	default:
-		sk->sk_err = ECONNRESET;
+		WRITE_ONCE(sk->sk_err, ECONNRESET);
 	}
 
 	inet_sk_state_store(sk, TCP_CLOSE);
@@ -2626,7 +2661,7 @@ static void mptcp_worker(struct work_struct *work)
 
 	lock_sock(sk);
 	state = sk->sk_state;
-	if (unlikely(state == TCP_CLOSE))
+	if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
 		goto unlock;
 
 	mptcp_check_data_fin_ack(sk);
@@ -2649,7 +2684,7 @@ static void mptcp_worker(struct work_struct *work)
 	 * even if it is orphaned and in FIN_WAIT2 state
 	 */
 	if (sock_flag(sk, SOCK_DEAD)) {
-		if (mptcp_check_close_timeout(sk)) {
+		if (mptcp_should_close(sk)) {
 			inet_sk_state_store(sk, TCP_CLOSE);
 			mptcp_do_fastclose(sk);
 		}
@@ -2728,10 +2763,6 @@ static int mptcp_init_sock(struct sock *sk)
 	if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
 		return -ENOMEM;
 
-	ret = __mptcp_socket_create(mptcp_sk(sk));
-	if (ret)
-		return ret;
-
 	set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
 
 	/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2895,6 +2926,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sock_put(sk);
 }
 
+void __mptcp_unaccepted_force_close(struct sock *sk)
+{
+	sock_set_flag(sk, SOCK_DEAD);
+	inet_sk_state_store(sk, TCP_CLOSE);
+	mptcp_do_fastclose(sk);
+	__mptcp_destroy_sock(sk);
+}
+
 static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
 {
 	/* Concurrent splices from sk_receive_queue into receive_queue will
@@ -2928,10 +2967,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
 		goto cleanup;
 	}
 
-	if (mptcp_check_readable(msk)) {
-		/* the msk has read data, do the MPTCP equivalent of TCP reset */
+	if (mptcp_check_readable(msk) || timeout < 0) {
+		/* If the msk has read data, or the caller explicitly ask it,
+		 * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
+		 */
 		inet_sk_state_store(sk, TCP_CLOSE);
 		mptcp_do_fastclose(sk);
+		timeout = 0;
 	} else if (mptcp_close_state(sk)) {
 		__mptcp_wr_shutdown(sk);
 	}
@@ -3143,7 +3185,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 	struct socket *listener;
 	struct sock *newsk;
 
-	listener = __mptcp_nmpc_socket(msk);
+	listener = msk->subflow;
 	if (WARN_ON_ONCE(!listener)) {
 		*err = -EINVAL;
 		return NULL;
@@ -3363,7 +3405,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct socket *ssock;
 
-	ssock = __mptcp_nmpc_socket(msk);
+	ssock = msk->subflow;
 	pr_debug("msk=%p, subflow=%p", msk, ssock);
 	if (WARN_ON_ONCE(!ssock))
 		return -EINVAL;
@@ -3551,8 +3593,8 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	int err = -EINVAL;
 
 	ssock = __mptcp_nmpc_socket(msk);
-	if (!ssock)
-		return -EINVAL;
+	if (IS_ERR(ssock))
+		return PTR_ERR(ssock);
 
 	mptcp_token_destroy(msk);
 	inet_sk_state_store(sk, TCP_SYN_SENT);
@@ -3640,8 +3682,8 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	lock_sock(sock->sk);
 	ssock = __mptcp_nmpc_socket(msk);
-	if (!ssock) {
-		err = -EINVAL;
+	if (IS_ERR(ssock)) {
+		err = PTR_ERR(ssock);
 		goto unlock;
 	}
 
@@ -3677,8 +3719,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
 
 	lock_sock(sk);
 	ssock = __mptcp_nmpc_socket(msk);
-	if (!ssock) {
-		err = -EINVAL;
+	if (IS_ERR(ssock)) {
+		err = PTR_ERR(ssock);
 		goto unlock;
 	}
 
@@ -3709,7 +3751,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 
 	pr_debug("msk=%p", msk);
 
-	ssock = __mptcp_nmpc_socket(msk);
+	/* buggy applications can call accept on socket states other then LISTEN
+	 * but no need to allocate the first subflow just to error out.
+	 */
+	ssock = msk->subflow;
 	if (!ssock)
 		return -EINVAL;
 
@@ -3733,6 +3778,18 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 			if (!ssk->sk_socket)
 				mptcp_sock_graft(ssk, newsock);
 		}
+
+		/* Do late cleanup for the first subflow as necessary. Also
+		 * deal with bad peers not doing a complete shutdown.
+		 */
+		if (msk->first &&
+		    unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+			__mptcp_close_ssk(newsk, msk->first,
+					  mptcp_subflow_ctx(msk->first), 0);
+			if (unlikely(list_empty(&msk->conn_list)))
+				inet_sk_state_store(newsk, TCP_CLOSE);
+		}
+
 		release_sock(newsk);
 	}
 
@@ -3791,7 +3848,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 
 	/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
 	smp_rmb();
-	if (sk->sk_err)
+	if (READ_ONCE(sk->sk_err))
 		mask |= EPOLLERR;
 
 	return mask;
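The central API change in this diff is that __mptcp_nmpc_socket() now reports failure through the kernel's error-pointer convention (ERR_PTR()/IS_ERR()/PTR_ERR() from <linux/err.h>) instead of returning NULL, so callers such as mptcp_connect(), mptcp_bind() and mptcp_listen() can propagate the exact errno. Below is a minimal, self-contained userspace sketch of that convention, for illustration only: the helpers mirror <linux/err.h>, and get_socket() is a made-up stand-in, not an MPTCP function.

	#include <errno.h>
	#include <stdio.h>

	#define MAX_ERRNO	4095

	/* Userspace re-implementation of the <linux/err.h> helpers: a small
	 * negative errno is encoded in the last page of the address space, so a
	 * single pointer return value carries either a valid object or an error.
	 */
	static inline void *ERR_PTR(long error)
	{
		return (void *)error;
	}

	static inline long PTR_ERR(const void *ptr)
	{
		return (long)ptr;
	}

	static inline int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	/* Made-up stand-in for a helper like __mptcp_nmpc_socket(): on failure it
	 * returns an encoded errno instead of NULL, so the caller learns why.
	 */
	static void *get_socket(int fail)
	{
		static int dummy;

		return fail ? ERR_PTR(-EINVAL) : (void *)&dummy;
	}

	int main(void)
	{
		void *p = get_socket(1);

		if (IS_ERR(p))
			printf("error: %ld\n", PTR_ERR(p));	/* prints the negative errno, e.g. -22 */
		else
			printf("ok: %p\n", p);
		return 0;
	}

With this convention the caller-side pattern in the diff, if (IS_ERR(ssock)) return PTR_ERR(ssock);, replaces the old if (!ssock) return -EINVAL; and preserves whichever error the helper chose, for example -EINVAL on a bad socket state or the failure code from __mptcp_socket_create().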