Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--	net/mptcp/protocol.c | 165
1 file changed, 102 insertions(+), 63 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a7fc16f5175d..e252539b1e19 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -405,7 +405,7 @@ drop:
 	return false;
 }
 
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -770,6 +770,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 	return moved;
 }
 
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+	int err = sock_error(ssk);
+	int ssk_state;
+
+	if (!err)
+		return false;
+
+	/* only propagate errors on fallen-back sockets or
+	 * on MPC connect
+	 */
+	if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+		return false;
+
+	/* We need to propagate only transition to CLOSE state.
+	 * Orphaned socket will see such state change via
+	 * subflow_sched_work_if_closed() and that path will properly
+	 * destroy the msk as needed.
+	 */
+	ssk_state = inet_sk_state_load(ssk);
+	if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+		inet_sk_state_store(sk, ssk_state);
+	WRITE_ONCE(sk->sk_err, -err);
+
+	/* This barrier is coupled with smp_rmb() in mptcp_poll() */
+	smp_wmb();
+	sk_error_report(sk);
+	return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow(msk, subflow)
+		if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+			break;
+}
+
 /* In most cases we will be able to lock the mptcp socket.  If its already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
@@ -852,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
 	mptcp_sockopt_sync_locked(msk, ssk);
 	mptcp_subflow_joined(msk, ssk);
+	mptcp_stop_tout_timer(sk);
 	return true;
 }
 
@@ -871,12 +912,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
 	}
 }
 
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
 {
 	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
 }
 
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned long tout;
@@ -1010,10 +1051,10 @@ static void __mptcp_clean_una(struct sock *sk)
 out:
 	if (snd_una == READ_ONCE(msk->snd_nxt) &&
 	    snd_una == READ_ONCE(msk->write_seq)) {
-		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
-			mptcp_stop_timer(sk);
+		if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+			mptcp_stop_rtx_timer(sk);
 	} else {
-		mptcp_reset_timer(sk);
+		mptcp_reset_rtx_timer(sk);
 	}
 }
 
@@ -1586,8 +1627,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 		mptcp_push_release(ssk, &info);
 
 	/* ensure the rtx timer is running */
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (!mptcp_rtx_timer_pending(sk))
+		mptcp_reset_rtx_timer(sk);
 	if (do_check_data_fin)
 		mptcp_check_send_data_fin(sk);
 }
@@ -1650,8 +1691,8 @@ out:
 	if (copied) {
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
-		if (!mptcp_timer_pending(sk))
-			mptcp_reset_timer(sk);
+		if (!mptcp_rtx_timer_pending(sk))
+			mptcp_reset_rtx_timer(sk);
 
 		if (msk->snd_data_fin_enable &&
 		    msk->snd_nxt + 1 == msk->write_seq)
@@ -2220,7 +2261,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
 	sock_put(sk);
 }
 
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
 {
 	struct sock *sk = from_timer(sk, t, sk_timer);
 
@@ -2329,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	bool dispose_it, need_push = false;
 
 	/* If the first subflow moved to a close state before accept, e.g. due
-	 * to an incoming reset, mptcp either:
-	 * - if either the subflow or the msk are dead, destroy the context
-	 *   (the subflow socket is deleted by inet_child_forget) and the msk
-	 * - otherwise do nothing at the moment and take action at accept and/or
-	 *   listener shutdown - user-space must be able to accept() the closed
-	 *   socket.
+	 * to an incoming reset or listener shutdown, the subflow socket is
+	 * already deleted by inet_child_forget() and the mptcp socket can't
+	 * survive too.
 	 */
-	if (msk->in_accept_queue && msk->first == ssk) {
-		if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
-			return;
-
+	if (msk->in_accept_queue && msk->first == ssk &&
+	    (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
 		/* ensure later check in mptcp_worker() will dispose the msk */
+		mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
 		sock_set_flag(sk, SOCK_DEAD);
 		lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 		mptcp_subflow_drop_ctx(ssk);
@@ -2392,6 +2429,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	}
 
 out_release:
+	__mptcp_subflow_error_report(sk, ssk);
 	release_sock(ssk);
 
 	sock_put(ssk);
@@ -2402,6 +2440,22 @@ out_release:
 out:
 	if (need_push)
 		__mptcp_push_pending(sk, 0);
+
+	/* Catch every 'all subflows closed' scenario, including peers silently
+	 * closing them, e.g. due to timeout.
+	 * For established sockets, allow an additional timeout before closing,
+	 * as the protocol can still create more subflows.
+	 */
+	if (list_is_singular(&msk->conn_list) && msk->first &&
+	    inet_sk_state_load(msk->first) == TCP_CLOSE) {
+		if (sk->sk_state != TCP_ESTABLISHED ||
+		    msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+			inet_sk_state_store(sk, TCP_CLOSE);
+			mptcp_close_wake_up(sk);
+		} else {
+			mptcp_start_tout_timer(sk);
+		}
+	}
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2445,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
 
 }
 
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
 {
-	s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
-	struct mptcp_subflow_context *subflow;
-
-	if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
-		return true;
+	if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+	    sk->sk_state == TCP_CLOSE)
+		return false;
 
-	/* if all subflows are in closed status don't bother with additional
-	 * timeout
-	 */
-	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
-		if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
-		    TCP_CLOSE)
-			return false;
-	}
-	return true;
+	return time_after32(tcp_jiffies32,
+		  inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
 }
 
 static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2588,27 +2633,28 @@ static void __mptcp_retrans(struct sock *sk)
 reset_timer:
 	mptcp_check_and_set_pending(sk);
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (!mptcp_rtx_timer_pending(sk))
+		mptcp_reset_rtx_timer(sk);
 }
 
 /* schedule the timeout timer for the relevant event: either close timeout
  * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
  */
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
 {
 	struct sock *sk = (struct sock *)msk;
 	unsigned long timeout, close_timeout;
 
-	if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+	if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
 		return;
 
-	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+			TCP_TIMEWAIT_LEN;
 
 	/* the close timeout takes precedence on the fail one, and here at least one of
 	 * them is active
 	 */
-	timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+	timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
 
 	sk_reset_timer(sk, &sk->sk_timer, timeout);
 }
@@ -2627,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 	mptcp_subflow_reset(ssk);
 	WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
 	unlock_sock_fast(ssk, slow);
-
-	mptcp_reset_timeout(msk, 0);
 }
 
 static void mptcp_do_fastclose(struct sock *sk)
@@ -2665,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(sk);
 
-	/* There is no point in keeping around an orphaned sk timedout or
-	 * closed, but we need the msk around to reply to incoming DATA_FIN,
-	 * even if it is orphaned and in FIN_WAIT2 state
-	 */
-	if (sock_flag(sk, SOCK_DEAD)) {
-		if (mptcp_should_close(sk))
-			mptcp_do_fastclose(sk);
+	if (mptcp_close_tout_expired(sk)) {
+		mptcp_do_fastclose(sk);
+		mptcp_close_wake_up(sk);
+	}
 
-		if (sk->sk_state == TCP_CLOSE) {
-			__mptcp_destroy_sock(sk);
-			goto unlock;
-		}
+	if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+		__mptcp_destroy_sock(sk);
+		goto unlock;
 	}
 
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2717,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
 
 	/* re-use the csk retrans timer for MPTCP-level retrans */
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
-	timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+	timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
 }
 
 static void mptcp_ca_reset(struct sock *sk)
@@ -2808,8 +2848,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 		} else {
 			pr_debug("Sending DATA_FIN on subflow %p", ssk);
 			tcp_send_ack(ssk);
-			if (!mptcp_timer_pending(sk))
-				mptcp_reset_timer(sk);
+			if (!mptcp_rtx_timer_pending(sk))
+				mptcp_reset_rtx_timer(sk);
 		}
 		break;
 	}
@@ -2892,7 +2932,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
 	might_sleep();
 
-	mptcp_stop_timer(sk);
+	mptcp_stop_rtx_timer(sk);
 	sk_stop_timer(sk, &sk->sk_timer);
 	msk->pm.status = 0;
 	mptcp_release_sched(msk);
@@ -2975,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
 
 cleanup:
 	/* orphan all the subflows */
-	inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		bool slow = lock_sock_fast_nested(ssk);
@@ -3012,7 +3051,7 @@ cleanup:
 		__mptcp_destroy_sock(sk);
 		do_cancel_work = true;
 	} else {
-		mptcp_reset_timeout(msk, 0);
+		mptcp_start_tout_timer(sk);
 	}
 
 	return do_cancel_work;
@@ -3075,8 +3114,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	mptcp_check_listen_stop(sk);
 	inet_sk_state_store(sk, TCP_CLOSE);
 
-	mptcp_stop_timer(sk);
-	sk_stop_timer(sk, &sk->sk_timer);
+	mptcp_stop_rtx_timer(sk);
+	mptcp_stop_tout_timer(sk);
 
 	if (msk->token)
 		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
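
Note: the tout-timer helpers invoked above — mptcp_stop_tout_timer(), mptcp_start_tout_timer() and mptcp_set_close_tout() — are not defined in net/mptcp/protocol.c; the same change introduces them elsewhere (in net/mptcp/protocol.h). A minimal sketch of plausible definitions, assuming, as the hunks above imply, that the otherwise idle icsk_mtup.probe_timestamp field holds the close-timeout start timestamp and that 0 means "close timeout not armed":

/* Sketch only: inferred from the protocol.c usage above, not copied from
 * net/mptcp/protocol.h. The msk close/fail timeout reuses sk->sk_timer,
 * and icsk_mtup.probe_timestamp doubles as the close-timeout timestamp.
 */
static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
{
	/* 0 means "not armed": never store it as a valid timestamp */
	inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
}

static inline void mptcp_start_tout_timer(struct sock *sk)
{
	/* arm the close timeout from "now" and (re)schedule sk_timer */
	mptcp_set_close_tout(sk, tcp_jiffies32);
	mptcp_reset_tout_timer(mptcp_sk(sk), 0);
}

static inline void mptcp_stop_tout_timer(struct sock *sk)
{
	if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
		return;

	sk_stop_timer(sk, &sk->sk_timer);
	inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
}

Under this reading, the mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)) call in __mptcp_close_ssk() backdates the timestamp so that mptcp_close_tout_expired() evaluates true on the very next mptcp_worker() run, letting the worker fastclose and dispose of the msk.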