From 7b16871f9932d8a371488d2967b033387870a747 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 18 May 2022 15:04:43 -0700 Subject: mptcp: stop using the mptcp_has_another_subflow() helper The mentioned helper requires the msk socket lock, and the current callers don't own it nor can't acquire it, so the access is racy. All the current callers are really checking for infinite mapping fallback, and the latter condition is explicitly tracked by the relevant msk variable: we can safely remove the caller usage - and the caller itself. The issue is present since MP_FAIL implementation, but the fix only applies since the infinite fallback support, ence the somewhat unexpected fixes tag. Fixes: 0530020a7c8f ("mptcp: track and update contiguous data status") Acked-and-tested-by: Geliang Tang Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 2 +- net/mptcp/protocol.h | 13 ------------- net/mptcp/subflow.c | 3 +-- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 59fdab2d0c27..8ba51120f35b 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -303,7 +303,7 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("fail_seq=%llu", fail_seq); - if (mptcp_has_another_subflow(sk) || !READ_ONCE(msk->allow_infinite_fallback)) + if (!READ_ONCE(msk->allow_infinite_fallback)) return; if (!READ_ONCE(subflow->mp_fail_response_expect)) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 08b8015cb69f..fb4760ee8d47 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -650,19 +650,6 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } -static inline bool mptcp_has_another_subflow(struct sock *ssk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp; - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - - mptcp_for_each_subflow(msk, tmp) { - if (tmp != subflow) - return true; - } - - return false; -} - void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index aecd98ac5dfe..27273cf091db 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1233,8 +1233,7 @@ fallback: if (!__mptcp_check_fallback(msk)) { /* RFC 8684 section 3.7. */ if (subflow->send_mp_fail) { - if (mptcp_has_another_subflow(ssk) || - !READ_ONCE(msk->allow_infinite_fallback)) { + if (!READ_ONCE(msk->allow_infinite_fallback)) { ssk->sk_err = EBADMSG; tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; -- cgit From d42f9e4e2384febf9cb2d19ffa0cfac96189517a Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 18 May 2022 15:04:44 -0700 Subject: mptcp: Check for orphaned subflow before handling MP_FAIL timer MP_FAIL timeout (waiting for a peer to respond to an MP_FAIL with another MP_FAIL) is implemented using the MPTCP socket's sk_timer. That timer is also used at MPTCP socket close, so it's important to not have the two timer users interfere with each other. At MPTCP socket close, all subflows are orphaned before sk_timer is manipulated. By checking the SOCK_DEAD flag on the subflows, each subflow can determine if the timer is safe to alter without acquiring any MPTCP-level lock. This replaces code that was using the mptcp_data_lock and MPTCP-level socket state checks that did not correctly protect the timer. Fixes: 49fa1919d6bc ("mptcp: reset subflow when MP_FAIL doesn't respond") Reviewed-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 7 ++----- net/mptcp/subflow.c | 12 ++++-------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8ba51120f35b..59a85220edc9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -312,13 +312,10 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) subflow->send_mp_fail = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); subflow->send_infinite_map = 1; - } else if (s && inet_sk_state_load(s) != TCP_CLOSE) { + } else if (!sock_flag(sk, SOCK_DEAD)) { pr_debug("MP_FAIL response received"); - mptcp_data_lock(s); - if (inet_sk_state_load(s) != TCP_CLOSE) - sk_stop_timer(s, &s->sk_timer); - mptcp_data_unlock(s); + sk_stop_timer(s, &s->sk_timer); } } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 27273cf091db..8841e8cd9ad8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1016,12 +1016,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; - if (sk && inet_sk_state_load(sk) != TCP_CLOSE) { - mptcp_data_lock(sk); - if (inet_sk_state_load(sk) != TCP_CLOSE) - sk_stop_timer(sk, &sk->sk_timer); - mptcp_data_unlock(sk); - } + if (!sock_flag(ssk, SOCK_DEAD)) + sk_stop_timer(sk, &sk->sk_timer); + return MAPPING_INVALID; } @@ -1241,9 +1238,8 @@ fallback: tcp_send_active_reset(ssk, GFP_ATOMIC); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - } else { + } else if (!sock_flag(ssk, SOCK_DEAD)) { WRITE_ONCE(subflow->mp_fail_response_expect, true); - /* The data lock is acquired in __mptcp_move_skbs() */ sk_reset_timer((struct sock *)msk, &((struct sock *)msk)->sk_timer, jiffies + TCP_RTO_MAX); -- cgit From d9fb797046c596187b97a08ea88b954964cc2d33 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 18 May 2022 15:04:45 -0700 Subject: mptcp: Do not traverse the subflow connection list without lock The MPTCP socket's conn_list (list of subflows) requires the socket lock to access. The MP_FAIL timeout code added such an access, where it would check the list of subflows both in timer context and (later) in workqueue context where the socket lock is held. Rather than check the list twice, remove the check in the timeout handler and only depend on the check in the workqueue. Also remove the MPTCP_FAIL_NO_RESPONSE flag, since mptcp_mp_fail_no_response() has insignificant overhead and can be checked on each worker run. Fixes: 49fa1919d6bc ("mptcp: reset subflow when MP_FAIL doesn't respond") Reported-by: Paolo Abeni Reviewed-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 16 +--------------- net/mptcp/protocol.h | 1 - 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 921d67174e49..17e13396024a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2190,23 +2190,10 @@ mp_fail_response_expect_subflow(struct mptcp_sock *msk) return ret; } -static void mptcp_check_mp_fail_response(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - - bh_lock_sock(sk); - subflow = mp_fail_response_expect_subflow(msk); - if (subflow) - __set_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags); - bh_unlock_sock(sk); -} - static void mptcp_timeout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); - mptcp_check_mp_fail_response(mptcp_sk(sk)); mptcp_schedule_work(sk); sock_put(sk); } @@ -2588,8 +2575,7 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); - if (test_and_clear_bit(MPTCP_FAIL_NO_RESPONSE, &msk->flags)) - mptcp_mp_fail_no_response(msk); + mptcp_mp_fail_no_response(msk); unlock: release_sock(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fb4760ee8d47..200f89f6d62f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -117,7 +117,6 @@ #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 -#define MPTCP_FAIL_NO_RESPONSE 6 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 -- cgit From 2ba18161d407c596f256f7c4a6447b8588b9eb55 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 18 May 2022 15:04:46 -0700 Subject: selftests: mptcp: add MP_FAIL reset testcase Add the multiple subflows test case for MP_FAIL, to test the MP_FAIL reset case. Use the test_linkfail value to make 1024KB test files. Invoke reset_with_fail() to use 'iptables' and 'tc action pedit' rules to produce the bit flips to trigger the checksum failures on ns2eth2. Add delays on ns2eth1 to make sure more data can translate on ns2eth2. The check_invert flag is enabled in reset_with_fail(), so this test prints out the inverted bytes, instead of the file mismatch errors. Invoke pedit_action_pkts() to get the numbers of the packets edited by the tc pedit actions, and print this numbers to the output. Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 1a5f211c5902..a4406b7a8064 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2732,6 +2732,16 @@ fail_tests() chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" chk_fail_nr 1 -1 invert fi + + # multiple subflows + if reset_with_fail "MP_FAIL MP_RST" 2; then + tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 1024 + chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + fi } userspace_tests() -- cgit