diff options
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r-- | net/mptcp/subflow.c | 184 |
1 files changed, 143 insertions, 41 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index be1de4084196..66d0b1893d26 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -108,6 +108,8 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); + subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@ -150,7 +152,7 @@ static int subflow_check_req(struct request_sock *req, return -EINVAL; #endif - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -162,7 +164,7 @@ static int subflow_check_req(struct request_sock *req, } if (mp_opt.mp_capable && listener->request_mptcp) { - int err, retries = 4; + int err, retries = MPTCP_TOKEN_MAX_RETRIES; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; again: @@ -247,7 +249,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, int err; subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@ -371,6 +373,24 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } +void __mptcp_set_connected(struct sock *sk) +{ + if (sk->sk_state == TCP_SYN_SENT) { + inet_sk_state_store(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + } +} + +static void mptcp_set_connected(struct sock *sk) +{ + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_set_connected(sk); + else + set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -379,10 +399,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (inet_sk_state_load(parent) == TCP_SYN_SENT) { - inet_sk_state_store(parent, TCP_ESTABLISHED); - parent->sk_state_change(parent); - } /* be sure no special action on any packet other than syn-ack */ if (subflow->conn_finished) @@ -394,7 +410,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@ -404,6 +420,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto fallback; } + if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); + if (mp_opt.deny_join_id0) + WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; @@ -411,6 +431,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->remote_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); + mptcp_set_connected(parent); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -430,15 +451,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto do_reset; } + if (!mptcp_finish_join(sk)) + goto do_reset; + subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, hmac); memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); - if (!mptcp_finish_join(sk)) - goto do_reset; - subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); @@ -451,6 +472,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(mptcp_sk(parent), sk); + mptcp_set_connected(parent); } return; @@ -558,6 +580,7 @@ static void mptcp_sock_destruct(struct sock *sk) static void mptcp_force_close(struct sock *sk) { + /* the msk is not yet exposed to user-space */ inet_sk_state_store(sk, TCP_CLOSE); sk_common_release(sk); } @@ -638,7 +661,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * reordered MPC will cause fallback, but we don't have other * options. */ - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; @@ -648,7 +671,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); @@ -775,15 +798,6 @@ enum mapping_status { MAPPING_DUMMY }; -static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) -{ - if ((u32)seq == (u32)old_seq) - return old_seq; - - /* Assume map covers data not mapped yet. */ - return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); -} - static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", @@ -824,10 +838,92 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) return true; } +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data, does + * not include the eventual seq increment due to the data fin, + * while the pseudo header requires the original DSS data len, + * including that + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + } + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -907,22 +1003,17 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len--; } - if (!mpext->dsn64) { - map_seq = expand_seq(subflow->map_seq, subflow->map_data_len, - mpext->data_seq); - pr_debug("expanded seq=%llu", subflow->map_seq); - } else { - map_seq = mpext->data_seq; - } + map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64); WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64); if (subflow->map_valid) { /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; } /* If this skb data are fully covered by the current mapping, @@ -934,27 +1025,41 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } /* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; } subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure * the current skb is completely covered by the available mapping */ - if (!validate_mapping(ssk, skb)) + if (!validate_mapping(ssk, skb)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH); return MAPPING_INVALID; + } skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, @@ -1137,7 +1242,7 @@ void __mptcp_error_report(struct sock *sk) /* This barrier is coupled with smp_rmb() in mptcp_poll() */ smp_wmb(); - sk->sk_error_report(sk); + sk_error_report(sk); break; } } @@ -1489,10 +1594,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(mptcp_sk(parent), sk); pr_fallback(mptcp_sk(parent)); subflow->conn_finished = 1; - if (inet_sk_state_load(parent) == TCP_SYN_SENT) { - inet_sk_state_store(parent, TCP_ESTABLISHED); - parent->sk_state_change(parent); - } + mptcp_set_connected(parent); } /* as recvmsg() does not acquire the subflow socket for ssk selection |