aboutsummaryrefslogtreecommitdiff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 15:47:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 15:47:48 -0800
commit7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (patch)
treeae0427c5a3b905f24b3a44b510a9bcf35d9b67a3 /net/mptcp/protocol.c
parent1ca06f1c1acecbe02124f14a37cce347b8c1a90c (diff)
parent7c4a6309e27f411743817fe74a832ec2d2798a4b (diff)
Merge tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Allow live renaming when an interface is up - Add retpoline wrappers for tc, improving considerably the performances of complex queue discipline configurations - Add inet drop monitor support - A few GRO performance improvements - Add infrastructure for atomic dev stats, addressing long standing data races - De-duplicate common code between OVS and conntrack offloading infrastructure - A bunch of UBSAN_BOUNDS/FORTIFY_SOURCE improvements - Netfilter: introduce packet parser for tunneled packets - Replace IPVS timer-based estimators with kthreads to scale up the workload with the number of available CPUs - Add the helper support for connection-tracking OVS offload BPF: - Support for user defined BPF objects: the use case is to allocate own objects, build own object hierarchies and use the building blocks to build own data structures flexibly, for example, linked lists in BPF - Make cgroup local storage available to non-cgroup attached BPF programs - Avoid unnecessary deadlock detection and failures wrt BPF task storage helpers - A relevant bunch of BPF verifier fixes and improvements - Veristat tool improvements to support custom filtering, sorting, and replay of results - Add LLVM disassembler as default library for dumping JITed code - Lots of new BPF documentation for various BPF maps - Add bpf_rcu_read_{,un}lock() support for sleepable programs - Add RCU grace period chaining to BPF to wait for the completion of access from both sleepable and non-sleepable BPF programs - Add support storing struct task_struct objects as kptrs in maps - Improve helper UAPI by explicitly defining BPF_FUNC_xxx integer values - Add libbpf *_opts API-variants for bpf_*_get_fd_by_id() functions Protocols: - TCP: implement Protective Load Balancing across switch links - TCP: allow dynamically disabling TCP-MD5 static key, reverting back to fast[er]-path - UDP: Introduce optional per-netns hash lookup table - IPv6: simplify and cleanup sockets disposal - Netlink: support different type policies for each generic netlink operation - MPTCP: add MSG_FASTOPEN and FastOpen listener side support - MPTCP: add netlink notification support for listener sockets events - SCTP: add VRF support, allowing sctp sockets binding to VRF devices - Add bridging MAC Authentication Bypass (MAB) support - Extensions for Ethernet VPN bridging implementation to better support multicast scenarios - More work for Wi-Fi 7 support, comprising conversion of all the existing drivers to internal TX queue usage - IPSec: introduce a new offload type (packet offload) allowing complete header processing and crypto offloading - IPSec: extended ack support for more descriptive XFRM error reporting - RXRPC: increase SACK table size and move processing into a per-local endpoint kernel thread, reducing considerably the required locking - IEEE 802154: synchronous send frame and extended filtering support, initial support for scanning available 15.4 networks - Tun: bump the link speed from 10Mbps to 10Gbps - Tun/VirtioNet: implement UDP segmentation offload support Driver API: - PHY/SFP: improve power level switching between standard level 1 and the higher power levels - New API for netdev <-> devlink_port linkage - PTP: convert existing drivers to new frequency adjustment implementation - DSA: add support for rx offloading - Autoload DSA tagging driver when dynamically changing protocol - Add new PCP and APPTRUST attributes to Data Center Bridging - Add configuration support for 800Gbps link speed - Add devlink port function attribute to enable/disable RoCE and migratable - Extend devlink-rate to support strict prioriry and weighted fair queuing - Add devlink support to directly reading from region memory - New device tree helper to fetch MAC address from nvmem - New big TCP helper to simplify temporary header stripping New hardware / drivers: - Ethernet: - Marvel Octeon CNF95N and CN10KB Ethernet Switches - Marvel Prestera AC5X Ethernet Switch - WangXun 10 Gigabit NIC - Motorcomm yt8521 Gigabit Ethernet - Microchip ksz9563 Gigabit Ethernet Switch - Microsoft Azure Network Adapter - Linux Automation 10Base-T1L adapter - PHY: - Aquantia AQR112 and AQR412 - Motorcomm YT8531S - PTP: - Orolia ART-CARD - WiFi: - MediaTek Wi-Fi 7 (802.11be) devices - RealTek rtw8821cu, rtw8822bu, rtw8822cu and rtw8723du USB devices - Bluetooth: - Broadcom BCM4377/4378/4387 Bluetooth chipsets - Realtek RTL8852BE and RTL8723DS - Cypress.CYW4373A0 WiFi + Bluetooth combo device Drivers: - CAN: - gs_usb: bus error reporting support - kvaser_usb: listen only and bus error reporting support - Ethernet NICs: - Intel (100G): - extend action skbedit to RX queue mapping - implement devlink-rate support - support direct read from memory - nVidia/Mellanox (mlx5): - SW steering improvements, increasing rules update rate - Support for enhanced events compression - extend H/W offload packet manipulation capabilities - implement IPSec packet offload mode - nVidia/Mellanox (mlx4): - better big TCP support - Netronome Ethernet NICs (nfp): - IPsec offload support - add support for multicast filter - Broadcom: - RSS and PTP support improvements - AMD/SolarFlare: - netlink extened ack improvements - add basic flower matches to offload, and related stats - Virtual NICs: - ibmvnic: introduce affinity hint support - small / embedded: - FreeScale fec: add initial XDP support - Marvel mv643xx_eth: support MII/GMII/RGMII modes for Kirkwood - TI am65-cpsw: add suspend/resume support - Mediatek MT7986: add RX wireless wthernet dispatch support - Realtek 8169: enable GRO software interrupt coalescing per default - Ethernet high-speed switches: - Microchip (sparx5): - add support for Sparx5 TC/flower H/W offload via VCAP - Mellanox mlxsw: - add 802.1X and MAC Authentication Bypass offload support - add ip6gre support - Embedded Ethernet switches: - Mediatek (mtk_eth_soc): - improve PCS implementation, add DSA untag support - enable flow offload support - Renesas: - add rswitch R-Car Gen4 gPTP support - Microchip (lan966x): - add full XDP support - add TC H/W offload via VCAP - enable PTP on bridge interfaces - Microchip (ksz8): - add MTU support for KSZ8 series - Qualcomm 802.11ax WiFi (ath11k): - support configuring channel dwell time during scan - MediaTek WiFi (mt76): - enable Wireless Ethernet Dispatch (WED) offload support - add ack signal support - enable coredump support - remain_on_channel support - Intel WiFi (iwlwifi): - enable Wi-Fi 7 Extremely High Throughput (EHT) PHY capabilities - 320 MHz channels support - RealTek WiFi (rtw89): - new dynamic header firmware format support - wake-over-WLAN support" * tag 'net-next-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2002 commits) ipvs: fix type warning in do_div() on 32 bit net: lan966x: Remove a useless test in lan966x_ptp_add_trap() net: ipa: add IPA v4.7 support dt-bindings: net: qcom,ipa: Add SM6350 compatible bnxt: Use generic HBH removal helper in tx path IPv6/GRO: generic helper to remove temporary HBH/jumbo header in driver selftests: forwarding: Add bridge MDB test selftests: forwarding: Rename bridge_mdb test bridge: mcast: Support replacement of MDB port group entries bridge: mcast: Allow user space to specify MDB entry routing protocol bridge: mcast: Allow user space to add (*, G) with a source list and filter mode bridge: mcast: Add support for (*, G) with a source list and filter mode bridge: mcast: Avoid arming group timer when (S, G) corresponds to a source bridge: mcast: Add a flag for user installed source entries bridge: mcast: Expose __br_multicast_del_group_src() bridge: mcast: Expose br_multicast_new_group_src() bridge: mcast: Add a centralized error path bridge: mcast: Place netlink policy before validation functions bridge: mcast: Split (*, G) and (S, G) addition into different functions bridge: mcast: Do not derive entry type from its filter mode ...
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--net/mptcp/protocol.c87
1 files changed, 25 insertions, 62 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1dbc62537259..f6f93957275b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -36,15 +36,6 @@ struct mptcp6_sock {
};
#endif
-struct mptcp_skb_cb {
- u64 map_seq;
- u64 end_seq;
- u32 offset;
- u8 has_rxtstamp:1;
-};
-
-#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
-
enum {
MPTCP_CMSG_TS = BIT(0),
MPTCP_CMSG_INQ = BIT(1),
@@ -200,7 +191,7 @@ static void mptcp_rfree(struct sk_buff *skb)
mptcp_rmem_uncharge(sk, len);
}
-static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
skb->sk = sk;
@@ -1602,7 +1593,7 @@ out:
__mptcp_check_send_data_fin(sk);
}
-static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
+static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info = {
@@ -1611,7 +1602,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
struct mptcp_data_frag *dfrag;
struct sock *xmit_ssk;
int len, copied = 0;
- bool first = true;
info.flags = 0;
while ((dfrag = mptcp_send_head(sk))) {
@@ -1621,11 +1611,10 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
while (len > 0) {
int ret = 0;
- /* the caller already invoked the packet scheduler,
- * check for a different subflow usage only after
+ /* check for a different subflow usage only after
* spooling the first chunk of data
*/
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
@@ -1713,17 +1702,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int ret = 0;
long timeo;
- /* we don't support FASTOPEN yet */
- if (msg->msg_flags & MSG_FASTOPEN)
- return -EOPNOTSUPP;
-
/* silently ignore everything else */
- msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
+ msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;
lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
- if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
+ if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
+ msg->msg_flags & MSG_FASTOPEN))) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, ssock->sk, msg, len, &copied_syn);
@@ -2275,7 +2261,7 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
struct mptcp_data_frag *cur, *rtx_head;
struct mptcp_sock *msk = mptcp_sk(sk);
- if (__mptcp_check_fallback(mptcp_sk(sk)))
+ if (__mptcp_check_fallback(msk))
return false;
if (tcp_rtx_and_write_queues_empty(sk))
@@ -2369,6 +2355,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(ssk);
inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
__tcp_close(ssk, 0);
@@ -2451,7 +2438,7 @@ static bool mptcp_check_close_timeout(const struct sock *sk)
static void mptcp_check_fastclose(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
- struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct sock *sk = (struct sock *)msk;
if (likely(!READ_ONCE(msk->rcv_fastclose)))
return;
@@ -2613,7 +2600,7 @@ static void mptcp_do_fastclose(struct sock *sk)
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
- struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct sock *sk = (struct sock *)msk;
unsigned long fail_tout;
int state;
@@ -2725,6 +2712,8 @@ static int mptcp_init_sock(struct sock *sk)
if (ret)
return ret;
+ set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
* propagate the correct value
*/
@@ -2946,7 +2935,7 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
- if (mptcp_sk(sk)->token)
+ if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
if (sk->sk_state == TCP_CLOSE) {
@@ -3005,8 +2994,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
- if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
/* msk->subflow is still intact, the following will not free the first
* subflow
@@ -3048,7 +3037,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
struct mptcp_sock *msk;
- u64 ack_seq;
if (!nsk)
return NULL;
@@ -3074,15 +3062,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
- if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
- msk->can_ack = true;
- msk->remote_key = mp_opt->sndr_key;
- mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
- ack_seq++;
- WRITE_ONCE(msk->ack_seq, ack_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
- }
-
sock_reset_flag(nsk, SOCK_RCU_FREE);
/* will be fully established after successful MPC subflow creation */
inet_sk_state_store(nsk, TCP_SYN_RECV);
@@ -3217,16 +3196,10 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
if (!mptcp_send_head(sk))
return;
- if (!sock_owned_by_user(sk)) {
- struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
-
- if (xmit_ssk == ssk)
- __mptcp_subflow_push_pending(sk, ssk);
- else if (xmit_ssk)
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
- } else {
+ if (!sock_owned_by_user(sk))
+ __mptcp_subflow_push_pending(sk, ssk, false);
+ else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
- }
}
#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
@@ -3317,7 +3290,7 @@ void mptcp_subflow_process_delegated(struct sock *ssk)
if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
- __mptcp_subflow_push_pending(sk, ssk);
+ __mptcp_subflow_push_pending(sk, ssk, true);
else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
@@ -3361,7 +3334,6 @@ void mptcp_finish_connect(struct sock *ssk)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
struct sock *sk;
- u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
sk = subflow->conn;
@@ -3369,22 +3341,16 @@ void mptcp_finish_connect(struct sock *ssk)
pr_debug("msk=%p, token=%u", sk, subflow->token);
- mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
- ack_seq++;
- subflow->map_seq = ack_seq;
+ subflow->map_seq = subflow->iasn;
subflow->map_subflow_seq = 1;
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
*/
- WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
- atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
@@ -3682,6 +3648,8 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (!err)
mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+
unlock:
release_sock(sock->sk);
return err;
@@ -3706,6 +3674,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_subflow_context *subflow;
struct sock *newsk = newsock->sk;
+ set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+
lock_sock(newsk);
/* PM/worker can now acquire the first subflow socket
@@ -3919,12 +3889,6 @@ static const struct proto_ops mptcp_v6_stream_ops = {
static struct proto mptcp_v6_prot;
-static void mptcp_v6_destroy(struct sock *sk)
-{
- mptcp_destroy(sk);
- inet6_destroy_sock(sk);
-}
-
static struct inet_protosw mptcp_v6_protosw = {
.type = SOCK_STREAM,
.protocol = IPPROTO_MPTCP,
@@ -3940,7 +3904,6 @@ int __init mptcp_proto_v6_init(void)
mptcp_v6_prot = mptcp_prot;
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
- mptcp_v6_prot.destroy = mptcp_v6_destroy;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
err = proto_register(&mptcp_v6_prot, 1);