diff options
-rw-r--r-- | MAINTAINERS | 10 | ||||
-rw-r--r-- | include/linux/skbuff.h | 6 | ||||
-rw-r--r-- | include/net/mptcp.h | 85 | ||||
-rw-r--r-- | include/net/sock.h | 27 | ||||
-rw-r--r-- | include/net/tcp.h | 20 | ||||
-rw-r--r-- | include/trace/events/sock.h | 5 | ||||
-rw-r--r-- | include/uapi/linux/in.h | 2 | ||||
-rw-r--r-- | net/ax25/af_ax25.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 60 | ||||
-rw-r--r-- | net/core/skbuff.c | 42 | ||||
-rw-r--r-- | net/decnet/af_decnet.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 12 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 6 | ||||
-rw-r--r-- | tools/include/uapi/linux/in.h | 2 |
18 files changed, 235 insertions, 79 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 218089f38e11..9dcb9cab5705 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11573,6 +11573,16 @@ F: net/ipv6/calipso.c F: net/netfilter/xt_CONNSECMARK.c F: net/netfilter/xt_SECMARK.c +NETWORKING [MPTCP] +M: Mat Martineau <[email protected]> +M: Matthieu Baerts <[email protected]> +W: https://github.com/multipath-tcp/mptcp_net-next/wiki +B: https://github.com/multipath-tcp/mptcp_net-next/issues +S: Maintained +F: include/net/mptcp.h + NETWORKING [TCP] M: Eric Dumazet <[email protected]> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 64e5b1be9ff5..016b3c4ab99a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4097,6 +4097,9 @@ enum skb_ext_id { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) TC_SKB_EXT, #endif +#if IS_ENABLED(CONFIG_MPTCP) + SKB_EXT_MPTCP, +#endif SKB_EXT_NUM, /* must be last */ }; @@ -4117,6 +4120,9 @@ struct skb_ext { char data[0] __aligned(8); }; +struct skb_ext *__skb_ext_alloc(void); +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext); void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); void __skb_ext_put(struct skb_ext *ext); diff --git a/include/net/mptcp.h b/include/net/mptcp.h new file mode 100644 index 000000000000..0573ae75c3db --- /dev/null +++ b/include/net/mptcp.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Multipath TCP + * + * Copyright (c) 2017 - 2019, Intel Corporation. + */ + +#ifndef __NET_MPTCP_H +#define __NET_MPTCP_H + +#include <linux/skbuff.h> +#include <linux/types.h> + +/* MPTCP sk_buff extension data */ +struct mptcp_ext { + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + __unused:3; + /* one byte hole */ +}; + +#ifdef CONFIG_MPTCP + +/* move the skb extension owership, with the assumption that 'to' is + * newly allocated + */ +static inline void mptcp_skb_ext_move(struct sk_buff *to, + struct sk_buff *from) +{ + if (!skb_ext_exist(from, SKB_EXT_MPTCP)) + return; + + if (WARN_ON_ONCE(to->active_extensions)) + skb_ext_put(to); + + to->active_extensions = from->active_extensions; + to->extensions = from->extensions; + from->active_extensions = 0; +} + +static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext, + const struct mptcp_ext *from_ext) +{ + /* MPTCP always clears the ext when adding it to the skb, so + * holes do not bother us here + */ + return !from_ext || + (to_ext && from_ext && + !memcmp(from_ext, to_ext, sizeof(struct mptcp_ext))); +} + +/* check if skbs can be collapsed. + * MPTCP collapse is allowed if neither @to or @from carry an mptcp data + * mapping, or if the extension of @to is the same as @from. + * Collapsing is not possible if @to lacks an extension, but @from carries one. + */ +static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, + const struct sk_buff *from) +{ + return mptcp_ext_matches(skb_ext_find(to, SKB_EXT_MPTCP), + skb_ext_find(from, SKB_EXT_MPTCP)); +} + +#else + +static inline void mptcp_skb_ext_move(struct sk_buff *to, + const struct sk_buff *from) +{ +} + +static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, + const struct sk_buff *from) +{ + return true; +} + +#endif /* CONFIG_MPTCP */ +#endif /* __NET_MPTCP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 8dff68b4c316..432ff73d20f3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -436,31 +436,15 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ - unsigned int __sk_flags_offset[0]; -#ifdef __BIG_ENDIAN_BITFIELD -#define SK_FL_PROTO_SHIFT 16 -#define SK_FL_PROTO_MASK 0x00ff0000 - -#define SK_FL_TYPE_SHIFT 0 -#define SK_FL_TYPE_MASK 0x0000ffff -#else -#define SK_FL_PROTO_SHIFT 8 -#define SK_FL_PROTO_MASK 0x0000ff00 - -#define SK_FL_TYPE_SHIFT 16 -#define SK_FL_TYPE_MASK 0xffff0000 -#endif - - unsigned int sk_padding : 1, + u8 sk_padding : 1, sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, - sk_userlocks : 4, - sk_protocol : 8, - sk_type : 16; -#define SK_PROTOCOL_MAX U8_MAX - u16 sk_gso_max_segs; + sk_userlocks : 4; u8 sk_pacing_shift; + u16 sk_type; + u16 sk_protocol; + u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; @@ -1480,6 +1464,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && !sk->sk_tx_skb_cache && !skb_cloned(skb)) { + skb_ext_reset(skb); skb_zcopy_clear(skb, true); sk->sk_tx_skb_cache = skb; return; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7df37e2fddca..5e4133d09b9d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -39,6 +39,7 @@ #include <net/tcp_states.h> #include <net/inet_ecn.h> #include <net/dst.h> +#include <net/mptcp.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -182,6 +183,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */ #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP @@ -328,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); +int tcp_send_mss(struct sock *sk, int *size_goal, int flags); +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, + int size_goal); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); @@ -977,6 +982,13 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) return likely(!TCP_SKB_CB(skb)->eor); } +static inline bool tcp_skb_can_collapse(const struct sk_buff *to, + const struct sk_buff *from) +{ + return likely(tcp_skb_can_collapse_to(to) && + mptcp_skb_can_collapse(to, from)); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ @@ -2002,6 +2014,11 @@ struct tcp_request_sock_ops { enum tcp_synack_type synack_type); }; +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; +#if IS_ENABLED(CONFIG_IPV6) +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops; +#endif + #ifdef CONFIG_SYN_COOKIES static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, const struct sock *sk, struct sk_buff *skb, @@ -2153,6 +2170,9 @@ struct tcp_ulp_ops { /* diagnostic */ int (*get_info)(const struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); + /* clone ulp */ + void (*clone)(const struct request_sock *req, struct sock *newsk, + const gfp_t priority); char name[TCP_ULP_NAME_MAX]; struct module *owner; diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 51fe9f6719eb..a966d4b5ab37 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -19,7 +19,8 @@ #define inet_protocol_names \ EM(IPPROTO_TCP) \ EM(IPPROTO_DCCP) \ - EMe(IPPROTO_SCTP) + EM(IPPROTO_SCTP) \ + EMe(IPPROTO_MPTCP) #define tcp_state_names \ EM(TCP_ESTABLISHED) \ @@ -147,7 +148,7 @@ TRACE_EVENT(inet_sock_set_state, __field(__u16, sport) __field(__u16, dport) __field(__u16, family) - __field(__u8, protocol) + __field(__u16, protocol) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e7ad9d350a28..1521073b6348 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -76,6 +76,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_MPTCP = 262, /* Multipath TCP connection */ +#define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX }; #endif diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 324306d6fde0..ff57ea89c27e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -808,7 +808,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, struct sock *sk; ax25_cb *ax25; - if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + if (protocol < 0 || protocol > U8_MAX) return -EINVAL; if (!net_eq(net, &init_net)) diff --git a/net/core/filter.c b/net/core/filter.c index 42fd17c48c5f..ef01c5599501 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7607,21 +7607,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock, type): - BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); - *target_size = 2; + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock, sk_type), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock, sk_type, + sizeof_field(struct sock, sk_type), + target_size)); break; case offsetof(struct bpf_sock, protocol): - BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, __sk_flags_offset)); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); - *target_size = 1; + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock, sk_protocol), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock, sk_protocol, + sizeof_field(struct sock, sk_protocol), + target_size)); break; case offsetof(struct bpf_sock, src_ip4): @@ -7903,20 +7903,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock_addr, type): - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( - struct bpf_sock_addr_kern, struct sock, sk, - __sk_flags_offset, BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_type); break; case offsetof(struct bpf_sock_addr, protocol): - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( - struct bpf_sock_addr_kern, struct sock, sk, - __sk_flags_offset, BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, - SK_FL_PROTO_SHIFT); + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_protocol); break; case offsetof(struct bpf_sock_addr, msg_src_ip4): @@ -8835,11 +8828,11 @@ sk_reuseport_is_valid_access(int off, int size, skb, \ SKB_FIELD) -#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \ - SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \ - struct sock, \ - sk, \ - SK_FIELD, BPF_SIZE, EXTRA_OFF) +#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \ + SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \ + struct sock, \ + sk, \ + SK_FIELD) static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, @@ -8863,16 +8856,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct sk_reuseport_md, ip_protocol): - BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); - SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset, - BPF_W, 0); - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, - SK_FL_PROTO_SHIFT); - /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian - * aware. No further narrowing or masking is needed. - */ - *target_size = 1; + SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol); break; case offsetof(struct sk_reuseport_md, data_end): diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 44b0894d8ae1..48a7029529c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,6 +68,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> +#include <net/mptcp.h> #include <linux/uaccess.h> #include <trace/events/skb.h> @@ -4109,6 +4110,9 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), #endif +#if IS_ENABLED(CONFIG_MPTCP) + [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4123,6 +4127,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) skb_ext_type_len[TC_SKB_EXT] + #endif +#if IS_ENABLED(CONFIG_MPTCP) + skb_ext_type_len[SKB_EXT_MPTCP] + +#endif 0; } @@ -5980,7 +5987,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); } -static struct skb_ext *skb_ext_alloc(void) +/** + * __skb_ext_alloc - allocate a new skb extensions storage + * + * Returns the newly allocated pointer. The pointer can later attached to a + * skb via __skb_ext_set(). + * Note: caller must handle the skb_ext as an opaque data. + */ +struct skb_ext *__skb_ext_alloc(void) { struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); @@ -6021,6 +6035,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, } /** + * __skb_ext_set - attach the specified extension storage to this skb + * @skb: buffer + * @id: extension id + * @ext: extension storage previously allocated via __skb_ext_alloc() + * + * Existing extensions, if any, are cleared. + * + * Returns the pointer to the extension. + */ +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext) +{ + unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); + + skb_ext_put(skb); + newlen = newoff + skb_ext_type_len[id]; + ext->chunks = newlen; + ext->offset[id] = newoff; + skb->extensions = ext; + skb->active_extensions = 1 << id; + return skb_ext_get_ptr(ext, id); +} + +/** * skb_ext_add - allocate space for given extension, COW if needed * @skb: buffer * @id: extension to allocate space for @@ -6053,7 +6091,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); - new = skb_ext_alloc(); + new = __skb_ext_alloc(); if (!new) return NULL; } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index e19a92a62e14..0a46ea3bddd5 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -670,7 +670,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; - if (protocol < 0 || protocol > SK_PROTOCOL_MAX) + if (protocol < 0 || protocol > U8_MAX) return -EINVAL; if (!net_eq(net, &init_net)) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 18c0d5bffe12..6a691fd04398 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -770,6 +770,18 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); +static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk, + const gfp_t priority) +{ + struct inet_connection_sock *icsk = inet_csk(newsk); + + if (!icsk->icsk_ulp_ops) + return; + + if (icsk->icsk_ulp_ops->clone) + icsk->icsk_ulp_ops->clone(req, newsk, priority); +} + /** * inet_csk_clone_lock - clone an inet socket, and lock its clone * @sk: the socket to clone @@ -810,6 +822,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + inet_clone_ulp(req, newsk, priority); + security_inet_csk_clone(newsk, req); } return newsk; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f09fbc85b108..6711a97de3ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -690,8 +690,8 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } -static void tcp_push(struct sock *sk, int flags, int mss_now, - int nonagle, int size_goal) +void tcp_push(struct sock *sk, int flags, int mss_now, + int nonagle, int size_goal) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -925,7 +925,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, return max(size_goal, mss_now); } -static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) +int tcp_send_mss(struct sock *sk, int *size_goal, int flags) { int mss_now; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 062b696a5fa1..2914fdf1d543 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1422,7 +1422,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; - if (!tcp_skb_can_collapse_to(prev)) + if (!tcp_skb_can_collapse(prev, skb)) goto fallback; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && @@ -4423,6 +4423,9 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; + if (!mptcp_skb_can_collapse(to, from)) + return false; + #ifdef CONFIG_TLS_DEVICE if (from->decrypted != to->decrypted) return false; @@ -4932,7 +4935,7 @@ restart: /* The first skb to collapse is: * - not SYN/FIN and * - bloated or contains data before "start" or - * overlaps to the next one. + * overlaps to the next one and mptcp allow collapsing. */ if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && (tcp_win_from_space(sk, skb->truesize) > skb->len || @@ -4941,7 +4944,7 @@ restart: break; } - if (n && n != tail && + if (n && n != tail && mptcp_skb_can_collapse(skb, n) && TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { end_of_skbs = false; break; @@ -4974,6 +4977,7 @@ restart: else __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ skb_set_owner_r(nskb, sk); + mptcp_skb_ext_move(nskb, skb); /* Copy data, releasing collapsed skbs. */ while (copy > 0) { @@ -4993,6 +4997,7 @@ restart: skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || + !mptcp_skb_can_collapse(nskb, skb) || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) goto end; #ifdef CONFIG_TLS_DEVICE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4adac9c75343..fedb537839ec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1426,7 +1426,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v4_md5_lookup, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 58c92a7d671c..05109d0c675b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -754,13 +754,17 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; if (unlikely(eff_sacks)) { const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; + if (unlikely(remaining < TCPOLEN_SACK_BASE_ALIGNED + + TCPOLEN_SACK_PERBLOCK)) + return size; + opts->num_sack_blocks = min_t(unsigned int, eff_sacks, (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK); - if (likely(opts->num_sack_blocks)) - size += TCPOLEN_SACK_BASE_ALIGNED + - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; } return size; @@ -2865,7 +2869,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!tcp_can_collapse(sk, skb)) break; - if (!tcp_skb_can_collapse_to(to)) + if (!tcp_skb_can_collapse(to, skb)) break; space -= skb->len; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 95e4e1e95db2..5b5260103b65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -75,7 +75,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; -static const struct inet_connection_sock_af_ops ipv6_specific; +const struct inet_connection_sock_af_ops ipv6_specific; #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; @@ -819,7 +819,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG @@ -1794,7 +1794,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor = tcp_twsk_destructor, }; -static const struct inet_connection_sock_af_ops ipv6_specific = { +const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e7ad9d350a28..1521073b6348 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -76,6 +76,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_MPTCP = 262, /* Multipath TCP connection */ +#define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX }; #endif |