diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/esp4.c | 5 | ||||
-rw-r--r-- | net/ipv4/esp4_offload.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 7 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 2 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_h323.c | 8 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_dup_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_fib_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_reject_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/route.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 24 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 13 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 33 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 4 |
16 files changed, 73 insertions, 53 deletions
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e1b1d080e908..70e6c87fbe3d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -446,6 +446,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; /* this is non-NULL only with TCP/UDP Encapsulation */ if (x->encap) { @@ -455,6 +456,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index d87f02a6e934..935026f4c807 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -110,8 +110,7 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { - __skb_push(skb, skb->mac_len); - return skb_mac_gso_segment(skb, features); + return skb_eth_gso_segment(skb, features, htons(ETH_P_IP)); } static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, @@ -160,6 +159,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; } + if (proto == IPPROTO_IPV6) + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; + __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7408051632ac..af8209f912ab 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -291,7 +291,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, - .flowi4_oif = l3mdev_master_ifindex_rcu(dev), + .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, @@ -353,9 +353,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); - if (!fl4.flowi4_iif) - fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c5a29703185a..cc8e84ef2ae4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2234,7 +2234,7 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { - if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) + if (fl4->flowi4_oif) goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2af2b99e0bea..fb0e49c36c2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1429,11 +1429,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) return false; - if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { - if (flp->flowi4_oif && - flp->flowi4_oif != nhc->nhc_oif) - return false; - } + if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) + return false; return true; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 3e2685c120c7..76a411ae9fe6 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -580,7 +580,7 @@ static struct nf_ct_helper_expectfn callforwarding_nat = { }; /****************************************************************************/ -static int __init init(void) +static int __init nf_nat_h323_init(void) { BUG_ON(set_h245_addr_hook != NULL); BUG_ON(set_h225_addr_hook != NULL); @@ -607,7 +607,7 @@ static int __init init(void) } /****************************************************************************/ -static void __exit fini(void) +static void __exit nf_nat_h323_fini(void) { RCU_INIT_POINTER(set_h245_addr_hook, NULL); RCU_INIT_POINTER(set_h225_addr_hook, NULL); @@ -624,8 +624,8 @@ static void __exit fini(void) } /****************************************************************************/ -module_init(init); -module_exit(fini); +module_init(nf_nat_h323_init); +module_exit(nf_nat_h323_fini); MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); MODULE_DESCRIPTION("H.323 NAT helper"); diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index aeb631760eb9..0bcd6aee6000 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -75,6 +75,7 @@ static const struct nft_expr_ops nft_dup_ipv4_ops = { .eval = nft_dup_ipv4_eval, .init = nft_dup_ipv4_init, .dump = nft_dup_ipv4_dump, + .reduce = NFT_REDUCE_READONLY, }; static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = { diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 03df986217b7..4151eb1262dd 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -152,6 +152,7 @@ static const struct nft_expr_ops nft_fib4_type_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, + .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib4_ops = { @@ -161,6 +162,7 @@ static const struct nft_expr_ops nft_fib4_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, + .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 55fc23a8f7a7..6cb213bb7256 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -45,6 +45,7 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_validate, + .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f444f5983405..63f3256a407d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2263,6 +2263,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ + fl4.flowi4_l3mdev = 0; fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; @@ -2738,8 +2739,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, res->fi = NULL; res->table = NULL; if (fl4->flowi4_oif && - (ipv4_is_multicast(fl4->daddr) || - !netif_index_is_l3_master(net, fl4->flowi4_oif))) { + (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { /* Apparently, routing tables are wrong. Assume, * that the destination is on link. * diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1cae27b5dcd8..ad80d180b60b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1272,6 +1272,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ONE, }, { + .procname = "tcp_tso_rtt_log", + .data = &init_net.ipv4.sysctl_tcp_tso_rtt_log, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + }, + { .procname = "tcp_min_rtt_wlen", .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 33f20134e3f1..cf18fbcbf123 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -688,7 +688,8 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, return skb->len < size_goal && sock_net(sk)->ipv4.sysctl_tcp_autocorking && !tcp_rtx_queue_empty(sk) && - refcount_read(&sk->sk_wmem_alloc) > skb->truesize; + refcount_read(&sk->sk_wmem_alloc) > skb->truesize && + tcp_skb_can_collapse_to(skb); } void tcp_push(struct sock *sk, int flags, int mss_now, @@ -4434,10 +4435,10 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke EXPORT_SYMBOL(tcp_md5_hash_key); /* Called with rcu_read_lock() */ -bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - enum skb_drop_reason *reason, - const void *saddr, const void *daddr, - int family, int dif, int sdif) +enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) { /* * This gets called for each TCP segment that arrives @@ -4464,18 +4465,16 @@ bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) - return false; + return SKB_NOT_DROPPED_YET; if (hash_expected && !hash_location) { - *reason = SKB_DROP_REASON_TCP_MD5NOTFOUND; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return true; + return SKB_DROP_REASON_TCP_MD5NOTFOUND; } if (!hash_expected && hash_location) { - *reason = SKB_DROP_REASON_TCP_MD5UNEXPECTED; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return true; + return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } /* check the signature */ @@ -4483,7 +4482,6 @@ bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { - *reason = SKB_DROP_REASON_TCP_MD5FAILURE; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); if (family == AF_INET) { net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n", @@ -4497,9 +4495,9 @@ bool tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, saddr, ntohs(th->source), daddr, ntohs(th->dest), l3index); } - return true; + return SKB_DROP_REASON_TCP_MD5FAILURE; } - return false; + return SKB_NOT_DROPPED_YET; } EXPORT_SYMBOL(tcp_inbound_md5_hash); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db5831e6c136..dc95572163df 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -135,7 +135,6 @@ u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) return key; } -EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); char *tcp_ca_get_name_by_key(u32 key, char *buffer) { @@ -151,7 +150,6 @@ char *tcp_ca_get_name_by_key(u32 key, char *buffer) return ret; } -EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); /* Assign choice of congestion control. */ void tcp_assign_congestion_control(struct sock *sk) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 411357ad9757..f9cec624068d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1965,9 +1965,10 @@ process: struct sock *nsk; sk = req->rsk_listener; - if (unlikely(tcp_inbound_md5_hash(sk, skb, &drop_reason, - &iph->saddr, &iph->daddr, - AF_INET, dif, sdif))) { + drop_reason = tcp_inbound_md5_hash(sk, skb, + &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); + if (unlikely(drop_reason)) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; @@ -2041,8 +2042,9 @@ process: goto discard_and_relse; } - if (tcp_inbound_md5_hash(sk, skb, &drop_reason, &iph->saddr, - &iph->daddr, AF_INET, dif, sdif)) + drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, + &iph->daddr, AF_INET, dif, sdif); + if (drop_reason) goto discard_and_relse; nf_reset_ct(skb); @@ -3135,6 +3137,7 @@ static int __net_init tcp_sk_init(struct net *net) /* rfc5961 challenge ack rate limiting */ net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; net->ipv4.sysctl_tcp_min_tso_segs = 2; + net->ipv4.sysctl_tcp_tso_rtt_log = 9; /* 2^9 = 512 usec */ net->ipv4.sysctl_tcp_min_rtt_wlen = 300; net->ipv4.sysctl_tcp_autocorking = 1; net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2319531267c6..81aaa7da3e8c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1951,25 +1951,34 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, } /* Return how many segs we'd like on a TSO packet, - * to send one TSO packet per ms + * depending on current pacing rate, and how close the peer is. + * + * Rationale is: + * - For close peers, we rather send bigger packets to reduce + * cpu costs, because occasional losses will be repaired fast. + * - For long distance/rtt flows, we would like to get ACK clocking + * with 1 ACK per ms. + * + * Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. We we cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. */ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs) { - u32 bytes, segs; + unsigned long bytes; + u32 r; - bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - sk->sk_gso_max_size); + bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); - /* Goal is to send at least one packet per ms, - * not one big TSO packet every 100 ms. - * This preserves ACK clocking and is consistent - * with tcp_tso_should_defer() heuristic. - */ - segs = max_t(u32, bytes / mss_now, min_tso_segs); + r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log; + if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) + bytes += sk->sk_gso_max_size >> r; + + bytes = min_t(unsigned long, bytes, sk->sk_gso_max_size); - return segs; + return max_t(u32, bytes / mss_now, min_tso_segs); } /* Return the number of segments we want in the skb we are transmitting. diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9e83bcb6bc99..6fde0b184791 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; - fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; - fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; - rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; |