aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fou.c2
-rw-r--r--net/ipv4/geneve.c3
-rw-r--r--net/ipv4/gre_offload.c4
-rw-r--r--net/ipv4/igmp.c11
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c53
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c10
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c91
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c2
-rw-r--r--net/ipv4/ping.c14
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/tcp.c61
-rw-r--r--net/ipv4/tcp_input.c64
-rw-r--r--net/ipv4/tcp_ipv4.c9
-rw-r--r--net/ipv4/tcp_memcontrol.c87
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp_offload.c2
23 files changed, 247 insertions, 201 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 92db7a69f2b9..e67da4e6c324 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1246,7 +1246,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
encap = SKB_GSO_CB(skb)->encap_level > 0;
if (encap)
- features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ features &= skb->dev->hw_enc_features;
SKB_GSO_CB(skb)->encap_level += ihl;
skb_reset_transport_header(skb);
@@ -1386,6 +1386,17 @@ out:
return pp;
}
+int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
+{
+ if (sk->sk_family == AF_INET)
+ return ip_recv_error(sk, msg, len, addr_len);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
+#endif
+ return -EINVAL;
+}
+
static int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f2e15738534d..8f7bd56955b0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
else
res->tclassid = 0;
#endif
+
+ if (err == -ESRCH)
+ err = -ENETUNREACH;
+
return err;
}
EXPORT_SYMBOL_GPL(__fib_lookup);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 32e78924e246..606c520ffd5a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -133,6 +133,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff)
int err = -ENOSYS;
const struct net_offload **offloads;
+ udp_tunnel_gro_complete(skb, nhoff);
+
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 065cd94c640c..dedb21e99914 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -144,6 +144,8 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
+ skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+
return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst,
tos, ttl, df, src_port, dst_port, xnet);
}
@@ -364,6 +366,7 @@ late_initcall(geneve_init_module);
static void __exit geneve_cleanup_module(void)
{
destroy_workqueue(geneve_wq);
+ unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index ccda09628de7..bb5947b0ce2d 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -47,7 +47,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
- ghl = skb_inner_network_header(skb) - skb_transport_header(skb);
+ ghl = skb_inner_mac_header(skb) - skb_transport_header(skb);
if (unlikely(ghl < sizeof(*greh)))
goto out;
@@ -68,7 +68,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
skb->mac_len = skb_inner_network_offset(skb);
/* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ enc_features = skb->dev->hw_enc_features & features;
segs = skb_mac_gso_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fb70e3ecc3e4..bb15d0e03d4f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
-
-static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
+static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
struct rtable *rt;
@@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
+ unsigned int size = mtu;
while (1) {
skb = alloc_skb(size + hlen + tlen,
@@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
return NULL;
}
skb->priority = TC_PRIO_CONTROL;
- igmp_skb_size(skb) = size;
rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
0, 0,
@@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
+ skb->reserved_tailroom = skb_end_offset(skb) -
+ min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
@@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
return skb;
}
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
- skb_tailroom(skb)) : 0)
+#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
int type, int gdeleted, int sdeleted)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 9eb89f3f0ee4..19419b60cb37 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -146,7 +146,6 @@ evict_again:
atomic_inc(&fq->refcnt);
spin_unlock(&hb->chain_lock);
del_timer_sync(&fq->timer);
- WARN_ON(atomic_read(&fq->refcnt) != 1);
inet_frag_put(fq, f);
goto evict_again;
}
@@ -285,7 +284,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_bucket *hb;
hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
+ if (!(fq->flags & INET_FRAG_EVICTED))
+ hlist_del(&fq->list);
spin_unlock(&hb->chain_lock);
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 88e5ef2c7f51..bc6471d4abcd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -231,7 +231,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
*/
features = netif_skb_features(skb);
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
- if (IS_ERR(segs)) {
+ if (IS_ERR_OR_NULL(segs)) {
kfree_skb(skb);
return -ENOMEM;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c373a9ad4555..9daf2177dc00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -195,7 +195,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
-#if defined(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
if (allow_ipv6 &&
cmsg->cmsg_level == SOL_IPV6 &&
cmsg->cmsg_type == IPV6_PKTINFO) {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 3e861011e4a3..1a7e979e80ba 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -528,6 +528,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
.validate = vti_tunnel_validate,
.newlink = vti_newlink,
.changelink = vti_changelink,
+ .dellink = ip_tunnel_dellink,
.get_size = vti_get_size,
.fill_info = vti_fill_info,
};
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index a054fe083431..5c61328b7704 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -56,11 +56,11 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static int ipv4_print_tuple(struct seq_file *s,
+static void ipv4_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
- return seq_printf(s, "src=%pI4 dst=%pI4 ",
- &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
}
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 4c48e434bb1f..a460a87e14f8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -94,7 +94,7 @@ static void ct_seq_stop(struct seq_file *s, void *v)
}
#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
int ret;
u32 len;
@@ -102,17 +102,15 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
if (ret)
- return 0;
+ return;
- ret = seq_printf(s, "secctx=%s ", secctx);
+ seq_printf(s, "secctx=%s ", secctx);
security_release_secctx(secctx, len);
- return ret;
}
#else
-static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
{
- return 0;
}
#endif
@@ -141,47 +139,52 @@ static int ct_seq_show(struct seq_file *s, void *v)
NF_CT_ASSERT(l4proto);
ret = -ENOSPC;
- if (seq_printf(s, "%-8s %u %ld ",
- l4proto->name, nf_ct_protonum(ct),
- timer_pending(&ct->timeout)
- ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
- goto release;
+ seq_printf(s, "%-8s %u %ld ",
+ l4proto->name, nf_ct_protonum(ct),
+ timer_pending(&ct->timeout)
+ ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+
+ if (l4proto->print_conntrack)
+ l4proto->print_conntrack(s, ct);
- if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
+ if (seq_has_overflowed(s))
goto release;
- if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- l3proto, l4proto))
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l3proto, l4proto);
+
+ if (seq_has_overflowed(s))
goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
goto release;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
- if (seq_printf(s, "[UNREPLIED] "))
- goto release;
+ seq_printf(s, "[UNREPLIED] ");
- if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- l3proto, l4proto))
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ l3proto, l4proto);
+
+ if (seq_has_overflowed(s))
goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
if (test_bit(IPS_ASSURED_BIT, &ct->status))
- if (seq_printf(s, "[ASSURED] "))
- goto release;
+ seq_printf(s, "[ASSURED] ");
#ifdef CONFIG_NF_CONNTRACK_MARK
- if (seq_printf(s, "mark=%u ", ct->mark))
- goto release;
+ seq_printf(s, "mark=%u ", ct->mark);
#endif
- if (ct_show_secctx(s, ct))
- goto release;
+ ct_show_secctx(s, ct);
- if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+ seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
+
+ if (seq_has_overflowed(s))
goto release;
+
ret = 0;
release:
nf_ct_put(ct);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index b91b2641adda..80d5554b9a88 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -72,13 +72,13 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
}
/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
+static void icmp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
- return seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
}
static unsigned int *icmp_get_timeouts(struct net *net)
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b023b4eb1a96..1baaa83dfe5c 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -6,48 +6,45 @@
* published by the Free Software Foundation.
*/
+#include <linux/module.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>
#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/ipv4/nf_reject.h>
-/* Send RST reply */
-void nf_send_reset(struct sk_buff *oldskb, int hook)
+const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
+ struct tcphdr *_oth, int hook)
{
- struct sk_buff *nskb;
- const struct iphdr *oiph;
- struct iphdr *niph;
const struct tcphdr *oth;
- struct tcphdr _otcph, *tcph;
/* IP header checks: fragment. */
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
- return;
+ return NULL;
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
- sizeof(_otcph), &_otcph);
+ sizeof(struct tcphdr), _oth);
if (oth == NULL)
- return;
+ return NULL;
/* No RST for RST. */
if (oth->rst)
- return;
-
- if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
- return;
+ return NULL;
/* Check checksum */
if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
- return;
- oiph = ip_hdr(oldskb);
+ return NULL;
- nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
- LL_MAX_HEADER, GFP_ATOMIC);
- if (!nskb)
- return;
+ return oth;
+}
+EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get);
- skb_reserve(nskb, LL_MAX_HEADER);
+struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ __be16 protocol, int ttl)
+{
+ struct iphdr *niph, *oiph = ip_hdr(oldskb);
skb_reset_network_header(nskb);
niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
@@ -56,10 +53,23 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
niph->tos = 0;
niph->id = 0;
niph->frag_off = htons(IP_DF);
- niph->protocol = IPPROTO_TCP;
+ niph->protocol = protocol;
niph->check = 0;
niph->saddr = oiph->daddr;
niph->daddr = oiph->saddr;
+ niph->ttl = ttl;
+
+ nskb->protocol = htons(ETH_P_IP);
+
+ return niph;
+}
+EXPORT_SYMBOL_GPL(nf_reject_iphdr_put);
+
+void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
+ const struct tcphdr *oth)
+{
+ struct iphdr *niph = ip_hdr(nskb);
+ struct tcphdr *tcph;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
@@ -68,9 +78,9 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
tcph->dest = oth->source;
tcph->doff = sizeof(struct tcphdr) / 4;
- if (oth->ack)
+ if (oth->ack) {
tcph->seq = oth->ack_seq;
- else {
+ } else {
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
oldskb->len - ip_hdrlen(oldskb) -
(oth->doff << 2));
@@ -83,16 +93,43 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
+}
+EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
+
+/* Send RST reply */
+void nf_send_reset(struct sk_buff *oldskb, int hook)
+{
+ struct sk_buff *nskb;
+ const struct iphdr *oiph;
+ struct iphdr *niph;
+ const struct tcphdr *oth;
+ struct tcphdr _oth;
+
+ oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook);
+ if (!oth)
+ return;
+
+ if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ return;
+
+ oiph = ip_hdr(oldskb);
+
+ nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ LL_MAX_HEADER, GFP_ATOMIC);
+ if (!nskb)
+ return;
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
- nskb->protocol = htons(ETH_P_IP);
+ skb_reserve(nskb, LL_MAX_HEADER);
+ niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
+ ip4_dst_hoplimit(skb_dst(nskb)));
+ nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
+
if (ip_route_me_harder(nskb, RTN_UNSPEC))
goto free_nskb;
- niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
-
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
@@ -125,3 +162,5 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
kfree_skb(nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 1c636d6b5b50..665de06561cd 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
struct nf_nat_range range;
unsigned int verdict;
+ memset(&range, 0, sizeof(range));
range.flags = priv->flags;
verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
@@ -39,6 +40,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = {
.eval = nft_masq_ipv4_eval,
.init = nft_masq_init,
.dump = nft_masq_dump,
+ .validate = nft_masq_validate,
};
static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 57f7c9804139..5d740cccf69e 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -217,6 +217,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
&ipv6_hdr(skb)->daddr))
continue;
#endif
+ } else {
+ continue;
}
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
@@ -853,16 +855,8 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (flags & MSG_ERRQUEUE) {
- if (family == AF_INET) {
- return ip_recv_error(sk, msg, len, addr_len);
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (family == AF_INET6) {
- return pingv6_ops.ipv6_recv_error(sk, msg, len,
- addr_len);
-#endif
- }
- }
+ if (flags & MSG_ERRQUEUE)
+ return inet_recv_error(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2d4ae469b471..6a2155b02602 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1798,6 +1798,7 @@ local_input:
no_route:
RT_CACHE_STAT_INC(in_no_route);
res.type = RTN_UNREACHABLE;
+ res.fi = NULL;
goto local_input;
/*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1bec4e76d88c..38c2bcb8dd5d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1598,7 +1598,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
u32 urg_hole = 0;
if (unlikely(flags & MSG_ERRQUEUE))
- return ip_recv_error(sk, msg, len, addr_len);
+ return inet_recv_error(sk, msg, len, addr_len);
if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
@@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
-
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
-
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- }
- free_percpu(pool);
-}
+static bool tcp_md5sig_pool_populated = false;
static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
- struct tcp_md5sig_pool __percpu *pool;
-
- pool = alloc_percpu(struct tcp_md5sig_pool);
- if (!pool)
- return;
for_each_possible_cpu(cpu) {
- struct crypto_hash *hash;
-
- hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
- goto out_free;
+ if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) {
+ struct crypto_hash *hash;
- per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
+ hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR_OR_NULL(hash))
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
+ }
}
- /* before setting tcp_md5sig_pool, we must commit all writes
- * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool = pool;
- return;
-out_free:
- __tcp_free_md5sig_pool(pool);
+ tcp_md5sig_pool_populated = true;
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool)) {
+ if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool)
+ if (!tcp_md5sig_pool_populated)
__tcp_alloc_md5sig_pool();
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool != NULL;
+ return tcp_md5sig_pool_populated;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *p;
-
local_bh_disable();
- p = ACCESS_ONCE(tcp_md5sig_pool);
- if (p)
- return raw_cpu_ptr(p);
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
local_bh_enable();
return NULL;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a12b455928e5..d107ee246a1d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
/* Undo procedures. */
+/* We can clear retrans_stamp when there are no retransmissions in the
+ * window. It would seem that it is trivially available for us in
+ * tp->retrans_out, however, that kind of assumptions doesn't consider
+ * what will happen if errors occur when sending retransmission for the
+ * second time. ...It could the that such segment has only
+ * TCPCB_EVER_RETRANS set at the present time. It seems that checking
+ * the head skb is enough except for some reneging corner cases that
+ * are not worth the effort.
+ *
+ * Main reason for all this complexity is the fact that connection dying
+ * time now depends on the validity of the retrans_stamp, in particular,
+ * that successive retransmissions of a segment must not advance
+ * retrans_stamp under any conditions.
+ */
+static bool tcp_any_retrans_done(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+
+ if (tp->retrans_out)
+ return true;
+
+ skb = tcp_write_queue_head(sk);
+ if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
+ return true;
+
+ return false;
+}
+
#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
@@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
* is ACKed. For Reno it is MUST to prevent false
* fast retransmits (RFC2582). SACK TCP is safe. */
tcp_moderate_cwnd(tp);
+ if (!tcp_any_retrans_done(sk))
+ tp->retrans_stamp = 0;
return true;
}
tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk)
return false;
}
-/* We can clear retrans_stamp when there are no retransmissions in the
- * window. It would seem that it is trivially available for us in
- * tp->retrans_out, however, that kind of assumptions doesn't consider
- * what will happen if errors occur when sending retransmission for the
- * second time. ...It could the that such segment has only
- * TCPCB_EVER_RETRANS set at the present time. It seems that checking
- * the head skb is enough except for some reneging corner cases that
- * are not worth the effort.
- *
- * Main reason for all this complexity is the fact that connection dying
- * time now depends on the validity of the retrans_stamp, in particular,
- * that successive retransmissions of a segment must not advance
- * retrans_stamp under any conditions.
- */
-static bool tcp_any_retrans_done(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
-
- if (tp->retrans_out)
- return true;
-
- skb = tcp_write_queue_head(sk);
- if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
- return true;
-
- return false;
-}
-
/* Undo during loss recovery after partial ACK or using F-RTO. */
static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
{
@@ -5229,7 +5231,7 @@ slow_path:
if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
goto csum_error;
- if (!th->ack && !th->rst)
+ if (!th->ack && !th->rst && !th->syn)
goto discard;
/*
@@ -5648,7 +5650,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
}
- if (!th->ack && !th->rst)
+ if (!th->ack && !th->rst && !th->syn)
goto discard;
if (!tcp_validate_incoming(sk, skb, th, 0))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94d1a7757ff7..147be2024290 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -206,8 +206,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr;
- inet_set_txhash(sk);
-
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
@@ -224,6 +222,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
+ inet_set_txhash(sk);
+
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
if (IS_ERR(rt)) {
@@ -598,7 +598,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
if (th->rst)
return;
- if (skb_rtable(skb)->rt_type != RTN_LOCAL)
+ /* If sk not NULL, it means we did a successful lookup and incoming
+ * route had to be correct. prequeue might have dropped our dst.
+ */
+ if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
return;
/* Swap the send and the receive. */
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 1d191357bf88..272327134a1b 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -9,13 +9,13 @@
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
/*
- * The root cgroup does not use res_counters, but rather,
+ * The root cgroup does not use page_counters, but rather,
* rely on the data already collected by the network
* subsystem
*/
- struct res_counter *res_parent = NULL;
- struct cg_proto *cg_proto, *parent_cg;
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+ struct page_counter *counter_parent = NULL;
+ struct cg_proto *cg_proto, *parent_cg;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
@@ -29,9 +29,9 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
parent_cg = tcp_prot.proto_cgroup(parent);
if (parent_cg)
- res_parent = &parent_cg->memory_allocated;
+ counter_parent = &parent_cg->memory_allocated;
- res_counter_init(&cg_proto->memory_allocated, res_parent);
+ page_counter_init(&cg_proto->memory_allocated, counter_parent);
percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);
return 0;
@@ -50,7 +50,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
-static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
+static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
struct cg_proto *cg_proto;
int i;
@@ -60,20 +60,17 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
if (!cg_proto)
return -EINVAL;
- if (val > RES_COUNTER_MAX)
- val = RES_COUNTER_MAX;
-
- ret = res_counter_set_limit(&cg_proto->memory_allocated, val);
+ ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
if (ret)
return ret;
for (i = 0; i < 3; i++)
- cg_proto->sysctl_mem[i] = min_t(long, val >> PAGE_SHIFT,
+ cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
sysctl_tcp_mem[i]);
- if (val == RES_COUNTER_MAX)
+ if (nr_pages == PAGE_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else if (val != RES_COUNTER_MAX) {
+ else {
/*
* The active bit needs to be written after the static_key
* update. This is what guarantees that the socket activation
@@ -102,11 +99,20 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
return 0;
}
+enum {
+ RES_USAGE,
+ RES_LIMIT,
+ RES_MAX_USAGE,
+ RES_FAILCNT,
+};
+
+static DEFINE_MUTEX(tcp_limit_mutex);
+
static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned long long val;
+ unsigned long nr_pages;
int ret = 0;
buf = strstrip(buf);
@@ -114,10 +120,12 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
switch (of_cft(of)->private) {
case RES_LIMIT:
/* see memcontrol.c */
- ret = res_counter_memparse_write_strategy(buf, &val);
+ ret = page_counter_memparse(buf, &nr_pages);
if (ret)
break;
- ret = tcp_update_limit(memcg, val);
+ mutex_lock(&tcp_limit_mutex);
+ ret = tcp_update_limit(memcg, nr_pages);
+ mutex_unlock(&tcp_limit_mutex);
break;
default:
ret = -EINVAL;
@@ -126,43 +134,36 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
return ret ?: nbytes;
}
-static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
-{
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return default_val;
-
- return res_counter_read_u64(&cg_proto->memory_allocated, type);
-}
-
-static u64 tcp_read_usage(struct mem_cgroup *memcg)
-{
- struct cg_proto *cg_proto;
-
- cg_proto = tcp_prot.proto_cgroup(memcg);
- if (!cg_proto)
- return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
-
- return res_counter_read_u64(&cg_proto->memory_allocated, RES_USAGE);
-}
-
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
u64 val;
switch (cft->private) {
case RES_LIMIT:
- val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
+ if (!cg_proto)
+ return PAGE_COUNTER_MAX;
+ val = cg_proto->memory_allocated.limit;
+ val *= PAGE_SIZE;
break;
case RES_USAGE:
- val = tcp_read_usage(memcg);
+ if (!cg_proto)
+ val = atomic_long_read(&tcp_memory_allocated);
+ else
+ val = page_counter_read(&cg_proto->memory_allocated);
+ val *= PAGE_SIZE;
break;
case RES_FAILCNT:
+ if (!cg_proto)
+ return 0;
+ val = cg_proto->memory_allocated.failcnt;
+ break;
case RES_MAX_USAGE:
- val = tcp_read_stat(memcg, cft->private, 0);
+ if (!cg_proto)
+ return 0;
+ val = cg_proto->memory_allocated.watermark;
+ val *= PAGE_SIZE;
break;
default:
BUG();
@@ -183,10 +184,10 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
switch (of_cft(of)->private) {
case RES_MAX_USAGE:
- res_counter_reset_max(&cg_proto->memory_allocated);
+ page_counter_reset_watermark(&cg_proto->memory_allocated);
break;
case RES_FAILCNT:
- res_counter_reset_failcnt(&cg_proto->memory_allocated);
+ cg_proto->memory_allocated.failcnt = 0;
break;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3af21296d967..a3d453b94747 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2126,7 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
static bool skb_still_in_host_queue(const struct sock *sk,
const struct sk_buff *skb)
{
- if (unlikely(skb_fclone_busy(skb))) {
+ if (unlikely(skb_fclone_busy(sk, skb))) {
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
return true;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 507310ef4b56..6480cea7aa53 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -58,7 +58,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb->encap_hdr_csum = 1;
/* segment inner packet. */
- enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+ enc_features = skb->dev->hw_enc_features & features;
segs = gso_inner_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,