aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c161
1 files changed, 92 insertions, 69 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a7aea2048a0d..6bf89a6312bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,6 +83,10 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
+static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu,
+ int (*output)(struct sock *, struct sk_buff *));
+
/* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph)
{
@@ -91,14 +95,19 @@ void ip_send_check(struct iphdr *iph)
}
EXPORT_SYMBOL(ip_send_check);
-int __ip_local_out(struct sk_buff *skb)
+static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = htons(skb->len);
ip_send_check(iph);
- return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
- skb_dst(skb)->dev, dst_output);
+ return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL,
+ skb_dst(skb)->dev, dst_output_sk);
+}
+
+int __ip_local_out(struct sk_buff *skb)
+{
+ return __ip_local_out_sk(skb->sk, skb);
}
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
@@ -148,7 +157,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(skb, sk);
+ ip_select_ident(sock_net(sk), skb, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -163,7 +172,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
}
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
-static inline int ip_finish_output2(struct sk_buff *skb)
+static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst;
@@ -182,7 +191,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
struct sk_buff *skb2;
skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (skb2 == NULL) {
+ if (!skb2) {
kfree_skb(skb);
return -ENOMEM;
}
@@ -211,7 +220,8 @@ static inline int ip_finish_output2(struct sk_buff *skb)
return -EINVAL;
}
-static int ip_finish_output_gso(struct sk_buff *skb)
+static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu)
{
netdev_features_t features;
struct sk_buff *segs;
@@ -219,8 +229,8 @@ static int ip_finish_output_gso(struct sk_buff *skb)
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
- skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
- return ip_finish_output2(skb);
+ skb_gso_network_seglen(skb) <= mtu)
+ return ip_finish_output2(sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
*
@@ -243,7 +253,7 @@ static int ip_finish_output_gso(struct sk_buff *skb)
int err;
segs->next = NULL;
- err = ip_fragment(segs, ip_finish_output2);
+ err = ip_fragment(sk, segs, mtu, ip_finish_output2);
if (err && ret == 0)
ret = err;
@@ -253,22 +263,25 @@ static int ip_finish_output_gso(struct sk_buff *skb)
return ret;
}
-static int ip_finish_output(struct sk_buff *skb)
+static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
{
+ unsigned int mtu;
+
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
- if (skb_dst(skb)->xfrm != NULL) {
+ if (skb_dst(skb)->xfrm) {
IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output(skb);
+ return dst_output_sk(sk, skb);
}
#endif
+ mtu = ip_skb_dst_mtu(skb);
if (skb_is_gso(skb))
- return ip_finish_output_gso(skb);
+ return ip_finish_output_gso(sk, skb, mtu);
- if (skb->len > ip_skb_dst_mtu(skb))
- return ip_fragment(skb, ip_finish_output2);
+ if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ return ip_fragment(sk, skb, mtu, ip_finish_output2);
- return ip_finish_output2(skb);
+ return ip_finish_output2(sk, skb);
}
int ip_mc_output(struct sock *sk, struct sk_buff *skb)
@@ -307,7 +320,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- newskb, NULL, newskb->dev,
+ sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
}
@@ -322,11 +335,11 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
if (rt->rt_flags&RTCF_BROADCAST) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
- NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb,
NULL, newskb->dev, dev_loopback_xmit);
}
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL,
skb->dev, ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
@@ -340,7 +353,8 @@ int ip_output(struct sock *sk, struct sk_buff *skb)
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
+ NULL, dev,
ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
@@ -376,12 +390,12 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
inet_opt = rcu_dereference(inet->inet_opt);
fl4 = &fl->u.ip4;
rt = skb_rtable(skb);
- if (rt != NULL)
+ if (rt)
goto packet_routed;
/* Make sure we can route this packet. */
rt = (struct rtable *)__sk_dst_check(sk, 0);
- if (rt == NULL) {
+ if (!rt) {
__be32 daddr;
/* Use correct destination address if we have options. */
@@ -430,7 +444,8 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
+ ip_select_ident_segs(sock_net(sk), skb, sk,
+ skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
@@ -448,7 +463,6 @@ no_route:
}
EXPORT_SYMBOL(ip_queue_xmit);
-
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
to->pkt_type = from->pkt_type;
@@ -472,6 +486,31 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu,
+ int (*output)(struct sock *, struct sk_buff *))
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ if ((iph->frag_off & htons(IP_DF)) == 0)
+ return ip_do_fragment(sk, skb, output);
+
+ if (unlikely(!skb->ignore_df ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > mtu))) {
+ struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = rt->dst.dev;
+
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ return ip_do_fragment(sk, skb, output);
+}
+
/*
* This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus
@@ -479,7 +518,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph;
int ptr;
@@ -500,15 +540,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
iph = ip_hdr(skb);
mtu = ip_skb_dst_mtu(skb);
- if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
- (IPCB(skb)->frag_max_size &&
- IPCB(skb)->frag_max_size > mtu))) {
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- kfree_skb(skb);
- return -EMSGSIZE;
- }
+ if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
+ mtu = IPCB(skb)->frag_max_size;
/*
* Setup starting values.
@@ -516,10 +549,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
hlen = iph->ihl * 4;
mtu = mtu - hlen; /* Size of data space */
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge)
- mtu -= nf_bridge_mtu_reduction(skb);
-#endif
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
@@ -586,13 +615,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
ip_options_fragment(frag);
offset += skb->len - hlen;
iph->frag_off = htons(offset>>3);
- if (frag->next != NULL)
+ if (frag->next)
iph->frag_off |= htons(IP_MF);
/* Ready, complete checksum */
ip_send_check(iph);
}
- err = output(skb);
+ err = output(sk, skb);
if (!err)
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
@@ -636,10 +665,7 @@ slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
- /* for bridged IP traffic encapsulated inside f.e. a vlan header,
- * we need to make room for the encapsulating header
- */
- ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
+ ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
/*
* Fragment the datagram.
@@ -707,6 +733,9 @@ slow_path:
iph = ip_hdr(skb2);
iph->frag_off = htons((offset >> 3));
+ if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
+ iph->frag_off |= htons(IP_DF);
+
/* ANK: dirty, but effective trick. Upgrade options only if
* the segment to be fragmented was THE FIRST (otherwise,
* options are already fixed) and make it ONCE
@@ -732,7 +761,7 @@ slow_path:
ip_send_check(iph);
- err = output(skb2);
+ err = output(sk, skb2);
if (err)
goto fail;
@@ -747,7 +776,7 @@ fail:
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
-EXPORT_SYMBOL(ip_fragment);
+EXPORT_SYMBOL(ip_do_fragment);
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
@@ -792,12 +821,13 @@ static inline int ip_ufo_append_data(struct sock *sk,
* device, so create one single skb packet containing complete
* udp datagram
*/
- if ((skb = skb_peek_tail(queue)) == NULL) {
+ skb = skb_peek_tail(queue);
+ if (!skb) {
skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
- if (skb == NULL)
+ if (!skb)
return err;
/* reserve space for Hardware header */
@@ -814,7 +844,6 @@ static inline int ip_ufo_append_data(struct sock *sk,
skb->csum = 0;
-
__skb_queue_tail(queue, skb);
} else if (skb_is_gso(skb)) {
goto append;
@@ -963,10 +992,10 @@ alloc_new_skb:
skb = sock_wmalloc(sk,
alloclen + hh_len + 15, 1,
sk->sk_allocation);
- if (unlikely(skb == NULL))
+ if (unlikely(!skb))
err = -ENOBUFS;
}
- if (skb == NULL)
+ if (!skb)
goto error;
/*
@@ -1090,10 +1119,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
*/
opt = ipc->opt;
if (opt) {
- if (cork->opt == NULL) {
+ if (!cork->opt) {
cork->opt = kmalloc(sizeof(struct ip_options) + 40,
sk->sk_allocation);
- if (unlikely(cork->opt == NULL))
+ if (unlikely(!cork->opt))
return -ENOBUFS;
}
memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
@@ -1200,7 +1229,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EMSGSIZE;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ skb = skb_peek_tail(&sk->sk_write_queue);
+ if (!skb)
return -EINVAL;
cork->length += size;
@@ -1211,13 +1241,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
-
while (size > 0) {
- int i;
-
- if (skb_is_gso(skb))
+ if (skb_is_gso(skb)) {
len = size;
- else {
+ } else {
/* Check if the remaining data fits into current packet. */
len = mtu - skb->len;
@@ -1269,15 +1296,10 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
continue;
}
- i = skb_shinfo(skb)->nr_frags;
if (len > size)
len = size;
- if (skb_can_coalesce(skb, i, page, offset)) {
- skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
- } else if (i < MAX_SKB_FRAGS) {
- get_page(page);
- skb_fill_page_desc(skb, i, page, offset, len);
- } else {
+
+ if (skb_append_pagefrags(skb, page, offset, len)) {
err = -EMSGSIZE;
goto error;
}
@@ -1331,7 +1353,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
__be16 df = 0;
__u8 ttl;
- if ((skb = __skb_dequeue(queue)) == NULL)
+ skb = __skb_dequeue(queue);
+ if (!skb)
goto out;
tail_skb = &(skb_shinfo(skb)->frag_list);
@@ -1382,7 +1405,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(skb, sk);
+ ip_select_ident(net, skb, sk);
if (opt) {
iph->ihl += opt->optlen>>2;