aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/net/ip_tunnels.h19
-rw-r--r--include/uapi/linux/fou.h32
-rw-r--r--include/uapi/linux/if_tunnel.h16
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/fou.c368
-rw-r--r--net/ipv4/ip_gre.c90
-rw-r--r--net/ipv4/ip_tunnel.c91
-rw-r--r--net/ipv4/ipip.c78
-rw-r--r--net/ipv4/protocol.c1
-rw-r--r--net/ipv4/udp_offload.c5
-rw-r--r--net/ipv6/protocol.c1
-rw-r--r--net/ipv6/sit.c107
14 files changed, 802 insertions, 20 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 28d4378615e5..4354b4307e37 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1874,7 +1874,7 @@ struct napi_gro_cb {
/* jiffies when first packet was created/queued */
unsigned long age;
- /* Used in ipv6_gro_receive() */
+ /* Used in ipv6_gro_receive() and foo-over-udp */
u16 proto;
/* Used in udp_gro_receive */
@@ -1925,6 +1925,7 @@ struct packet_offload {
struct udp_offload {
__be16 port;
+ u8 ipproto;
struct offload_callbacks callbacks;
};
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 8dd8cab88b87..7f538ba6e267 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -10,6 +10,7 @@
#include <net/gro_cells.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
+#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#if IS_ENABLED(CONFIG_IPV6)
@@ -31,6 +32,13 @@ struct ip_tunnel_6rd_parm {
};
#endif
+struct ip_tunnel_encap {
+ __u16 type;
+ __u16 flags;
+ __be16 sport;
+ __be16 dport;
+};
+
struct ip_tunnel_prl_entry {
struct ip_tunnel_prl_entry __rcu *next;
__be32 addr;
@@ -56,13 +64,18 @@ struct ip_tunnel {
/* These four fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
__u32 o_seqno; /* The last output seqno */
- int hlen; /* Precalculated header length */
+ int tun_hlen; /* Precalculated header length */
int mlink;
struct ip_tunnel_dst __percpu *dst_cache;
struct ip_tunnel_parm parms;
+ int encap_hlen; /* Encap header length (FOU,GUE) */
+ struct ip_tunnel_encap encap;
+
+ int hlen; /* tun_hlen + encap_hlen */
+
/* for SIT */
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd_parm ip6rd;
@@ -114,6 +127,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ u8 *protocol, struct flowi4 *fl4);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
@@ -131,6 +146,8 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p);
void ip_tunnel_setup(struct net_device *dev, int net_id);
void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+ struct ip_tunnel_encap *ipencap);
/* Extract dsfield from inner protocol */
static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
new file mode 100644
index 000000000000..e03376de453d
--- /dev/null
+++ b/include/uapi/linux/fou.h
@@ -0,0 +1,32 @@
+/* fou.h - FOU Interface */
+
+#ifndef _UAPI_LINUX_FOU_H
+#define _UAPI_LINUX_FOU_H
+
+/* NETLINK_GENERIC related info
+ */
+#define FOU_GENL_NAME "fou"
+#define FOU_GENL_VERSION 0x1
+
+enum {
+ FOU_ATTR_UNSPEC,
+ FOU_ATTR_PORT, /* u16 */
+ FOU_ATTR_AF, /* u8 */
+ FOU_ATTR_IPPROTO, /* u8 */
+
+ __FOU_ATTR_MAX,
+};
+
+#define FOU_ATTR_MAX (__FOU_ATTR_MAX - 1)
+
+enum {
+ FOU_CMD_UNSPEC,
+ FOU_CMD_ADD,
+ FOU_CMD_DEL,
+
+ __FOU_CMD_MAX,
+};
+
+#define FOU_CMD_MAX (__FOU_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_FOU_H */
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 3bce9e9d9f7c..7c832afdfa94 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -53,10 +53,22 @@ enum {
IFLA_IPTUN_6RD_RELAY_PREFIX,
IFLA_IPTUN_6RD_PREFIXLEN,
IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+ IFLA_IPTUN_ENCAP_TYPE,
+ IFLA_IPTUN_ENCAP_FLAGS,
+ IFLA_IPTUN_ENCAP_SPORT,
+ IFLA_IPTUN_ENCAP_DPORT,
__IFLA_IPTUN_MAX,
};
#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
+enum tunnel_encap_types {
+ TUNNEL_ENCAP_NONE,
+ TUNNEL_ENCAP_FOU,
+};
+
+#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
+#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1)
+
/* SIT-mode i_flags */
#define SIT_ISATAP 0x0001
@@ -94,6 +106,10 @@ enum {
IFLA_GRE_ENCAP_LIMIT,
IFLA_GRE_FLOWINFO,
IFLA_GRE_FLAGS,
+ IFLA_GRE_ENCAP_TYPE,
+ IFLA_GRE_ENCAP_FLAGS,
+ IFLA_GRE_ENCAP_SPORT,
+ IFLA_GRE_ENCAP_DPORT,
__IFLA_GRE_MAX,
};
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index dbc10d84161f..84f710b7472a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -311,6 +311,16 @@ config NET_UDP_TUNNEL
tristate
default n
+config NET_FOU
+ tristate "IP: Foo (IP protocols) over UDP"
+ select XFRM
+ select NET_UDP_TUNNEL
+ ---help---
+ Foo over UDP allows any IP protocol to be directly encapsulated
+ over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP
+ network mechanisms and optimizations for UDP (such as ECMP
+ and RSS) can be leveraged to provide better service.
+
config INET_AH
tristate "IP: AH transformation"
select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 8ee1cd4053ee..d78d404c596f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
+obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
new file mode 100644
index 000000000000..dced89fbe480
--- /dev/null
+++ b/net/ipv4/fou.c
@@ -0,0 +1,368 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/genetlink.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/xfrm.h>
+#include <uapi/linux/fou.h>
+#include <uapi/linux/genetlink.h>
+
+static DEFINE_SPINLOCK(fou_lock);
+static LIST_HEAD(fou_list);
+
+struct fou {
+ struct socket *sock;
+ u8 protocol;
+ u16 port;
+ struct udp_offload udp_offloads;
+ struct list_head list;
+};
+
+struct fou_cfg {
+ u8 protocol;
+ struct udp_port_cfg udp_config;
+};
+
+static inline struct fou *fou_from_sock(struct sock *sk)
+{
+ return sk->sk_user_data;
+}
+
+static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
+ u8 protocol, size_t len)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ /* Remove 'len' bytes from the packet (UDP header and
+ * FOU header if present), modify the protocol to the one
+ * we found, and then call rcv_encap.
+ */
+ iph->tot_len = htons(ntohs(iph->tot_len) - len);
+ __skb_pull(skb, len);
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
+ skb_reset_transport_header(skb);
+
+ return -protocol;
+}
+
+static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct fou *fou = fou_from_sock(sk);
+
+ if (!fou)
+ return 1;
+
+ return fou_udp_encap_recv_deliver(skb, fou->protocol,
+ sizeof(struct udphdr));
+}
+
+static struct sk_buff **fou_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb,
+ const struct net_offload **offloads)
+{
+ const struct net_offload *ops;
+ struct sk_buff **pp = NULL;
+ u8 proto = NAPI_GRO_CB(skb)->proto;
+
+ rcu_read_lock();
+ ops = rcu_dereference(offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out_unlock;
+
+ pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return pp;
+}
+
+static int fou_gro_complete(struct sk_buff *skb, int nhoff,
+ const struct net_offload **offloads)
+{
+ const struct net_offload *ops;
+ u8 proto = NAPI_GRO_CB(skb)->proto;
+ int err = -ENOSYS;
+
+ rcu_read_lock();
+ ops = rcu_dereference(offloads[proto]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ goto out_unlock;
+
+ err = ops->callbacks.gro_complete(skb, nhoff);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static struct sk_buff **fou4_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ return fou_gro_receive(head, skb, inet_offloads);
+}
+
+static int fou4_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ return fou_gro_complete(skb, nhoff, inet_offloads);
+}
+
+static struct sk_buff **fou6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ return fou_gro_receive(head, skb, inet6_offloads);
+}
+
+static int fou6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ return fou_gro_complete(skb, nhoff, inet6_offloads);
+}
+
+static int fou_add_to_port_list(struct fou *fou)
+{
+ struct fou *fout;
+
+ spin_lock(&fou_lock);
+ list_for_each_entry(fout, &fou_list, list) {
+ if (fou->port == fout->port) {
+ spin_unlock(&fou_lock);
+ return -EALREADY;
+ }
+ }
+
+ list_add(&fou->list, &fou_list);
+ spin_unlock(&fou_lock);
+
+ return 0;
+}
+
+static void fou_release(struct fou *fou)
+{
+ struct socket *sock = fou->sock;
+ struct sock *sk = sock->sk;
+
+ udp_del_offload(&fou->udp_offloads);
+
+ list_del(&fou->list);
+
+ /* Remove hooks into tunnel socket */
+ sk->sk_user_data = NULL;
+
+ sock_release(sock);
+
+ kfree(fou);
+}
+
+static int fou_create(struct net *net, struct fou_cfg *cfg,
+ struct socket **sockp)
+{
+ struct fou *fou = NULL;
+ int err;
+ struct socket *sock = NULL;
+ struct sock *sk;
+
+ /* Open UDP socket */
+ err = udp_sock_create(net, &cfg->udp_config, &sock);
+ if (err < 0)
+ goto error;
+
+ /* Allocate FOU port structure */
+ fou = kzalloc(sizeof(*fou), GFP_KERNEL);
+ if (!fou) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ sk = sock->sk;
+
+ /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+ fou->protocol = cfg->protocol;
+ fou->port = cfg->udp_config.local_udp_port;
+ udp_sk(sk)->encap_rcv = fou_udp_recv;
+
+ udp_sk(sk)->encap_type = 1;
+ udp_encap_enable();
+
+ sk->sk_user_data = fou;
+ fou->sock = sock;
+
+ udp_set_convert_csum(sk, true);
+
+ sk->sk_allocation = GFP_ATOMIC;
+
+ switch (cfg->udp_config.family) {
+ case AF_INET:
+ fou->udp_offloads.callbacks.gro_receive = fou4_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou4_gro_complete;
+ break;
+ case AF_INET6:
+ fou->udp_offloads.callbacks.gro_receive = fou6_gro_receive;
+ fou->udp_offloads.callbacks.gro_complete = fou6_gro_complete;
+ break;
+ default:
+ err = -EPFNOSUPPORT;
+ goto error;
+ }
+
+ fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+ fou->udp_offloads.ipproto = cfg->protocol;
+
+ if (cfg->udp_config.family == AF_INET) {
+ err = udp_add_offload(&fou->udp_offloads);
+ if (err)
+ goto error;
+ }
+
+ err = fou_add_to_port_list(fou);
+ if (err)
+ goto error;
+
+ if (sockp)
+ *sockp = sock;
+
+ return 0;
+
+error:
+ kfree(fou);
+ if (sock)
+ sock_release(sock);
+
+ return err;
+}
+
+static int fou_destroy(struct net *net, struct fou_cfg *cfg)
+{
+ struct fou *fou;
+ u16 port = cfg->udp_config.local_udp_port;
+ int err = -EINVAL;
+
+ spin_lock(&fou_lock);
+ list_for_each_entry(fou, &fou_list, list) {
+ if (fou->port == port) {
+ udp_del_offload(&fou->udp_offloads);
+ fou_release(fou);
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&fou_lock);
+
+ return err;
+}
+
+static struct genl_family fou_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = FOU_GENL_NAME,
+ .version = FOU_GENL_VERSION,
+ .maxattr = FOU_ATTR_MAX,
+ .netnsok = true,
+};
+
+static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
+ [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_AF] = { .type = NLA_U8, },
+ [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+};
+
+static int parse_nl_config(struct genl_info *info,
+ struct fou_cfg *cfg)
+{
+ memset(cfg, 0, sizeof(*cfg));
+
+ cfg->udp_config.family = AF_INET;
+
+ if (info->attrs[FOU_ATTR_AF]) {
+ u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
+
+ if (family != AF_INET && family != AF_INET6)
+ return -EINVAL;
+
+ cfg->udp_config.family = family;
+ }
+
+ if (info->attrs[FOU_ATTR_PORT]) {
+ u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);
+
+ cfg->udp_config.local_udp_port = port;
+ }
+
+ if (info->attrs[FOU_ATTR_IPPROTO])
+ cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+
+ return 0;
+}
+
+static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct fou_cfg cfg;
+ int err;
+
+ err = parse_nl_config(info, &cfg);
+ if (err)
+ return err;
+
+ return fou_create(&init_net, &cfg, NULL);
+}
+
+static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
+{
+ struct fou_cfg cfg;
+
+ parse_nl_config(info, &cfg);
+
+ return fou_destroy(&init_net, &cfg);
+}
+
+static const struct genl_ops fou_nl_ops[] = {
+ {
+ .cmd = FOU_CMD_ADD,
+ .doit = fou_nl_cmd_add_port,
+ .policy = fou_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_DEL,
+ .doit = fou_nl_cmd_rm_port,
+ .policy = fou_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+static int __init fou_init(void)
+{
+ int ret;
+
+ ret = genl_register_family_with_ops(&fou_nl_family,
+ fou_nl_ops);
+
+ return ret;
+}
+
+static void __exit fou_fini(void)
+{
+ struct fou *fou, *next;
+
+ genl_unregister_family(&fou_nl_family);
+
+ /* Close all the FOU sockets */
+
+ spin_lock(&fou_lock);
+ list_for_each_entry_safe(fou, next, &fou_list, list)
+ fou_release(fou);
+ spin_unlock(&fou_lock);
+}
+
+module_init(fou_init);
+module_exit(fou_fini);
+MODULE_AUTHOR("Tom Herbert <[email protected]>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9b842544aea3..829aff8bf723 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -239,7 +239,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
tpi.seq = htonl(tunnel->o_seqno);
/* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->hlen);
+ gre_build_header(skb, &tpi, tunnel->tun_hlen);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -310,7 +310,7 @@ out:
static int ipgre_tunnel_ioctl(struct net_device *dev,
struct ifreq *ifr, int cmd)
{
- int err = 0;
+ int err;
struct ip_tunnel_parm p;
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
@@ -470,13 +470,18 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
+ int t_hlen;
tunnel = netdev_priv(dev);
- tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
tunnel->parms.iph.protocol = IPPROTO_GRE;
- dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+ t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+ dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -628,6 +633,40 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
parms->iph.frag_off = htons(IP_DF);
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipgre_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_GRE_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int gre_tap_init(struct net_device *dev)
{
__gre_tunnel_init(dev);
@@ -657,6 +696,15 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipgre_netlink_parms(data, tb, &p);
return ip_tunnel_newlink(dev, tb, &p);
@@ -666,6 +714,15 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipgre_netlink_parms(data, tb, &p);
return ip_tunnel_changelink(dev, tb, &p);
@@ -694,6 +751,14 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_GRE_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -714,6 +779,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
!!(p->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
+ t->encap.type) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT,
+ t->encap.sport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
+ t->encap.dport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
+ t->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -731,6 +807,10 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index afed1aac2638..e3a3dc91e49c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
+#include <net/udp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -487,6 +488,91 @@ drop:
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
+static int ip_encap_hlen(struct ip_tunnel_encap *e)
+{
+ switch (e->type) {
+ case TUNNEL_ENCAP_NONE:
+ return 0;
+ case TUNNEL_ENCAP_FOU:
+ return sizeof(struct udphdr);
+ default:
+ return -EINVAL;
+ }
+}
+
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+ struct ip_tunnel_encap *ipencap)
+{
+ int hlen;
+
+ memset(&t->encap, 0, sizeof(t->encap));
+
+ hlen = ip_encap_hlen(ipencap);
+ if (hlen < 0)
+ return hlen;
+
+ t->encap.type = ipencap->type;
+ t->encap.sport = ipencap->sport;
+ t->encap.dport = ipencap->dport;
+ t->encap.flags = ipencap->flags;
+
+ t->encap_hlen = hlen;
+ t->hlen = t->encap_hlen + t->tun_hlen;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
+{
+ struct udphdr *uh;
+ __be16 sport;
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get length and hash before making space in skb */
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ skb_push(skb, hdr_len);
+
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+
+ return 0;
+}
+
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ switch (t->encap.type) {
+ case TUNNEL_ENCAP_NONE:
+ return 0;
+ case TUNNEL_ENCAP_FOU:
+ return fou_build_header(skb, &t->encap, t->encap_hlen,
+ protocol, fl4);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(ip_tunnel_encap);
+
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rtable *rt, __be16 df)
{
@@ -536,7 +622,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
}
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
- const struct iphdr *tnl_params, const u8 protocol)
+ const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *inner_iph;
@@ -617,6 +703,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+ goto tx_error;
+
rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
if (!rt) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 62eaa005e146..bfec31df8b21 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -301,7 +301,8 @@ static int ipip_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
- tunnel->hlen = 0;
+ tunnel->tun_hlen = 0;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
tunnel->parms.iph.protocol = IPPROTO_IPIP;
return ip_tunnel_init(dev);
}
@@ -340,10 +341,53 @@ static void ipip_netlink_parms(struct nlattr *data[],
parms->iph.frag_off = htons(IP_DF);
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipip_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipip_netlink_parms(data, &p);
return ip_tunnel_newlink(dev, tb, &p);
@@ -353,6 +397,15 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
+
+ if (ipip_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ipip_netlink_parms(data, &p);
@@ -378,6 +431,14 @@ static size_t ipip_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_IPTUN_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -394,6 +455,17 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -407,6 +479,10 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
[IFLA_IPTUN_TOS] = { .type = NLA_U8 },
[IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 46d6a1c923a8..4b7c0ec65251 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -30,6 +30,7 @@
const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet_offloads);
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index adab393b2fe5..d7c43f764c71 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -276,6 +276,7 @@ unflush:
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+ NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
pp = uo_priv->offload->callbacks.gro_receive(head, skb);
out_unlock:
@@ -329,8 +330,10 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff)
break;
}
- if (uo_priv != NULL)
+ if (uo_priv != NULL) {
+ NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto;
err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr));
+ }
rcu_read_unlock();
return err;
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e048cf1bb6a2..e3770abe688a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -51,6 +51,7 @@ EXPORT_SYMBOL(inet6_del_protocol);
#endif
const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet6_offloads);
int inet6_add_offload(const struct net_offload *prot, unsigned char protocol)
{
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 86e3fa81f85e..db75809ab843 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -822,6 +822,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
int addr_type;
u8 ttl;
int err;
+ u8 protocol = IPPROTO_IPV6;
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
@@ -911,8 +913,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
+ skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
+ if (IS_ERR(skb)) {
+ ip_rt_put(rt);
+ goto out;
+ }
+
if (df) {
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ mtu = dst_mtu(&rt->dst) - t_hlen;
if (mtu < 68) {
dev->stats.collisions++;
@@ -947,7 +955,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
- max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+ max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -969,14 +977,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
ttl = iph6->hop_limit;
tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT);
- if (IS_ERR(skb)) {
+ if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
ip_rt_put(rt);
- goto out;
+ goto tx_error;
}
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
- IPPROTO_IPV6, tos, ttl, df,
+ protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1059,8 +1066,10 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
- dev->mtu = tdev->mtu - sizeof(struct iphdr);
+ dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
@@ -1307,7 +1316,10 @@ done:
static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+ if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -1338,12 +1350,15 @@ static void ipip6_dev_free(struct net_device *dev)
static void ipip6_tunnel_setup(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
dev->netdev_ops = &ipip6_netdev_ops;
dev->destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
dev->flags = IFF_NOARP;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->iflink = 0;
@@ -1466,6 +1481,40 @@ static void ipip6_netlink_parms(struct nlattr *data[],
}
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipip6_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
#ifdef CONFIG_IPV6_SIT_6RD
/* This function returns true when 6RD attributes are present in the nl msg */
static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
@@ -1509,12 +1558,20 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
{
struct net *net = dev_net(dev);
struct ip_tunnel *nt;
+ struct ip_tunnel_encap ipencap;
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
int err;
nt = netdev_priv(dev);
+
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(nt, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &nt->parms);
if (ipip6_tunnel_locate(net, &nt->parms, 0))
@@ -1537,15 +1594,23 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_parm p;
+ struct ip_tunnel_encap ipencap;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd ip6rd;
#endif
+ int err;
if (dev == sitn->fb_tunnel_dev)
return -EINVAL;
+ if (ipip6_netlink_encap_parms(data, &ipencap)) {
+ err = ip_tunnel_encap_setup(t, &ipencap);
+ if (err < 0)
+ return err;
+ }
+
ipip6_netlink_parms(data, &p);
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
@@ -1599,6 +1664,14 @@ static size_t ipip6_get_size(const struct net_device *dev)
/* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
nla_total_size(2) +
#endif
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -1630,6 +1703,16 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
#endif
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.dport))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1651,6 +1734,10 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 },
[IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
#endif
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static void ipip6_dellink(struct net_device *dev, struct list_head *head)