diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Kconfig | 2 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 271 | ||||
-rw-r--r-- | net/ipv6/addrconf_core.c | 3 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 100 | ||||
-rw-r--r-- | net/ipv6/anycast.c | 4 | ||||
-rw-r--r-- | net/ipv6/ila/ila.h | 5 | ||||
-rw-r--r-- | net/ipv6/ila/ila_xlat.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 60 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 247 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 234 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 17 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 5 | ||||
-rw-r--r-- | net/ipv6/raw.c | 1 | ||||
-rw-r--r-- | net/ipv6/route.c | 115 | ||||
-rw-r--r-- | net/ipv6/sit.c | 363 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 3 | ||||
-rw-r--r-- | net/ipv6/tunnel6.c | 87 |
17 files changed, 979 insertions, 540 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2ccaee98fddb..5a6111da26c4 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -13,7 +13,7 @@ menuconfig IPV6 For general information about IPv6, see <https://en.wikipedia.org/wiki/IPv6>. For specific information about IPv6 under Linux, see - Documentation/networking/ipv6.txt and read the HOWTO at + Documentation/networking/ipv6.rst and read the HOWTO at <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/> To compile this protocol support as a module, choose M here: the diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24e319dfb510..09cfbf5dd7ce 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -135,8 +135,7 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -static void ipv6_regen_rndid(struct inet6_dev *idev); -static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); +static void ipv6_gen_rnd_iid(struct in6_addr *addr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(const struct inet6_dev *idev); @@ -432,8 +431,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; - } else - ipv6_regen_rndid(ndev); + } ndev->token = in6addr_any; @@ -1238,7 +1236,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) - ip6_del_rt(dev_net(ifp->idev->dev), f6i); + ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); else { if (!(f6i->fib6_flags & RTF_EXPIRES)) fib6_set_expires(f6i, expires); @@ -1306,29 +1304,21 @@ out: in6_ifa_put(ifp); } -static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, - struct inet6_ifaddr *ift, - bool block) +static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) { struct inet6_dev *idev = ifp->idev; - struct in6_addr addr, *tmpaddr; unsigned long tmp_tstamp, age; unsigned long regen_advance; - struct ifa6_config cfg; - int ret = 0; unsigned long now = jiffies; - long max_desync_factor; s32 cnf_temp_preferred_lft; + struct inet6_ifaddr *ift; + struct ifa6_config cfg; + long max_desync_factor; + struct in6_addr addr; + int ret = 0; write_lock_bh(&idev->lock); - if (ift) { - spin_lock_bh(&ift->lock); - memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); - spin_unlock_bh(&ift->lock); - tmpaddr = &addr; - } else { - tmpaddr = NULL; - } + retry: in6_dev_hold(idev); if (idev->cnf.use_tempaddr <= 0) { @@ -1351,8 +1341,8 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - ipv6_try_regen_rndid(idev, tmpaddr); - memcpy(&addr.s6_addr[8], idev->rndid, 8); + ipv6_gen_rnd_iid(&addr); + age = (now - ifp->tstamp) / HZ; regen_advance = idev->cnf.regen_max_retry * @@ -1417,7 +1407,6 @@ retry: in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); - tmpaddr = &addr; write_lock_bh(&idev->lock); goto retry; } @@ -2032,7 +2021,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (ifpub) { in6_ifa_hold(ifpub); spin_unlock_bh(&ifp->lock); - ipv6_create_tempaddr(ifpub, ifp, true); + ipv6_create_tempaddr(ifpub, true); in6_ifa_put(ifpub); } else { spin_unlock_bh(&ifp->lock); @@ -2329,40 +2318,38 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) return err; } -/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static void ipv6_regen_rndid(struct inet6_dev *idev) +/* Generation of a randomized Interface Identifier + * draft-ietf-6man-rfc4941bis, Section 3.3.1 + */ + +static void ipv6_gen_rnd_iid(struct in6_addr *addr) { regen: - get_random_bytes(idev->rndid, sizeof(idev->rndid)); - idev->rndid[0] &= ~0x02; + get_random_bytes(&addr->s6_addr[8], 8); - /* - * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>: - * check if generated address is not inappropriate + /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1: + * check if generated address is not inappropriate: * - * - Reserved subnet anycast (RFC 2526) - * 11111101 11....11 1xxxxxxx - * - ISATAP (RFC4214) 6.1 - * 00-00-5E-FE-xx-xx-xx-xx - * - value 0 - * - XXX: already assigned to an address on the device + * - Reserved IPv6 Interface Identifers + * - XXX: already assigned to an address on the device */ - if (idev->rndid[0] == 0xfd && - (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff && - (idev->rndid[7]&0x80)) + + /* Subnet-router anycast: 0000:0000:0000:0000 */ + if (!(addr->s6_addr32[2] | addr->s6_addr32[3])) goto regen; - if ((idev->rndid[0]|idev->rndid[1]) == 0) { - if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe) - goto regen; - if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) - goto regen; - } -} -static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) -{ - if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - ipv6_regen_rndid(idev); + /* IANA Ethernet block: 0200:5EFF:FE00:0000-0200:5EFF:FE00:5212 + * Proxy Mobile IPv6: 0200:5EFF:FE00:5213 + * IANA Ethernet block: 0200:5EFF:FE00:5214-0200:5EFF:FEFF:FFFF + */ + if (ntohl(addr->s6_addr32[2]) == 0x02005eff && + (ntohl(addr->s6_addr32[3]) & 0Xff000000) == 0xfe000000) + goto regen; + + /* Reserved subnet anycast addresses */ + if (ntohl(addr->s6_addr32[2]) == 0xfdffffff && + ntohl(addr->s6_addr32[3]) >= 0Xffffff80) + goto regen; } /* @@ -2544,7 +2531,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, * no temporary address currently exists. */ read_unlock_bh(&idev->lock); - ipv6_create_tempaddr(ifp, NULL, false); + ipv6_create_tempaddr(ifp, false); } else { read_unlock_bh(&idev->lock); } @@ -2564,7 +2551,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); - int create = 0, update_lft = 0; + int create = 0; if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2608,32 +2595,19 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, unsigned long now; u32 stored_lft; - /* update lifetime (RFC2462 5.5.3 e) */ + /* Update lifetime (RFC4862 5.5.3 e) + * We deviate from RFC4862 by honoring all Valid Lifetimes to + * improve the reaction of SLAAC to renumbering events + * (draft-gont-6man-slaac-renum-06, Section 4.2) + */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { - const u32 minimum_lft = min_t(u32, - stored_lft, MIN_VALID_LIFETIME); - valid_lft = max(valid_lft, minimum_lft); - - /* RFC4862 Section 5.5.3e: - * "Note that the preferred lifetime of the - * corresponding address is always reset to - * the Preferred Lifetime in the received - * Prefix Information option, regardless of - * whether the valid lifetime is also reset or - * ignored." - * - * So we should always update prefered_lft here. - */ - update_lft = 1; - } - if (update_lft) { + if (!create && stored_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; @@ -2731,7 +2705,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ @@ -2809,6 +2783,33 @@ put: in6_dev_put(in6_dev); } +static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, + struct in6_ifreq *ireq) +{ + struct ip_tunnel_parm p = { }; + int err; + + if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) + return -EADDRNOTAVAIL; + + p.iph.daddr = ireq->ifr6_addr.s6_addr32[3]; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPV6; + p.iph.ttl = 64; + + if (!dev->netdev_ops->ndo_tunnel_ctl) + return -EOPNOTSUPP; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL); + if (err) + return err; + + dev = __dev_get_by_name(net, p.name); + if (!dev) + return -ENOBUFS; + return dev_open(dev, NULL); +} + /* * Set destination address. * Special case for SIT interfaces where we create a new "virtual" @@ -2816,61 +2817,19 @@ put: */ int addrconf_set_dstaddr(struct net *net, void __user *arg) { - struct in6_ifreq ireq; struct net_device *dev; - int err = -EINVAL; - - rtnl_lock(); + struct in6_ifreq ireq; + int err = -ENODEV; - err = -EFAULT; + if (!IS_ENABLED(CONFIG_IPV6_SIT)) + return -ENODEV; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - goto err_exit; + return -EFAULT; + rtnl_lock(); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - - err = -ENODEV; - if (!dev) - goto err_exit; - -#if IS_ENABLED(CONFIG_IPV6_SIT) - if (dev->type == ARPHRD_SIT) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - err = -EADDRNOTAVAIL; - if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) - goto err_exit; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; - p.iph.saddr = 0; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPV6; - p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else - err = -EOPNOTSUPP; - - if (err == 0) { - err = -ENOBUFS; - dev = __dev_get_by_name(net, p.name); - if (!dev) - goto err_exit; - err = dev_open(dev, NULL); - } - } -#endif - -err_exit: + if (dev && dev->type == ARPHRD_SIT) + err = addrconf_set_sit_dstaddr(net, dev, &ireq); rtnl_unlock(); return err; } @@ -3826,7 +3785,7 @@ restart: spin_unlock_bh(&ifa->lock); if (rt) - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -4544,7 +4503,7 @@ restart: ifpub->regen_count = 0; spin_unlock(&ifpub->lock); rcu_read_unlock_bh(); - ipv6_create_tempaddr(ifpub, ifp, true); + ipv6_create_tempaddr(ifpub, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); rcu_read_lock_bh(); @@ -4665,7 +4624,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF; if (f6i->fib6_metric != prio) { /* delete old one */ - ip6_del_rt(dev_net(ifp->idev->dev), f6i); + ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); /* add new one */ addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, @@ -6086,10 +6045,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ifp->idev->dev, 0, 0, false); if (rt) - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); } if (ifp->rt) { - ip6_del_rt(net, ifp->rt); + ip6_del_rt(net, ifp->rt, false); ifp->rt = NULL; } rt_genid_bump_ipv6(net); @@ -6108,9 +6067,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL -static -int addrconf_sysctl_forward(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_forward(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -6134,9 +6092,8 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { struct inet6_dev *idev = ctl->extra1; int min_mtu = IPV6_MIN_MTU; @@ -6206,9 +6163,8 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) return 0; } -static -int addrconf_sysctl_disable(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_disable(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -6232,9 +6188,8 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int ret; @@ -6275,7 +6230,7 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, } static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; @@ -6337,7 +6292,7 @@ out: } static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int err; @@ -6404,8 +6359,7 @@ out: static int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, - int write, - void __user *buffer, + int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -6505,10 +6459,8 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) return 0; } -static -int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -7024,9 +6976,26 @@ static int __net_init addrconf_init_net(struct net *net) goto err_alloc_dflt; if (IS_ENABLED(CONFIG_SYSCTL) && - sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { - memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); - memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + !net_eq(net, &init_net)) { + switch (sysctl_devconf_inherit_init_net) { + case 1: /* copy from init_net */ + memcpy(all, init_net.ipv6.devconf_all, + sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, + sizeof(ipv6_devconf_dflt)); + break; + case 3: /* copy from the current netns */ + memcpy(all, current->nsproxy->net_ns->ipv6.devconf_all, + sizeof(ipv6_devconf)); + memcpy(dflt, + current->nsproxy->net_ns->ipv6.devconf_dflt, + sizeof(ipv6_devconf_dflt)); + break; + case 0: + case 2: + /* use compiled values */ + break; + } } /* these will be inherited by all namespaces */ diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index ea00ce3d4117..9ebf3fe0d2b1 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -185,7 +185,8 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, return -EAFNOSUPPORT; } -static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt) +static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt, + bool skip_notify) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 345baa0a754f..0625a97a8894 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,6 +60,7 @@ #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> +#include <net/compat.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -273,7 +274,7 @@ out_rcu_unlock: } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock) + u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); @@ -297,7 +298,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; - if (with_lock) + if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ @@ -400,18 +401,20 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Make sure we are allowed to bind here. */ if (snum || !(inet->bind_address_no_port || - force_bind_address_no_port)) { + (flags & BIND_FORCE_ADDRESS_NO_PORT))) { if (sk->sk_prot->get_port(sk, snum)) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); err = -EADDRINUSE; goto out; } - err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk); - if (err) { - sk->sk_ipv6only = saved_ipv6only; - inet_reset_saddr(sk); - goto out; + if (!(flags & BIND_FROM_BPF)) { + err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk); + if (err) { + sk->sk_ipv6only = saved_ipv6only; + inet_reset_saddr(sk); + goto out; + } } } @@ -423,7 +426,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, inet->inet_dport = 0; inet->inet_daddr = 0; out: - if (with_lock) + if (flags & BIND_WITH_LOCK) release_sock(sk); return err; out_unlock: @@ -451,7 +454,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) return err; - return __inet6_bind(sk, uaddr, addr_len, false, true); + return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK); } EXPORT_SYMBOL(inet6_bind); @@ -502,9 +505,8 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock); /* * This does both peername and sockname. */ - int inet6_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) + int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; @@ -529,9 +531,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = np->saddr; else sin->sin6_addr = sk->sk_v6_rcv_saddr; - sin->sin6_port = inet->inet_sport; } + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, + peer ? BPF_CGROUP_INET6_GETPEERNAME : + BPF_CGROUP_INET6_GETSOCKNAME, + NULL); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); return sizeof(*sin); @@ -540,21 +546,25 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); switch (cmd) { case SIOCADDRT: - case SIOCDELRT: - - return ipv6_route_ioctl(net, cmd, (void __user *)arg); + case SIOCDELRT: { + struct in6_rtmsg rtmsg; + if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) + return -EFAULT; + return ipv6_route_ioctl(net, cmd, &rtmsg); + } case SIOCSIFADDR: - return addrconf_add_ifaddr(net, (void __user *) arg); + return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: - return addrconf_del_ifaddr(net, (void __user *) arg); + return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: - return addrconf_set_dstaddr(net, (void __user *) arg); + return addrconf_set_dstaddr(net, argp); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; @@ -565,6 +575,56 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet6_ioctl); +#ifdef CONFIG_COMPAT +struct compat_in6_rtmsg { + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_src; + struct in6_addr rtmsg_gateway; + u32 rtmsg_type; + u16 rtmsg_dst_len; + u16 rtmsg_src_len; + u32 rtmsg_metric; + u32 rtmsg_info; + u32 rtmsg_flags; + s32 rtmsg_ifindex; +}; + +static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_in6_rtmsg __user *ur) +{ + struct in6_rtmsg rt; + + if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, + 3 * sizeof(struct in6_addr)) || + get_user(rt.rtmsg_type, &ur->rtmsg_type) || + get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || + get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || + get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || + get_user(rt.rtmsg_info, &ur->rtmsg_info) || + get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || + get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) + return -EFAULT; + + + return ipv6_route_ioctl(sock_net(sk), cmd, &rt); +} + +int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet6_compat_routing_ioctl(sk, cmd, argp); + default: + return -ENOIOCTLCMD; + } +} +EXPORT_SYMBOL_GPL(inet6_compat_ioctl); +#endif /* CONFIG_COMPAT */ + INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -626,6 +686,7 @@ const struct proto_ops inet6_stream_ops = { .read_sock = tcp_read_sock, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif @@ -654,6 +715,7 @@ const struct proto_ops inet6_dgram_ops = { .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index fed91ab7ec46..893261230ffc 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -364,7 +364,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); - ip6_del_rt(dev_net(idev->dev), aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); return 0; @@ -393,7 +393,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) addrconf_leave_solict(idev, &aca->aca_addr); - ip6_del_rt(dev_net(idev->dev), aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index bb6fc0d54dae..ad5f6f6ba333 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -68,11 +68,6 @@ static inline struct ila_addr *ila_a2i(struct in6_addr *addr) return (struct ila_addr *)addr; } -static inline bool ila_addr_is_ila(struct ila_addr *iaddr) -{ - return (iaddr->ident.type != ILA_ATYPE_IID); -} - struct ila_params { struct ila_locator locator; struct ila_locator locator_match; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 5fc1f4e0c0cf..a1ac0e3d8c60 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -601,8 +601,6 @@ out_ret: return ret; } -#define ILA_HASH_TABLE_SIZE 1024 - int ila_xlat_init_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 20314895509c..49ee89bbcba0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2467,7 +2467,7 @@ void fib6_gc_cleanup(void) } #ifdef CONFIG_PROC_FS -static int ipv6_route_seq_show(struct seq_file *seq, void *v) +static int ipv6_route_native_seq_show(struct seq_file *seq, void *v) { struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; @@ -2625,7 +2625,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) return w->node && !(w->state == FWS_U && w->node == w->root); } -static void ipv6_route_seq_stop(struct seq_file *seq, void *v) +static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v) __releases(RCU_BH) { struct net *net = seq_file_net(seq); @@ -2637,6 +2637,62 @@ static void ipv6_route_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock_bh(); } +#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) +static int ipv6_route_prog_seq_show(struct bpf_prog *prog, + struct bpf_iter_meta *meta, + void *v) +{ + struct bpf_iter__ipv6_route ctx; + + ctx.meta = meta; + ctx.rt = v; + return bpf_iter_run_prog(prog, &ctx); +} + +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ + struct ipv6_route_iter *iter = seq->private; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, false); + if (!prog) + return ipv6_route_native_seq_show(seq, v); + + ret = ipv6_route_prog_seq_show(prog, &meta, v); + iter->w.leaf = NULL; + + return ret; +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + if (!v) { + meta.seq = seq; + prog = bpf_iter_get_info(&meta, true); + if (prog) + (void)ipv6_route_prog_seq_show(prog, &meta, v); + } + + ipv6_route_native_seq_stop(seq, v); +} +#else +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ + return ipv6_route_native_seq_show(seq, v); +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) +{ + ipv6_route_native_seq_stop(seq, v); +} +#endif + const struct seq_operations ipv6_route_seq_ops = { .start = ipv6_route_seq_start, .next = ipv6_route_seq_next, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4703b09808d0..821d96c720b9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -89,6 +89,11 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *collect_md_tun; }; +static inline int ip6_tnl_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + static struct net_device_stats *ip6_get_stats(struct net_device *dev) { struct pcpu_sw_netstats tmp, sum = { 0 }; @@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } +static int +mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __u32 rel_info = ntohl(info); + int err, rel_msg = 0; + u8 rel_type = type; + u8 rel_code = code; + + err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code, + &rel_msg, &rel_info, offset); + return err; +} + static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) @@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, return IP6_ECN_decapsulate(ipv6h, skb); } +static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) +{ + /* ECN is not supported in AF_MPLS */ + return 0; +} + __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) @@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = { .proto = htons(ETH_P_IP), }; +static const struct tnl_ptk_info tpi_mpls = { + /* no tunnel info required for mplsip6. */ + .proto = htons(ETH_P_MPLS_UC), +}; + static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, const struct tnl_ptk_info *tpi, int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, @@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb) ip6ip6_dscp_ecn_decapsulate); } +static int mplsip6_rcv(struct sk_buff *skb) +{ + return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls, + mplsip6_dscp_ecn_decapsulate); +} + struct ipv6_tel_txoption { struct ipv6_txoptions ops; __u8 dst_opt[8]; @@ -1232,6 +1270,8 @@ route_lookup: ipv6_push_frag_opts(skb, &opt.ops, &proto); } + skb_set_inner_ipproto(skb, proto); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1253,22 +1293,22 @@ tx_err_dst_release: EXPORT_SYMBOL(ip6_tnl_xmit); static inline int -ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, + u8 protocol) { struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h; const struct iphdr *iph; int encap_limit = -1; + __u16 offset; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield, orig_dsfield; __u32 mtu; u8 tproto; int err; - iph = ip_hdr(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = READ_ONCE(t->parms.proto); - if (tproto != IPPROTO_IPIP && tproto != 0) + if (tproto != protocol && tproto != 0) return -1; if (t->parms.collect_md) { @@ -1281,129 +1321,100 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) return -1; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_proto = protocol; fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + break; + default: + orig_dsfield = dsfield; + break; + } } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; + if (protocol == IPPROTO_IPV6) { + offset = ip6_tnl_parse_tlv_enc_lim(skb, + skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have + * reallocated skb->head + */ + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv4_get_dsfield(iph); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - } - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - - if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) - return -1; - - skb_set_inner_ipproto(skb, IPPROTO_IPIP); - - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPIP); - if (err != 0) { - /* XXX: send ICMP error even if DF is not set. */ - if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - return -1; - } - - return 0; -} - -static inline int -ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h; - int encap_limit = -1; - __u16 offset; - struct flowi6 fl6; - __u8 dsfield; - __u32 mtu; - u8 tproto; - int err; - - ipv6h = ipv6_hdr(skb); - tproto = READ_ONCE(t->parms.proto); - if ((tproto != IPPROTO_IPV6 && tproto != 0) || - ip6_tnl_addr_conflict(t, ipv6h)) - return -1; - - if (t->parms.collect_md) { - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - - tun_info = skb_tunnel_info(skb); - if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || - ip_tunnel_info_af(tun_info) != AF_INET6)) - return -1; - key = &tun_info->key; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; - fl6.saddr = key->u.ipv6.src; - fl6.daddr = key->u.ipv6.dst; - fl6.flowlabel = key->label; - dsfield = key->tos; - } else { - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ - ipv6h = ipv6_hdr(skb); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - - tel = (void *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { - encap_limit = t->parms.encap_limit; } memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_proto = protocol; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv6_get_dsfield(ipv6h); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else fl6.flowi6_mark = t->parms.fwmark; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + break; + default: + orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo); + break; + } } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPV6); + protocol); if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + switch (protocol) { + case IPPROTO_IPIP: + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + break; + case IPPROTO_IPV6: + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + break; + default: + break; + } return -1; } @@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + u8 ipproto; int ret; if (!pskb_inet_may_pull(skb)) @@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): - ret = ip4ip6_tnl_xmit(skb, dev); + ipproto = IPPROTO_IPIP; break; case htons(ETH_P_IPV6): - ret = ip6ip6_tnl_xmit(skb, dev); + if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb))) + goto tx_err; + ipproto = IPPROTO_IPV6; + break; + case htons(ETH_P_MPLS_UC): + ipproto = IPPROTO_MPLS; break; default: goto tx_err; } + ret = ipxip6_tnl_xmit(skb, dev, ipproto); if (ret < 0) goto tx_err; @@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; +static struct xfrm6_tunnel mplsip6_handler __read_mostly = { + .handler = mplsip6_rcv, + .err_handler = mplsip6_err, + .priority = 1, +}; + static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + + if (ip6_tnl_mpls_supported()) { + err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS); + if (err < 0) { + pr_err("%s: can't register mplsip6\n", __func__); + goto out_mplsip6; + } + } + err = rtnl_link_register(&ip6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void) return 0; rtnl_link_failed: + if (ip6_tnl_mpls_supported()) + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS); +out_mplsip6: xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); @@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) pr_info("%s: can't deregister ip6ip6\n", __func__); + if (ip6_tnl_mpls_supported() && + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS)) + pr_info("%s: can't deregister mplsip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 18d05403d3b5..e10258c2210e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int do_ipv6_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + int omode, add; + + if (greqs->gsr_group.ss_family != AF_INET6 || + greqs->gsr_source.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct sockaddr_in6 *psin6; + int retv; + + psin6 = (struct sockaddr_in6 *)&greqs->gsr_group; + retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface, + &psin6->sin6_addr, + MCAST_INCLUDE); + /* prior join w/ different source is ok */ + if (retv && retv != -EADDRINUSE) + return retv; + omode = MCAST_INCLUDE; + add = 1; + } else /* MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip6_mc_source(add, omode, sk, greqs); +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -715,7 +750,6 @@ done: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - int omode, add; if (optlen < sizeof(struct group_source_req)) goto e_inval; @@ -723,34 +757,7 @@ done: retv = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET6 || - greqs.gsr_source.ss_family != AF_INET6) { - retv = -EADDRNOTAVAIL; - break; - } - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct sockaddr_in6 *psin6; - - psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; - retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, - &psin6->sin6_addr, - MCAST_INCLUDE); - /* prior join w/ different source is ok */ - if (retv && retv != -EADDRINUSE) - break; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - retv = ip6_mc_source(add, omode, sk, &greqs); + retv = do_ipv6_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: @@ -780,7 +787,7 @@ done: retv = -EINVAL; break; } - retv = ip6_mc_msfilter(sk, gsf); + retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); kfree(gsf); break; @@ -973,9 +980,110 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ipv6_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (greq.gr_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); + else + err = ipv6_sock_mc_drop(sk, greq.gr_interface, + &psin6->sin6_addr); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen < sizeof(struct compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_ipv6_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffffU || + n > sysctl_mld_max_msf) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + err = ip6_mc_msfilter(sk, &(struct group_filter){ + .gf_interface = gf32->gf_interface, + .gf_group = gf32->gf_group, + .gf_fmode = gf32->gf_fmode, + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1048,18 +1156,28 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; int err; - if (len < GROUP_FILTER_SIZE(0)) + if (len < size0) return -EINVAL; - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) + if (copy_from_user(&gsf, p, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; + num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, - (struct group_filter __user *)optval, optlen); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + if (!err) { + if (num > gsf.gf_numsrc) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; + } release_sock(sk); return err; } @@ -1075,6 +1193,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; msg.msg_flags = flags; + msg.msg_control_is_user = true; lock_sock(sk); skb = np->pktoptions; @@ -1427,9 +1546,44 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ipv6_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + if (gf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + lock_sock(sk); + err = ip6_mc_msfget(sk, &gf, p->gf_slist); + release_sock(sk); + if (err) + return err; + if (num > gf.gf_numsrc) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index eaa4c2cc2fbb..7e12d2114158 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -457,7 +457,8 @@ done: return err; } -int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) +int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, + struct sockaddr_storage *list) { const struct in6_addr *group; struct ipv6_mc_socklist *pmc; @@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; } newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; - for (i = 0; i < newpsl->sl_count; ++i) { + for (i = 0; i < newpsl->sl_count; ++i, ++list) { struct sockaddr_in6 *psin6; - psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i]; + psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } err = ip6_mc_add_src(idev, group, gsf->gf_fmode, @@ -547,7 +548,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage *p) { int err, i, count, copycount; const struct in6_addr *group; @@ -592,14 +593,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } /* changes to psl require the socket lock, and a write lock * on pmc->sflock. We have the socket lock so reading here is safe. */ - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -607,7 +604,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, memset(&ss, 0, sizeof(ss)); psin6->sin6_family = AF_INET6; psin6->sin6_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1ecd4e9b0bdf..27f29b957ee7 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1302,7 +1302,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } } if (rt && lifetime == 0) { - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); rt = NULL; } @@ -1835,7 +1835,8 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, } } -int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) +int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0028aa1d7869..8ef5a7b30524 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1377,6 +1377,7 @@ const struct proto_ops inet6_sockraw_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ff847a324220..82cbb46a2a4f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -984,7 +984,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, gwaddr, dev); if (rt && !lifetime) { - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); rt = NULL; } @@ -1377,7 +1377,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) rcu_read_lock(); dev = ip6_rt_get_dev_rcu(res); - pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT); rcu_read_unlock(); if (!pcpu_rt) { fib6_info_release(f6i); @@ -3197,6 +3197,9 @@ static int ip6_dst_gc(struct dst_ops *ops) int entries; entries = dst_entries_get_fast(ops); + if (entries > rt_max_size) + entries = dst_entries_get_slow(ops); + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; @@ -3418,6 +3421,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; #endif + if (cfg->fc_is_fdb) { + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_gw_family = AF_INET6; + return 0; + } err = -ENODEV; if (cfg->fc_ifindex) { @@ -3756,9 +3764,12 @@ out: return err; } -int ip6_del_rt(struct net *net, struct fib6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify) { - struct nl_info info = { .nl_net = net }; + struct nl_info info = { + .nl_net = net, + .skip_notify = skip_notify + }; return __ip6_del_rt(rt, &info); } @@ -4279,7 +4290,7 @@ restart: (!idev || idev->cnf.accept_ra != 2) && fib6_info_hold_safe(rt)) { rcu_read_unlock(); - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); goto restart; } } @@ -4330,41 +4341,29 @@ static void rtmsg_to_fib6_config(struct net *net, }; } -int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) { struct fib6_config cfg; - struct in6_rtmsg rtmsg; int err; - switch (cmd) { - case SIOCADDRT: /* Add a route */ - case SIOCDELRT: /* Delete a route */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = copy_from_user(&rtmsg, arg, - sizeof(struct in6_rtmsg)); - if (err) - return -EFAULT; - - rtmsg_to_fib6_config(net, &rtmsg, &cfg); + if (cmd != SIOCADDRT && cmd != SIOCDELRT) + return -EINVAL; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; - rtnl_lock(); - switch (cmd) { - case SIOCADDRT: - err = ip6_route_add(&cfg, GFP_KERNEL, NULL); - break; - case SIOCDELRT: - err = ip6_route_del(&cfg, NULL); - break; - default: - err = -EINVAL; - } - rtnl_unlock(); + rtmsg_to_fib6_config(net, rtmsg, &cfg); - return err; + rtnl_lock(); + switch (cmd) { + case SIOCADDRT: + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); + break; + case SIOCDELRT: + err = ip6_route_del(&cfg, NULL); + break; } - - return -EINVAL; + rtnl_unlock(); + return err; } /* @@ -5581,7 +5580,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) + if (net->ipv4.sysctl_nexthop_compat_mode && + rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; rtm->rtm_flags |= nh_flags; @@ -6115,9 +6115,8 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_SYSCTL -static -int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int delay; @@ -6417,6 +6416,35 @@ void __init ip6_route_init_special_entries(void) #endif } +#if IS_BUILTIN(CONFIG_IPV6) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) + +static const struct bpf_iter_reg ipv6_route_reg_info = { + .target = "ipv6_route", + .seq_ops = &ipv6_route_seq_ops, + .init_seq_private = bpf_iter_init_seq_net, + .fini_seq_private = bpf_iter_fini_seq_net, + .seq_priv_size = sizeof(struct ipv6_route_iter), + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__ipv6_route, rt), + PTR_TO_BTF_ID_OR_NULL }, + }, +}; + +static int __init bpf_iter_register(void) +{ + return bpf_iter_reg_target(&ipv6_route_reg_info); +} + +static void bpf_iter_unregister(void) +{ + bpf_iter_unreg_target(&ipv6_route_reg_info); +} +#endif +#endif + int __init ip6_route_init(void) { int ret; @@ -6479,6 +6507,14 @@ int __init ip6_route_init(void) if (ret) goto out_register_late_subsys; +#if IS_BUILTIN(CONFIG_IPV6) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) + ret = bpf_iter_register(); + if (ret) + goto out_register_late_subsys; +#endif +#endif + for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); @@ -6511,6 +6547,11 @@ out_kmem_cache: void ip6_route_cleanup(void) { +#if IS_BUILTIN(CONFIG_IPV6) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) + bpf_iter_unregister(); +#endif +#endif unregister_netdevice_notifier(&ip6_route_dev_notifier); unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_cleanup(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 98954830c40b..1fbb4dfbb191 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -83,6 +83,13 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; +static inline struct sit_net *dev_to_sit_net(struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + return net_generic(t->net, sit_net_id); +} + /* * Must be invoked with rcu_read_lock */ @@ -291,14 +298,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, - struct ip_tunnel_prl __user *a) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) { + struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + if (copy_from_user(&kprl, a, sizeof(kprl))) return -EFAULT; cmax = kprl.datalen / sizeof(kprl); @@ -441,6 +452,35 @@ out: return err; } +static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_prl prl; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + + if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + return -EFAULT; + + switch (cmd) { + case SIOCDELPRL: + err = ipip6_tunnel_del_prl(t, &prl); + break; + case SIOCADDPRL: + case SIOCCHGPRL: + err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); + break; + } + dst_cache_reset(&t->dst_cache); + netdev_state_change(dev); + return err; +} + static int isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) { @@ -1151,7 +1191,53 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, netdev_state_change(t->dev); return 0; } -#endif + +static int +ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + struct ip_tunnel_parm p; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + t = ipip6_tunnel_locate(t->net, &p, 0); + } + if (!t) + t = netdev_priv(dev); + + ip6rd.prefix = t->ip6rd.prefix; + ip6rd.relay_prefix = t->ip6rd.relay_prefix; + ip6rd.prefixlen = t->ip6rd.prefixlen; + ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; + if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + return -EFAULT; + return 0; +} + +static int +ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + return -EFAULT; + + if (cmd != SIOCDEL6RD) { + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) + return err; + } else + ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev)); + return 0; +} + +#endif /* CONFIG_IPV6_SIT_6RD */ static bool ipip6_valid_ip_proto(u8 ipproto) { @@ -1164,185 +1250,145 @@ static bool ipip6_valid_ip_proto(u8 ipproto) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p) +{ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!ipip6_valid_ip_proto(p->iph.protocol)) + return -EINVAL; + if (p->iph.version != 4 || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) + return -EINVAL; + + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + return 0; +} + +static int +ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p) { - int err = 0; - struct ip_tunnel_parm p; - struct ip_tunnel_prl prl; struct ip_tunnel *t = netdev_priv(dev); - struct net *net = t->net; - struct sit_net *sitn = net_generic(net, sit_net_id); -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd ip6rd; -#endif - switch (cmd) { - case SIOCGETTUNNEL: -#ifdef CONFIG_IPV6_SIT_6RD - case SIOCGET6RD: -#endif - if (dev == sitn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - t = netdev_priv(dev); - } + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + return 0; +} - err = -EFAULT; - if (cmd == SIOCGETTUNNEL) { - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, - sizeof(p))) - goto done; -#ifdef CONFIG_IPV6_SIT_6RD +static int +ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 1); + if (!t) + return -ENOBUFS; + return 0; +} + +static int +ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 0); + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (!t) + return -ENOENT; + } else { + if (t) { + if (t->dev != dev) + return -EEXIST; } else { - ip6rd.prefix = t->ip6rd.prefix; - ip6rd.relay_prefix = t->ip6rd.relay_prefix; - ip6rd.prefixlen = t->ip6rd.prefixlen; - ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, - sizeof(ip6rd))) - goto done; -#endif + if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) || + (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr)) + return -EINVAL; + t = netdev_priv(dev); } - err = 0; - break; - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; + ipip6_tunnel_update(t, p, t->fwmark); + } - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (!ipip6_valid_ip_proto(p.iph.protocol)) - goto done; - if (p.iph.version != 4 || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } + return 0; +} - ipip6_tunnel_update(t, &p, t->fwmark); - } +static int +ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + return -ENOENT; + if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev)) + return -EPERM; + dev = t->dev; + } + unregister_netdevice(dev); + return 0; +} +static int +ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + return ipip6_tunnel_get(dev, p); + case SIOCADDTUNNEL: + return ipip6_tunnel_add(dev, p); + case SIOCCHGTUNNEL: + return ipip6_tunnel_change(dev, p); case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == sitn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - goto done; - err = -EPERM; - if (t == netdev_priv(sitn->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; + return ipip6_tunnel_del(dev, p); + default: + return -EINVAL; + } +} +static int +ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + case SIOCDELTUNNEL: + return ip_tunnel_ioctl(dev, ifr, cmd); case SIOCGETPRL: - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); - break; - + return ipip6_tunnel_get_prl(dev, ifr); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = -EFAULT; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) - goto done; - - switch (cmd) { - case SIOCDELPRL: - err = ipip6_tunnel_del_prl(t, &prl); - break; - case SIOCADDPRL: - case SIOCCHGPRL: - err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); - break; - } - dst_cache_reset(&t->dst_cache); - netdev_state_change(dev); - break; - + return ipip6_tunnel_prl_ctl(dev, ifr, cmd); #ifdef CONFIG_IPV6_SIT_6RD + case SIOCGET6RD: + return ipip6_tunnel_get6rd(dev, ifr); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, - sizeof(ip6rd))) - goto done; - - if (cmd != SIOCDEL6RD) { - err = ipip6_tunnel_update_6rd(t, &ip6rd); - if (err < 0) - goto done; - } else - ipip6_tunnel_clone_6rd(dev, sitn); - - err = 0; - break; + return ipip6_tunnel_6rdctl(dev, ifr, cmd); #endif - default: - err = -EINVAL; + return -EINVAL; } - -done: - return err; } static const struct net_device_ops ipip6_netdev_ops = { @@ -1352,6 +1398,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 63b657aa8d29..fac2135aa47b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -26,8 +26,7 @@ static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int ret; diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 21e7b95ddbfa..06c02ebe6b9b 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -21,8 +21,14 @@ static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly; static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly; +static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly; static DEFINE_MUTEX(tunnel6_mutex); +static inline int xfrm6_tunnel_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) { struct xfrm6_tunnel __rcu **pprev; @@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t->priority > priority) @@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t == handler) { @@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) } } +err: mutex_unlock(&tunnel6_mutex); synchronize_net(); @@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister); handler != NULL; \ handler = rcu_dereference(handler->next)) \ +static int tunnelmpls6_rcv(struct sk_buff *skb) +{ + struct xfrm6_tunnel *handler; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->handler(skb)) + return 0; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} + static int tunnel6_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; @@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return -ENOENT; } +static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_tunnel *handler; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + return 0; + + return -ENOENT; +} + static const struct inet6_protocol tunnel6_protocol = { .handler = tunnel6_rcv, .err_handler = tunnel6_err, @@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct inet6_protocol tunnelmpls6_protocol = { + .handler = tunnelmpls6_rcv, + .err_handler = tunnelmpls6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + static int __init tunnel6_init(void) { if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { @@ -169,6 +238,13 @@ static int __init tunnel6_init(void) inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); return -EAGAIN; } + if (xfrm6_tunnel_mpls_supported() && + inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) { + pr_err("%s: can't add protocol\n", __func__); + inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); + inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); + return -EAGAIN; + } return 0; } @@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void) pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) pr_err("%s: can't remove protocol\n", __func__); + if (xfrm6_tunnel_mpls_supported() && + inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) + pr_err("%s: can't remove protocol\n", __func__); } module_init(tunnel6_init); |