diff options
Diffstat (limited to 'net/ipv6')
35 files changed, 615 insertions, 1526 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 84c358804355..4ae17a966ae3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet6_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) @@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlattr *tb[NETCONFA_MAX+1]; struct inet6_dev *in6_dev = NULL; struct net_device *dev = NULL; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy, extack); + err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -1165,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) @@ -5181,6 +5217,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFA_TARGET_NETNSID: + case IFA_ADDRESS: + case IFA_LOCAL: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -5201,8 +5283,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -6824,6 +6905,12 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; + if (IS_ENABLED(CONFIG_SYSCTL) && + sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { + memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + } + /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 5cd0029d930e..6c79af056d9b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1, return -EAFNOSUPPORT; } +static int eafnosupport_ipv6_route_input(struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} + static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) { return NULL; @@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, + .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0d1ee82ee55b..d43d076c98f5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } +static int ip6addrlbl_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrlblmsg *ifal; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + + ifal = nlmsg_data(nlh); + if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFAL_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFAL_ADDRESS: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request"); + return -EINVAL; + } + } + + return 0; +} + static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d99753b5e39b..2f45d2a3e3a3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; +static int ipv6_route_input(struct sk_buff *skb) +{ + ip6_route_input(skb); + return skb_dst(skb)->error; +} + static const struct ipv6_stub ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup = ip6_dst_lookup, + .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5afe9f83374d..239d4a65ad6e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -296,7 +296,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index b858bd5280bf..867474abe269 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -72,7 +72,7 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, static int gue6_err_proto_handler(int proto, struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, u32 info) + u8 type, u8 code, int offset, __be32 info) { const struct inet6_protocol *ipprot; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bbcdfd299692..802faa2fcc0e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -81,7 +81,7 @@ */ static inline struct sock *icmpv6_sk(struct net *net) { - return net->ipv6.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv6.icmp_sk); } static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -953,13 +953,21 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } +static void __net_exit icmpv6_sk_exit(struct net *net) +{ + int i; + + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i)); + free_percpu(net->ipv6.icmp_sk); +} + static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; - int err, i, j; + int err, i; - net->ipv6.icmp_sk = - kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL); + net->ipv6.icmp_sk = alloc_percpu(struct sock *); if (!net->ipv6.icmp_sk) return -ENOMEM; @@ -972,7 +980,7 @@ static int __net_init icmpv6_sk_init(struct net *net) goto fail; } - net->ipv6.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. @@ -982,22 +990,10 @@ static int __net_init icmpv6_sk_init(struct net *net) return 0; fail: - for (j = 0; j < i; j++) - inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]); - kfree(net->ipv6.icmp_sk); + icmpv6_sk_exit(net); return err; } -static void __net_exit icmpv6_sk_exit(struct net *net) -{ - int i; - - for_each_possible_cpu(i) { - inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]); - } - kfree(net->ipv6.icmp_sk); -} - static struct pernet_operations icmpv6_sk_ops = { .init = icmpv6_sk_init, .exit = icmpv6_sk_exit, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 17c455ff69ff..79d2e43c05c5 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -383,12 +383,9 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) struct rhashtable_iter iter; struct ila_map *ila; spinlock_t *lock; - int ret; - - ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL); - if (ret) - goto done; + int ret = 0; + rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter); rhashtable_walk_start(&iter); for (;;) { @@ -509,23 +506,17 @@ int ila_xlat_nl_dump_start(struct netlink_callback *cb) struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter; - int ret; iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; - ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter, - GFP_KERNEL); - if (ret) { - kfree(iter); - return ret; - } + rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter); iter->skip = 0; cb->args[0] = (long)iter; - return ret; + return 0; } int ila_xlat_nl_dump_done(struct netlink_callback *cb) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 801a9a0c217e..b32c95f02128 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -524,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } -static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, +static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { struct erspan_base_hdr *ershdr; @@ -607,7 +607,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) return 0; goto out; } @@ -1719,6 +1719,27 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], return 0; } +static void ip6erspan_set_version(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + if (!data) + return; + + parms->erspan_ver = 1; + if (data[IFLA_GRE_ERSPAN_VER]) + parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); + + if (parms->erspan_ver == 1) { + if (data[IFLA_GRE_ERSPAN_INDEX]) + parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + } else if (parms->erspan_ver == 2) { + if (data[IFLA_GRE_ERSPAN_DIR]) + parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); + if (data[IFLA_GRE_ERSPAN_HWID]) + parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); + } +} + static void ip6gre_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { @@ -1767,20 +1788,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_COLLECT_METADATA]) parms->collect_md = true; - - parms->erspan_ver = 1; - if (data[IFLA_GRE_ERSPAN_VER]) - parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); - - if (parms->erspan_ver == 1) { - if (data[IFLA_GRE_ERSPAN_INDEX]) - parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); - } else if (parms->erspan_ver == 2) { - if (data[IFLA_GRE_ERSPAN_DIR]) - parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); - if (data[IFLA_GRE_ERSPAN_HWID]) - parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); - } } static int ip6gre_tap_init(struct net_device *dev) @@ -2100,9 +2107,23 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if ((p->erspan_ver == 1 || p->erspan_ver == 2) && - !p->collect_md) - o_flags |= TUNNEL_KEY; + if (p->erspan_ver == 1 || p->erspan_ver == 2) { + if (!p->collect_md) + o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) + goto nla_put_failure; + + if (p->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + goto nla_put_failure; + } else { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) + goto nla_put_failure; + } + } if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, @@ -2117,8 +2138,7 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || - nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) || - nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -2136,19 +2156,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver)) - goto nla_put_failure; - - if (p->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index)) - goto nla_put_failure; - } else if (p->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid)) - goto nla_put_failure; - } - return 0; nla_put_failure: @@ -2203,6 +2210,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, int err; ip6gre_netlink_parms(data, &nt->parms); + ip6erspan_set_version(data, &nt->parms); ign = net_generic(net, ip6gre_net_id); if (nt->parms.collect_md) { @@ -2248,6 +2256,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], if (IS_ERR(t)) return PTR_ERR(t); + ip6erspan_set_version(data, &p); ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 5c045691c302..345882d9c061 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -383,9 +383,36 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { }, }; +static struct sk_buff *sit_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)) + return ERR_PTR(-EINVAL); + + return ipv6_gso_segment(skb, features); +} + +static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) + return ERR_PTR(-EINVAL); + + return inet_gso_segment(skb, features); +} + +static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)) + return ERR_PTR(-EINVAL); + + return ipv6_gso_segment(skb, features); +} + static const struct net_offload sit_offload = { .callbacks = { - .gso_segment = ipv6_gso_segment, + .gso_segment = sit_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = sit_gro_complete, }, @@ -393,7 +420,7 @@ static const struct net_offload sit_offload = { static const struct net_offload ip4ip6_offload = { .callbacks = { - .gso_segment = inet_gso_segment, + .gso_segment = ip4ip6_gso_segment, .gro_receive = ip4ip6_gro_receive, .gro_complete = ip4ip6_gro_complete, }, @@ -401,7 +428,7 @@ static const struct net_offload ip4ip6_offload = { static const struct net_offload ip6ip6_offload = { .callbacks = { - .gso_segment = ipv6_gso_segment, + .gso_segment = ip6ip6_gso_segment, .gro_receive = sit_ip6ip6_gro_receive, .gro_complete = ip6ip6_gro_complete, }, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f9fa0302b5a..edbd12067170 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -300,6 +300,12 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (np && np->rtalert_isolate && + !net_eq(sock_net(sk), dev_net(skb->dev))) { + continue; + } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index ad1a9ccd4b44..25430c991cea 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -32,18 +32,9 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; } if (cfg->bind_ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(net, cfg->bind_ifindex); - if (!dev) { - err = -ENODEV; - goto error; - } - - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, - dev->name, strlen(dev->name) + 1); - dev_put(dev); - + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + (void *)&cfg->bind_ifindex, + sizeof(cfg->bind_ifindex)); if (err < 0) goto error; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cc01aa3f2b5e..e4dd57976737 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -97,7 +97,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr_table *mrt, bool all); +static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -393,7 +393,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt, true); + mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | + MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } @@ -1496,42 +1497,51 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt, bool all) +static void mroute_clean_tables(struct mr_table *mrt, int flags) { struct mr_mfc *c, *tmp; LIST_HEAD(list); int i; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { - if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) - continue; - mif6_delete(mrt, i, 0, &list); + if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) { + for (i = 0; i < mrt->maxvif; i++) { + if (((mrt->vif_table[i].flags & VIFF_STATIC) && + !(flags & MRT6_FLUSH_MIFS_STATIC)) || + (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS))) + continue; + mif6_delete(mrt, i, 0, &list); + } + unregister_netdevice_many(&list); } - unregister_netdevice_many(&list); /* Wipe the cache */ - list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); - list_del_rcu(&c->list); - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_ENTRY_DEL, - (struct mfc6_cache *)c, mrt->id); - mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); - mr_cache_put(c); + if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) || + (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC))) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); + list_del_rcu(&c->list); + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_ENTRY_DEL, + (struct mfc6_cache *)c, mrt->id); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + mr_cache_put(c); + } } - if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { - list_del(&c->list); - mr6_netlink_event(mrt, (struct mfc6_cache *)c, - RTM_DELROUTE); - ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); + if (flags & MRT6_FLUSH_MFC) { + if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { + spin_lock_bh(&mfc_unres_lock); + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { + list_del(&c->list); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, + RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); + } + spin_unlock_bh(&mfc_unres_lock); } - spin_unlock_bh(&mfc_unres_lock); } } @@ -1587,7 +1597,7 @@ int ip6mr_sk_done(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); - mroute_clean_tables(mrt, false); + mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC); err = 0; break; } @@ -1703,6 +1713,20 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns rtnl_unlock(); return ret; + case MRT6_FLUSH: + { + int flags; + + if (optlen != sizeof(flags)) + return -EINVAL; + if (get_user(flags, (int __user *)optval)) + return -EFAULT; + rtnl_lock(); + mroute_clean_tables(mrt, flags); + rtnl_unlock(); + return 0; + } + /* * Control PIM assert (to activate pim will activate assert) */ @@ -1964,10 +1988,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 973e215c3114..40f21fef25ff 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -787,6 +787,12 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + goto e_inval; + np->rtalert_isolate = valbool; + retv = 0; + break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; @@ -1358,6 +1364,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.recvfragsize; break; + case IPV6_ROUTER_ALERT_ISOLATE: + val = np->rtalert_isolate; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 21f6deb2aec9..42f3f5cd349f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -940,6 +940,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) { return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE); } +EXPORT_SYMBOL(ipv6_dev_mc_inc); /* * device multicast group del @@ -987,6 +988,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) return err; } +EXPORT_SYMBOL(ipv6_dev_mc_dec); /* * check if the interface/address pair is valid diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index 9405b04eecc6..dddd75d1be0e 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -41,6 +41,8 @@ static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) if (skb->len < len || len <= offset) return -EINVAL; + skb_set_transport_header(skb, offset); + return 0; } @@ -77,27 +79,27 @@ static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) len += sizeof(struct mld2_report); - return pskb_may_pull(skb, len) ? 0 : -EINVAL; + return ipv6_mc_may_pull(skb, len) ? 0 : -EINVAL; } static int ipv6_mc_check_mld_query(struct sk_buff *skb) { + unsigned int transport_len = ipv6_transport_len(skb); struct mld_msg *mld; - unsigned int len = skb_transport_offset(skb); + unsigned int len; /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - len += sizeof(struct mld_msg); - if (skb->len < len) - return -EINVAL; - /* MLDv1? */ - if (skb->len != len) { + if (transport_len != sizeof(struct mld_msg)) { /* or MLDv2? */ - len += sizeof(struct mld2_query) - sizeof(struct mld_msg); - if (skb->len < len || !pskb_may_pull(skb, len)) + if (transport_len < sizeof(struct mld2_query)) + return -EINVAL; + + len = skb_transport_offset(skb) + sizeof(struct mld2_query); + if (!ipv6_mc_may_pull(skb, len)) return -EINVAL; } @@ -115,12 +117,17 @@ static int ipv6_mc_check_mld_query(struct sk_buff *skb) static int ipv6_mc_check_mld_msg(struct sk_buff *skb) { - struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + struct mld_msg *mld; + + if (!ipv6_mc_may_pull(skb, len)) + return -EINVAL; + + mld = (struct mld_msg *)skb_transport_header(skb); switch (mld->mld_type) { case ICMPV6_MGM_REDUCTION: case ICMPV6_MGM_REPORT: - /* fall through */ return 0; case ICMPV6_MLD2_REPORT: return ipv6_mc_check_mld_reportv2(skb); @@ -136,49 +143,30 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); } -static int __ipv6_mc_check_mld(struct sk_buff *skb, - struct sk_buff **skb_trimmed) - +int ipv6_mc_check_icmpv6(struct sk_buff *skb) { - struct sk_buff *skb_chk = NULL; - unsigned int transport_len; - unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); - int ret = -EINVAL; + unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr); + unsigned int transport_len = ipv6_transport_len(skb); + struct sk_buff *skb_chk; - transport_len = ntohs(ipv6_hdr(skb)->payload_len); - transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + if (!ipv6_mc_may_pull(skb, len)) + return -EINVAL; skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) - goto err; - - if (!pskb_may_pull(skb_chk, len)) - goto err; - - ret = ipv6_mc_check_mld_msg(skb_chk); - if (ret) - goto err; - - if (skb_trimmed) - *skb_trimmed = skb_chk; - /* free now unneeded clone */ - else if (skb_chk != skb) - kfree_skb(skb_chk); - - ret = 0; + return -EINVAL; -err: - if (ret && skb_chk && skb_chk != skb) + if (skb_chk != skb) kfree_skb(skb_chk); - return ret; + return 0; } +EXPORT_SYMBOL(ipv6_mc_check_icmpv6); /** * ipv6_mc_check_mld - checks whether this is a sane MLD packet * @skb: the skb to validate - * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets * skb transport header accordingly and returns zero. @@ -188,18 +176,10 @@ err: * -ENOMSG: IP header validation succeeded but it is not an MLD packet. * -ENOMEM: A memory allocation failure happened. * - * Optionally, an skb pointer might be provided via skb_trimmed (or set it - * to NULL): After parsing an MLD packet successfully it will point to - * an skb which has its tail aligned to the IP packet end. This might - * either be the originally provided skb or a trimmed, cloned version if - * the skb frame had data beyond the IP packet. A cloned skb allows us - * to leave the original skb and its full frame unchanged (which might be - * desirable for layer 2 frame jugglers). - * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +int ipv6_mc_check_mld(struct sk_buff *skb) { int ret; @@ -211,6 +191,10 @@ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ret < 0) return ret; - return __ipv6_mc_check_mld(skb, skb_trimmed); + ret = ipv6_mc_check_icmpv6(skb); + if (ret < 0) + return ret; + + return ipv6_mc_check_mld_msg(skb); } EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0b1f3e927b..1240ccd57f39 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -86,8 +86,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -107,12 +107,17 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { +#if IS_MODULE(CONFIG_IPV6) .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, + .route_me_harder = ip6_route_me_harder, + .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, +#endif + .route_input = ip6_route_input, .fragment = ip6_fragment, - .route = nf_ip6_route, .reroute = nf_ip6_reroute, }; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 339d0762b027..ddc99a1653aa 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -31,34 +31,6 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -if NF_NAT_IPV6 - -config NFT_CHAIN_NAT_IPV6 - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - -config NFT_MASQ_IPV6 - tristate "IPv6 masquerade support for nf_tables" - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV6 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV6 - tristate "IPv6 redirect support for nf_tables" - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. - -endif # NF_NAT_IPV6 - config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -106,23 +78,6 @@ config NF_LOG_IPV6 default m if NETFILTER_ADVANCED=n select NF_LOG_COMMON -config NF_NAT_IPV6 - tristate "IPv6 NAT" - depends on NF_CONNTRACK - depends on NETFILTER_ADVANCED - select NF_NAT - help - The IPv6 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. This can be - controlled by iptables or nft. - -if NF_NAT_IPV6 - -config NF_NAT_MASQUERADE_IPV6 - bool - -endif # NF_NAT_IPV6 - config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -311,7 +266,6 @@ config IP6_NF_NAT depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_NAT - select NF_NAT_IPV6 select NETFILTER_XT_NAT help This enables the `nat' table in ip6tables. This allows masquerading, @@ -324,7 +278,7 @@ if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - select NF_NAT_MASQUERADE_IPV6 + select NF_NAT_MASQUERADE help Masquerading is a special case of NAT: all outgoing connections are changed to seem to come from a particular interface's address, and diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ea43d5256e0..3853c648ebaa 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,10 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o -nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o -nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o -obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o - # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o @@ -32,10 +28,7 @@ obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o -obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o -obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o -obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 67ba70ab9f5c..3e1fab9d7503 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -17,8 +17,6 @@ #include <net/ipv6.h> #include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> static int __net_init ip6table_nat_table_init(struct net *net); @@ -72,10 +70,10 @@ static int ip6t_nat_register_lookups(struct net *net) int i, ret; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) { - ret = nf_nat_l3proto_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]); + ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]); if (ret) { while (i) - nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]); + nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]); return ret; } @@ -89,7 +87,7 @@ static void ip6t_nat_unregister_lookups(struct net *net) int i; for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) - nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]); + nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]); } static int __net_init ip6table_nat_table_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 181da2c40f9a..3de0e9b0a482 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -136,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); + static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -177,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { - struct sk_buff *prev, *next; unsigned int payload_len; - int offset, end; + struct net_device *dev; + struct sk_buff *prev; + int offset, end, err; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) { @@ -254,55 +258,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, goto err; } - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4: - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments, including those not yet received) MUST be silently - * discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && next->ip_defrag_offset < end) - goto discard_fq; - - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ - if (skb->dev) - fq->iif = skb->dev->ifindex; + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); - skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; + + if (dev) + fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; @@ -319,11 +286,25 @@ found: fq->q.flags |= INET_FRAG_FIRST_IN; } - return 0; + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = nf_ct_frag6_reasm(fq, skb, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); + return -EINPROGRESS; -discard_fq: +insert_error: + if (err == IPFRAG_DUP) + goto err; inet_frag_kill(&fq->q); err: + skb_dst_drop(skb); return -EINVAL; } @@ -333,147 +314,66 @@ err: * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. - * - * returns true if *prev skb has been transformed into the reassembled - * skb, false otherwise. */ -static bool -nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); - WARN_ON(head == NULL); - WARN_ON(head->ip_defrag_offset != 0); - ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - return false; + goto err; + + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto err; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); - return false; - } - - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - return false; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) - return false; - - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - - add_frag_mem_limit(fq->q.net, clone->truesize); - } - - /* morph head into last received skb: prev. - * - * This allows callers of ipv6 conntrack defrag to continue - * to use the last skb(frag) passed into the reasm engine. - * The last skb frag 'silently' turns into the full reassembled skb. - * - * Since prev is also part of q->fragments we have to clone it first. - */ - if (head != prev) { - struct sk_buff *iter; - - fp = skb_clone(prev, GFP_ATOMIC); - if (!fp) - return false; - - fp->next = prev->next; - - iter = head; - while (iter) { - if (iter->next == prev) { - iter->next = fp; - break; - } - iter = iter->next; - } - - skb_morph(prev, head); - prev->next = head->next; - consume_skb(head); - head = prev; + goto err; } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ - skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_shinfo(head)->frag_list = head->next; - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - for (fp = head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp->sk = NULL; - } - sub_frag_mem_limit(fq->q.net, head->truesize); + skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); + + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - head->ignore_df = 1; - skb_mark_not_on_list(head); - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + skb->ignore_df = 1; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(skb_network_header(skb), + skb_network_header_len(skb), + skb->csum); - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; - return true; + return 0; + +err: + inet_frag_kill(&fq->q); + return -EINVAL; } /* @@ -542,7 +442,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; - struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -565,10 +464,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - return -EINVAL; - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); @@ -580,31 +475,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); - if (ret < 0) { - if (ret == -EPROTO) { - skb->transport_header = savethdr; - ret = 0; - } - goto out_unlock; + if (ret == -EPROTO) { + skb->transport_header = savethdr; + ret = 0; } /* after queue has assumed skb ownership, only 0 or -EINPROGRESS * must be returned. */ - ret = -EINPROGRESS; - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - unsigned long orefdst = skb->_skb_refdst; - - skb->_skb_refdst = 0UL; - if (nf_ct_frag6_reasm(fq, skb, dev)) - ret = 0; - skb->_skb_refdst = orefdst; - } else { - skb_dst_drop(skb); - } + if (ret) + ret = -EINPROGRESS; -out_unlock: spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c deleted file mode 100644 index 23022447eb49..000000000000 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of IPv6 NAT funded by Astaro. - */ -#include <linux/types.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <net/secure_seq.h> -#include <net/checksum.h> -#include <net/ip6_checksum.h> -#include <net/ip6_route.h> -#include <net/ipv6.h> - -#include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/netfilter/nf_nat_l4proto.h> - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; - -#ifdef CONFIG_XFRM -static void nf_nat_ipv6_decode_session(struct sk_buff *skb, - const struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned long statusbit, - struct flowi *fl) -{ - const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; - struct flowi6 *fl6 = &fl->u.ip6; - - if (ct->status & statusbit) { - fl6->daddr = t->dst.u3.in6; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl6->fl6_dport = t->dst.u.all; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl6->saddr = t->src.u3.in6; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl6->fl6_sport = t->src.u.all; - } -} -#endif - -static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct ipv6hdr *ipv6h; - __be16 frag_off; - int hdroff; - u8 nexthdr; - - if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) - return false; - - ipv6h = (void *)skb->data + iphdroff; - nexthdr = ipv6h->nexthdr; - hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), - &nexthdr, &frag_off); - if (hdroff < 0) - goto manip_addr; - - if ((frag_off & htons(~0x7)) == 0 && - !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, - target, maniptype)) - return false; - - /* must reload, offset might have changed */ - ipv6h = (void *)skb->data + iphdroff; - -manip_addr: - if (maniptype == NF_NAT_MANIP_SRC) - ipv6h->saddr = target->src.u3.in6; - else - ipv6h->daddr = target->dst.u3.in6; - - return true; -} - -static void nf_nat_ipv6_csum_update(struct sk_buff *skb, - unsigned int iphdroff, __sum16 *check, - const struct nf_conntrack_tuple *t, - enum nf_nat_manip_type maniptype) -{ - const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); - const struct in6_addr *oldip, *newip; - - if (maniptype == NF_NAT_MANIP_SRC) { - oldip = &ipv6h->saddr; - newip = &t->src.u3.in6; - } else { - oldip = &ipv6h->daddr; - newip = &t->dst.u3.in6; - } - inet_proto_csum_replace16(check, skb, oldip->s6_addr32, - newip->s6_addr32, true); -} - -static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, - u8 proto, void *data, __sum16 *check, - int datalen, int oldlen) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + - (data - (void *)skb->data); - skb->csum_offset = (void *)check - data; - *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - datalen, proto, 0); - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), true); -} - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range2 *range) -{ - if (tb[CTA_NAT_V6_MINIP]) { - nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], - sizeof(struct in6_addr)); - range->flags |= NF_NAT_RANGE_MAP_IPS; - } - - if (tb[CTA_NAT_V6_MAXIP]) - nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], - sizeof(struct in6_addr)); - else - range->max_addr = range->min_addr; - - return 0; -} -#endif - -static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { - .l3proto = NFPROTO_IPV6, - .manip_pkt = nf_nat_ipv6_manip_pkt, - .csum_update = nf_nat_ipv6_csum_update, - .csum_recalc = nf_nat_ipv6_csum_recalc, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, -#endif -#ifdef CONFIG_XFRM - .decode_session = nf_nat_ipv6_decode_session, -#endif -}; - -int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - unsigned int hdrlen) -{ - struct { - struct icmp6hdr icmp6; - struct ipv6hdr ip6; - } *inside; - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - struct nf_conntrack_tuple target; - unsigned long statusbit; - - WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); - - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) - return 0; - if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) - return 0; - - inside = (void *)skb->data + hdrlen; - if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - if (ct->status & IPS_NAT_MASK) - return 0; - } - - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply direction */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); - inside = (void *)skb->data + hdrlen; - inside->icmp6.icmp6_cksum = 0; - inside->icmp6.icmp6_cksum = - csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - skb->len - hdrlen, IPPROTO_ICMPV6, - skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } - - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); - -static unsigned int -nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be16 frag_off; - int hdrlen; - u8 nexthdr; - - ct = nf_ct_get(skb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - * have dropped it. Hence it's the user's responsibilty to - * packet filter it out, or implement conntrack/NAT for that - * protocol. 8) --RR - */ - if (!ct) - return NF_ACCEPT; - - if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) { - nexthdr = ipv6_hdr(skb)->nexthdr; - hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - state->hook, - hdrlen)) - return NF_DROP; - else - return NF_ACCEPT; - } - } - - return nf_nat_inet_fn(priv, skb, state); -} - -static unsigned int -nf_nat_ipv6_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - unsigned int ret; - struct in6_addr daddr = ipv6_hdr(skb)->daddr; - - ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) - skb_dst_drop(skb); - - return ret; -} - -static unsigned int -nf_nat_ipv6_out(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - int err; -#endif - unsigned int ret; - - ret = nf_nat_ipv6_fn(priv, skb, state); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3) || - (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } -#endif - return ret; -} - -static unsigned int -nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - int err; - - ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, - &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#ifdef CONFIG_XFRM - else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(state->net, skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#endif - } - return ret; -} - -static const struct nf_hook_ops nf_nat_ipv6_ops[] = { - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv6_in, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv6_out, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC, - }, - /* Before packet filtering, change destination */ - { - .hook = nf_nat_ipv6_local_fn, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_ipv6_fn, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_NAT_SRC, - }, -}; - -int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn); - -void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops)); -} -EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn); - -static int __init nf_nat_l3proto_ipv6_init(void) -{ - return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); -} - -static void __exit nf_nat_l3proto_ipv6_exit(void) -{ - nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); -} - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); - -module_init(nf_nat_l3proto_ipv6_init); -module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c deleted file mode 100644 index 0ad0da5a2600..000000000000 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include <linux/kernel.h> -#include <linux/atomic.h> -#include <linux/netdevice.h> -#include <linux/ipv6.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <net/netfilter/nf_nat.h> -#include <net/addrconf.h> -#include <net/ipv6.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -#define MAX_WORK_COUNT 16 - -static atomic_t v6_worker_count; - -unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, - const struct net_device *out) -{ - enum ip_conntrack_info ctinfo; - struct nf_conn_nat *nat; - struct in6_addr src; - struct nf_conn *ct; - struct nf_nat_range2 newrange; - - ct = nf_ct_get(skb, &ctinfo); - WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY))); - - if (ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) - return NF_DROP; - - nat = nf_ct_nat_ext_add(ct); - if (nat) - nat->masq_index = out->ifindex; - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = src; - newrange.max_addr.in6 = src; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); - -static int device_cmp(struct nf_conn *ct, void *ifindex) -{ - const struct nf_conn_nat *nat = nfct_nat(ct); - - if (!nat) - return 0; - if (nf_ct_l3num(ct) != NFPROTO_IPV6) - return 0; - return nat->masq_index == (int)(long)ifindex; -} - -static int masq_device_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - const struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net *net = dev_net(dev); - - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup_net(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); - - return NOTIFY_DONE; -} - -static struct notifier_block masq_dev_notifier = { - .notifier_call = masq_device_event, -}; - -struct masq_dev_work { - struct work_struct work; - struct net *net; - struct in6_addr addr; - int ifindex; -}; - -static int inet_cmp(struct nf_conn *ct, void *work) -{ - struct masq_dev_work *w = (struct masq_dev_work *)work; - struct nf_conntrack_tuple *tuple; - - if (!device_cmp(ct, (void *)(long)w->ifindex)) - return 0; - - tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); -} - -static void iterate_cleanup_work(struct work_struct *work) -{ - struct masq_dev_work *w; - - w = container_of(work, struct masq_dev_work, work); - - nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); - - put_net(w->net); - kfree(w); - atomic_dec(&v6_worker_count); - module_put(THIS_MODULE); -} - -/* ipv6 inet notifier is an atomic notifier, i.e. we cannot - * schedule. - * - * Unfortunately, nf_ct_iterate_cleanup_net can run for a long - * time if there are lots of conntracks and the system - * handles high softirq load, so it frequently calls cond_resched - * while iterating the conntrack table. - * - * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue. - * - * As we can have 'a lot' of inet_events (depending on amount - * of ipv6 addresses being deleted), we also need to add an upper - * limit to the number of queued work items. - */ -static int masq_inet6_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct inet6_ifaddr *ifa = ptr; - const struct net_device *dev; - struct masq_dev_work *w; - struct net *net; - - if (event != NETDEV_DOWN || - atomic_read(&v6_worker_count) >= MAX_WORK_COUNT) - return NOTIFY_DONE; - - dev = ifa->idev->dev; - net = maybe_get_net(dev_net(dev)); - if (!net) - return NOTIFY_DONE; - - if (!try_module_get(THIS_MODULE)) - goto err_module; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - atomic_inc(&v6_worker_count); - - INIT_WORK(&w->work, iterate_cleanup_work); - w->ifindex = dev->ifindex; - w->net = net; - w->addr = ifa->addr; - schedule_work(&w->work); - - return NOTIFY_DONE; - } - - module_put(THIS_MODULE); - err_module: - put_net(net); - return NOTIFY_DONE; -} - -static struct notifier_block masq_inet6_notifier = { - .notifier_call = masq_inet6_event, -}; - -static int masq_refcnt; -static DEFINE_MUTEX(masq_mutex); - -int nf_nat_masquerade_ipv6_register_notifier(void) -{ - int ret = 0; - - mutex_lock(&masq_mutex); - /* check if the notifier is already set */ - if (++masq_refcnt > 1) - goto out_unlock; - - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; - - ret = register_inet6addr_notifier(&masq_inet6_notifier); - if (ret) - goto err_unregister; - - mutex_unlock(&masq_mutex); - return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); -err_dec: - masq_refcnt--; -out_unlock: - mutex_unlock(&masq_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); - -void nf_nat_masquerade_ipv6_unregister_notifier(void) -{ - mutex_lock(&masq_mutex); - /* check if the notifier still has clients */ - if (--masq_refcnt > 0) - goto out_unlock; - - unregister_inet6addr_notifier(&masq_inet6_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); -out_unlock: - mutex_unlock(&masq_mutex); -} -EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index b9c8a763c863..02e9228641e0 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; + if (!nf_reject_verify_csum(proto)) + return true; + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; } diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c deleted file mode 100644 index 8a081ad7d5db..000000000000 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> - * Copyright (c) 2012 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv6.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_core.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables_ipv6.h> -#include <net/netfilter/nf_nat_l3proto.h> -#include <net/ipv6.h> - -static unsigned int nft_nat_do_chain(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static int nft_nat_ipv6_reg(struct net *net, const struct nf_hook_ops *ops) -{ - return nf_nat_l3proto_ipv6_register_fn(net, ops); -} - -static void nft_nat_ipv6_unreg(struct net *net, const struct nf_hook_ops *ops) -{ - nf_nat_l3proto_ipv6_unregister_fn(net, ops); -} - -static const struct nft_chain_type nft_chain_nat_ipv6 = { - .name = "nat", - .type = NFT_CHAIN_T_NAT, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .hooks = { - [NF_INET_PRE_ROUTING] = nft_nat_do_chain, - [NF_INET_POST_ROUTING] = nft_nat_do_chain, - [NF_INET_LOCAL_OUT] = nft_nat_do_chain, - [NF_INET_LOCAL_IN] = nft_nat_do_chain, - }, - .ops_register = nft_nat_ipv6_reg, - .ops_unregister = nft_nat_ipv6_unreg, -}; - -static int __init nft_chain_nat_ipv6_init(void) -{ - nft_register_chain_type(&nft_chain_nat_ipv6); - - return 0; -} - -static void __exit nft_chain_nat_ipv6_exit(void) -{ - nft_unregister_chain_type(&nft_chain_nat_ipv6); -} - -module_init(nft_chain_nat_ipv6_init); -module_exit(nft_chain_nat_ipv6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c deleted file mode 100644 index e06c82e9dfcd..000000000000 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_masq.h> -#include <net/netfilter/ipv6/nf_nat_masquerade.h> - -static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_masq *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - range.flags = priv->flags; - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, - nft_out(pkt)); -} - -static void -nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV6); -} - -static struct nft_expr_type nft_masq_ipv6_type; -static const struct nft_expr_ops nft_masq_ipv6_ops = { - .type = &nft_masq_ipv6_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), - .eval = nft_masq_ipv6_eval, - .init = nft_masq_init, - .destroy = nft_masq_ipv6_destroy, - .dump = nft_masq_dump, - .validate = nft_masq_validate, -}; - -static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { - .family = NFPROTO_IPV6, - .name = "masq", - .ops = &nft_masq_ipv6_ops, - .policy = nft_masq_policy, - .maxattr = NFTA_MASQ_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_masq_ipv6_module_init(void) -{ - int ret; - - ret = nft_register_expr(&nft_masq_ipv6_type); - if (ret < 0) - return ret; - - ret = nf_nat_masquerade_ipv6_register_notifier(); - if (ret) - nft_unregister_expr(&nft_masq_ipv6_type); - - return ret; -} - -static void __exit nft_masq_ipv6_module_exit(void) -{ - nft_unregister_expr(&nft_masq_ipv6_type); - nf_nat_masquerade_ipv6_unregister_notifier(); -} - -module_init(nft_masq_ipv6_module_init); -module_exit(nft_masq_ipv6_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c deleted file mode 100644 index 74269865acc8..000000000000 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nft_redir.h> -#include <net/netfilter/nf_nat_redirect.h> - -static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_redir *priv = nft_expr_priv(expr); - struct nf_nat_range2 range; - - memset(&range, 0, sizeof(range)); - if (priv->sreg_proto_min) { - range.min_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_min]); - range.max_proto.all = (__force __be16)nft_reg_load16( - ®s->data[priv->sreg_proto_max]); - range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - - range.flags |= priv->flags; - - regs->verdict.code = - nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); -} - -static void -nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) -{ - nf_ct_netns_put(ctx->net, NFPROTO_IPV6); -} - -static struct nft_expr_type nft_redir_ipv6_type; -static const struct nft_expr_ops nft_redir_ipv6_ops = { - .type = &nft_redir_ipv6_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), - .eval = nft_redir_ipv6_eval, - .init = nft_redir_init, - .destroy = nft_redir_ipv6_destroy, - .dump = nft_redir_dump, - .validate = nft_redir_validate, -}; - -static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { - .family = NFPROTO_IPV6, - .name = "redir", - .ops = &nft_redir_ipv6_ops, - .policy = nft_redir_policy, - .maxattr = NFTA_REDIR_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_redir_ipv6_module_init(void) -{ - return nft_register_expr(&nft_redir_ipv6_type); -} - -static void __exit nft_redir_ipv6_module_exit(void) -{ - nft_unregister_expr(&nft_redir_ipv6_type); -} - -module_init(nft_redir_ipv6_module_init); -module_exit(nft_redir_ipv6_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 36a3d8dc61f5..1a832f5e190b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev); +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip6_frag_expire(struct timer_list *t) { @@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, u32 *prob_offset) { - struct sk_buff *prev, *next; - struct net_device *dev; - int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); + int offset, end, fragsize; + struct sk_buff *prev_tail; + struct net_device *dev; + int err = -ENOENT; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; + err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); + /* note that if prob_offset is set, the skb is freed elsewhere, + * we do not free it here. + */ return -1; } @@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (end == offset) goto discard_fq; + err = -ENOMEM; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto discard_fq; - if (pskb_trim_rcsum(skb, end - offset)) + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto discard_fq; - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && next->ip_defrag_offset < end) - goto discard_fq; - - /* Note : skb->ip_defrag_offset and skb->sk share the same location */ + /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; - if (dev) - fq->iif = dev->ifindex; /* Makes sure compiler wont do silly aliasing games */ barrier(); - skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev_tail = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; + + if (dev) + fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; @@ -246,44 +216,48 @@ found: if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - int res; unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = ip6_frag_reasm(fq, prev, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev); skb->_skb_refdst = orefdst; - return res; + return err; } skb_dst_drop(skb); - return -1; + return -EINPROGRESS; +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASM_OVERLAPS); discard_fq: inet_frag_kill(&fq->q); -err: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); +err: kfree_skb(skb); - return -1; + return err; } /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; unsigned int nhoff; - int sum_truesize; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); @@ -292,128 +266,47 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (unlikely(ecn == 0xff)) goto out_fail; - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - WARN_ON(head == NULL); - WARN_ON(head->ip_defrag_offset != 0); + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto out_oom; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_oom; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(fq->q.net, clone->truesize); - } - /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; - skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - if (skb_mac_header_was_set(head)) - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - fp->sk = NULL; - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(fq->q.net, sum_truesize); + skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + if (skb_mac_header_was_set(skb)) + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); + + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - skb_mark_not_on_list(head); - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->nhoff = nhoff; - IP6CB(head)->flags |= IP6SKB_FRAGMENTED; - IP6CB(head)->frag_max_size = fq->q.max_size; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->nhoff = nhoff; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - skb_postpush_rcsum(head, skb_network_header(head), - skb_network_header_len(head)); + skb_postpush_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; return 1; out_oversize: @@ -464,10 +357,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - goto fail_hdr; - iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { @@ -485,6 +374,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); + /* icmpv6_param_prob() calls kfree_skb(skb) */ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); } return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 964491cf3672..4ef4bbdb49d4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1274,18 +1274,29 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { + struct fib6_info *from; struct net *net; if (!bucket || !rt6_ex) return; net = dev_net(rt6_ex->rt6i->dst.dev); + net->ipv6.rt6_stats->fib_rt_cache--; + + /* purge completely the exception to allow releasing the held resources: + * some [sk] cache may keep the dst around for unlimited time + */ + from = rcu_dereference_protected(rt6_ex->rt6i->from, + lockdep_is_held(&rt6_exception_lock)); + rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + fib6_info_release(from); + dst_dev_put(&rt6_ex->rt6i->dst); + hlist_del_rcu(&rt6_ex->hlist); dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; - net->ipv6.rt6_stats->fib_rt_cache--; } /* Remove oldest rt6_ex in bucket and free the memory @@ -1599,15 +1610,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; - - if (!from || - !(rt->rt6i_flags & RTF_CACHE)) - return; + struct fib6_info *from; rcu_read_lock(); + from = rcu_dereference(rt->from); + if (!from || !(rt->rt6i_flags & RTF_CACHE)) + goto unlock; + bucket = rcu_dereference(from->rt6i_exception_bucket); #ifdef CONFIG_IPV6_SUBTREES @@ -1626,6 +1637,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) if (rt6_ex) rt6_ex->stamp = jiffies; +unlock: rcu_read_unlock(); } @@ -2277,14 +2289,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { - bool from_set; - - rcu_read_lock(); - from_set = !!rcu_dereference(rt->from); - rcu_read_unlock(); - return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || from_set); + (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2742,20 +2748,24 @@ static int ip6_route_check_nh_onlink(struct net *net, u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; const struct in6_addr *gw_addr = &cfg->fc_gateway; u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; + struct fib6_info *from; struct rt6_info *grt; int err; err = 0; grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { + rcu_read_lock(); + from = rcu_dereference(grt->from); if (!grt->dst.error && /* ignore match if it is the default route */ - grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && + from && !ipv6_addr_any(&from->fib6_dst.addr) && (grt->rt6i_flags & flags || dev != grt->dst.dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); err = -EINVAL; } + rcu_read_unlock(); ip6_rt_put(grt); } @@ -4166,6 +4176,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } + if (tb[RTA_VIA]) { + NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); + goto errout; + } if (tb[RTA_DST]) { int plen = (rtm->rtm_dst_len + 7) >> 3; @@ -4649,7 +4663,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; + rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; @@ -4812,6 +4826,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg) arg->cb->nlh->nlmsg_seq, flags); } +static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || + rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || + rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_IIF: + case RTA_OIF: + case RTA_MARK: + case RTA_UID: + case RTA_SPORT: + case RTA_DPORT: + case RTA_IP_PROTO: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4826,8 +4907,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct flowi6 fl6 = {}; bool fibmatch; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; @@ -4873,7 +4953,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], - &fl6.flowi6_proto, extack); + &fl6.flowi6_proto, AF_INET6, + extack); if (err) goto errout; } diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 8d0ba757a46c..9b2f272ca164 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e8a1dabef803..09e440e8dfae 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1873,6 +1873,7 @@ static int __net_init sit_init_net(struct net *net) err_reg_dev: ipip6_dev_free(sitn->fb_tunnel_dev); + free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b81eb7cb815e..57ef69a10889 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -220,8 +220,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); - if (__ipv6_only_sock(sk)) return -ENETUNREACH; @@ -1864,7 +1862,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), tp->snd_cwnd, state == TCP_LISTEN ? fastopenq->max_qlen : diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2596ffdeebea..b444483cdb2b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -288,8 +288,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int peeked, peeking, off; int err; int is_udplite = IS_UDPLITE(sk); + struct udp_mib __percpu *mib; bool checksum_valid = false; - struct udp_mib *mib; int is_udp4; if (flags & MSG_ERRQUEUE) @@ -420,17 +420,19 @@ EXPORT_SYMBOL(udpv6_encap_enable); */ static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, u32 info) + u8 type, u8 code, int offset, __be32 info) { int i; for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, u32 info); + u8 type, u8 code, int offset, __be32 info); + const struct ip6_tnl_encap_ops *encap; - if (!ip6tun_encaps[i]) + encap = rcu_dereference(ip6tun_encaps[i]); + if (!encap) continue; - handler = rcu_dereference(ip6tun_encaps[i]->err_handler); + handler = encap->err_handler; if (handler && !handler(skb, opt, type, code, offset, info)) return 0; } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f5b4febeaa25..bc65db782bfb 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -344,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); xfrm_flush_gc(); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); |