diff options
Diffstat (limited to 'net/ipv6/route.c')
| -rw-r--r-- | net/ipv6/route.c | 1928 | 
1 files changed, 1037 insertions, 891 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f4d61736c41a..2ce0bd17de4f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -63,14 +63,20 @@  #include <net/lwtunnel.h>  #include <net/ip_tunnels.h>  #include <net/l3mdev.h> -#include <trace/events/fib6.h> - +#include <net/ip.h>  #include <linux/uaccess.h>  #ifdef CONFIG_SYSCTL  #include <linux/sysctl.h>  #endif +static int ip6_rt_type_to_error(u8 fib6_type); + +#define CREATE_TRACE_POINTS +#include <trace/events/fib6.h> +EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); +#undef CREATE_TRACE_POINTS +  enum rt6_nud_state {  	RT6_NUD_FAIL_HARD = -3,  	RT6_NUD_FAIL_PROBE = -2, @@ -78,7 +84,6 @@ enum rt6_nud_state {  	RT6_NUD_SUCCEED = 1  }; -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);  static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);  static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);  static unsigned int	 ip6_mtu(const struct dst_entry *dst); @@ -97,25 +102,24 @@ static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,  					   struct sk_buff *skb, u32 mtu);  static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,  					struct sk_buff *skb); -static void		rt6_dst_from_metrics_check(struct rt6_info *rt); -static int rt6_score_route(struct rt6_info *rt, int oif, int strict); -static size_t rt6_nlmsg_size(struct rt6_info *rt); -static int rt6_fill_node(struct net *net, -			 struct sk_buff *skb, struct rt6_info *rt, -			 struct in6_addr *dst, struct in6_addr *src, +static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static size_t rt6_nlmsg_size(struct fib6_info *rt); +static int rt6_fill_node(struct net *net, struct sk_buff *skb, +			 struct fib6_info *rt, struct dst_entry *dst, +			 struct in6_addr *dest, struct in6_addr *src,  			 int iif, int type, u32 portid, u32 seq,  			 unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,  					   struct in6_addr *daddr,  					   struct in6_addr *saddr);  #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net,  					   const struct in6_addr *prefix, int prefixlen,  					   const struct in6_addr *gwaddr,  					   struct net_device *dev,  					   unsigned int pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net,  					   const struct in6_addr *prefix, int prefixlen,  					   const struct in6_addr *gwaddr,  					   struct net_device *dev); @@ -184,29 +188,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)  	}  } -static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) -{ -	return dst_metrics_write_ptr(&rt->from->dst); -} - -static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) -{ -	struct rt6_info *rt = (struct rt6_info *)dst; - -	if (rt->rt6i_flags & RTF_PCPU) -		return rt6_pcpu_cow_metrics(rt); -	else if (rt->rt6i_flags & RTF_CACHE) -		return NULL; -	else -		return dst_cow_metrics_generic(dst, old); -} - -static inline const void *choose_neigh_daddr(struct rt6_info *rt, +static inline const void *choose_neigh_daddr(const struct in6_addr *p,  					     struct sk_buff *skb,  					     const void *daddr)  { -	struct in6_addr *p = &rt->rt6i_gateway; -  	if (!ipv6_addr_any(p))  		return (const void *) p;  	else if (skb) @@ -214,18 +199,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt,  	return daddr;  } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, -					  struct sk_buff *skb, -					  const void *daddr) +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, +				   struct net_device *dev, +				   struct sk_buff *skb, +				   const void *daddr)  { -	struct rt6_info *rt = (struct rt6_info *) dst;  	struct neighbour *n; -	daddr = choose_neigh_daddr(rt, skb, daddr); -	n = __ipv6_neigh_lookup(dst->dev, daddr); +	daddr = choose_neigh_daddr(gw, skb, daddr); +	n = __ipv6_neigh_lookup(dev, daddr);  	if (n)  		return n; -	return neigh_create(&nd_tbl, daddr, dst->dev); +	return neigh_create(&nd_tbl, daddr, dev); +} + +static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, +					      struct sk_buff *skb, +					      const void *daddr) +{ +	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + +	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);  }  static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) @@ -233,7 +227,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)  	struct net_device *dev = dst->dev;  	struct rt6_info *rt = (struct rt6_info *)dst; -	daddr = choose_neigh_daddr(rt, NULL, daddr); +	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);  	if (!daddr)  		return;  	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -250,7 +244,7 @@ static struct dst_ops ip6_dst_ops_template = {  	.check			=	ip6_dst_check,  	.default_advmss		=	ip6_default_advmss,  	.mtu			=	ip6_mtu, -	.cow_metrics		=	ipv6_cow_metrics, +	.cow_metrics		=	dst_cow_metrics_generic,  	.destroy		=	ip6_dst_destroy,  	.ifdown			=	ip6_dst_ifdown,  	.negative_advice	=	ip6_negative_advice, @@ -258,7 +252,7 @@ static struct dst_ops ip6_dst_ops_template = {  	.update_pmtu		=	ip6_rt_update_pmtu,  	.redirect		=	rt6_do_redirect,  	.local_out		=	__ip6_local_out, -	.neigh_lookup		=	ip6_neigh_lookup, +	.neigh_lookup		=	ip6_dst_neigh_lookup,  	.confirm_neigh		=	ip6_confirm_neigh,  }; @@ -288,13 +282,22 @@ static struct dst_ops ip6_dst_blackhole_ops = {  	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,  	.redirect		=	ip6_rt_blackhole_redirect,  	.cow_metrics		=	dst_cow_metrics_generic, -	.neigh_lookup		=	ip6_neigh_lookup, +	.neigh_lookup		=	ip6_dst_neigh_lookup,  };  static const u32 ip6_template_metrics[RTAX_MAX] = {  	[RTAX_HOPLIMIT - 1] = 0,  }; +static const struct fib6_info fib6_null_entry_template = { +	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP), +	.fib6_protocol  = RTPROT_KERNEL, +	.fib6_metric	= ~(u32)0, +	.fib6_ref	= ATOMIC_INIT(1), +	.fib6_type	= RTN_UNREACHABLE, +	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics, +}; +  static const struct rt6_info ip6_null_entry_template = {  	.dst = {  		.__refcnt	= ATOMIC_INIT(1), @@ -305,9 +308,6 @@ static const struct rt6_info ip6_null_entry_template = {  		.output		= ip6_pkt_discard_out,  	},  	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP), -	.rt6i_protocol  = RTPROT_KERNEL, -	.rt6i_metric	= ~(u32) 0, -	.rt6i_ref	= ATOMIC_INIT(1),  };  #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -322,9 +322,6 @@ static const struct rt6_info ip6_prohibit_entry_template = {  		.output		= ip6_pkt_prohibit_out,  	},  	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP), -	.rt6i_protocol  = RTPROT_KERNEL, -	.rt6i_metric	= ~(u32) 0, -	.rt6i_ref	= ATOMIC_INIT(1),  };  static const struct rt6_info ip6_blk_hole_entry_template = { @@ -337,9 +334,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {  		.output		= dst_discard_out,  	},  	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP), -	.rt6i_protocol  = RTPROT_KERNEL, -	.rt6i_metric	= ~(u32) 0, -	.rt6i_ref	= ATOMIC_INIT(1),  };  #endif @@ -349,14 +343,12 @@ static void rt6_info_init(struct rt6_info *rt)  	struct dst_entry *dst = &rt->dst;  	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); -	INIT_LIST_HEAD(&rt->rt6i_siblings);  	INIT_LIST_HEAD(&rt->rt6i_uncached);  }  /* allocate dst with ip6_dst_ops */ -static struct rt6_info *__ip6_dst_alloc(struct net *net, -					struct net_device *dev, -					int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, +			       int flags)  {  	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,  					1, DST_OBSOLETE_FORCE_CHK, flags); @@ -368,34 +360,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,  	return rt;  } - -struct rt6_info *ip6_dst_alloc(struct net *net, -			       struct net_device *dev, -			       int flags) -{ -	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); - -	if (rt) { -		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); -		if (!rt->rt6i_pcpu) { -			dst_release_immediate(&rt->dst); -			return NULL; -		} -	} - -	return rt; -}  EXPORT_SYMBOL(ip6_dst_alloc);  static void ip6_dst_destroy(struct dst_entry *dst)  {  	struct rt6_info *rt = (struct rt6_info *)dst; -	struct rt6_exception_bucket *bucket; -	struct rt6_info *from = rt->from; +	struct fib6_info *from;  	struct inet6_dev *idev;  	dst_destroy_metrics_generic(dst); -	free_percpu(rt->rt6i_pcpu);  	rt6_uncached_list_del(rt);  	idev = rt->rt6i_idev; @@ -403,14 +376,12 @@ static void ip6_dst_destroy(struct dst_entry *dst)  		rt->rt6i_idev = NULL;  		in6_dev_put(idev);  	} -	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); -	if (bucket) { -		rt->rt6i_exception_bucket = NULL; -		kfree(bucket); -	} -	rt->from = NULL; -	dst_release(&from->dst); +	rcu_read_lock(); +	from = rcu_dereference(rt->from); +	rcu_assign_pointer(rt->from, NULL); +	fib6_info_release(from); +	rcu_read_unlock();  }  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -440,23 +411,27 @@ static bool __rt6_check_expired(const struct rt6_info *rt)  static bool rt6_check_expired(const struct rt6_info *rt)  { +	struct fib6_info *from; + +	from = rcu_dereference(rt->from); +  	if (rt->rt6i_flags & RTF_EXPIRES) {  		if (time_after(jiffies, rt->dst.expires))  			return true; -	} else if (rt->from) { +	} else if (from) {  		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || -			rt6_check_expired(rt->from); +			fib6_check_expired(from);  	}  	return false;  } -static struct rt6_info *rt6_multipath_select(const struct net *net, -					     struct rt6_info *match, -					     struct flowi6 *fl6, int oif, -					     const struct sk_buff *skb, -					     int strict) +struct fib6_info *fib6_multipath_select(const struct net *net, +					struct fib6_info *match, +					struct flowi6 *fl6, int oif, +					const struct sk_buff *skb, +					int strict)  { -	struct rt6_info *sibling, *next_sibling; +	struct fib6_info *sibling, *next_sibling;  	/* We might have already computed the hash for ICMPv6 errors. In such  	 * case it will always be non-zero. Otherwise now is the time to do it. @@ -464,12 +439,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,  	if (!fl6->mp_hash)  		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); -	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) +	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))  		return match; -	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, -				 rt6i_siblings) { -		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) +	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, +				 fib6_siblings) { +		int nh_upper_bound; + +		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); +		if (fl6->mp_hash > nh_upper_bound)  			continue;  		if (rt6_score_route(sibling, oif, strict) < 0)  			break; @@ -484,38 +462,27 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,   *	Route lookup. rcu_read_lock() should be held.   */ -static inline struct rt6_info *rt6_device_match(struct net *net, -						    struct rt6_info *rt, +static inline struct fib6_info *rt6_device_match(struct net *net, +						 struct fib6_info *rt,  						    const struct in6_addr *saddr,  						    int oif,  						    int flags)  { -	struct rt6_info *local = NULL; -	struct rt6_info *sprt; +	struct fib6_info *sprt; -	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) +	if (!oif && ipv6_addr_any(saddr) && +	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))  		return rt; -	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { -		struct net_device *dev = sprt->dst.dev; +	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { +		const struct net_device *dev = sprt->fib6_nh.nh_dev; -		if (sprt->rt6i_nh_flags & RTNH_F_DEAD) +		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)  			continue;  		if (oif) {  			if (dev->ifindex == oif)  				return sprt; -			if (dev->flags & IFF_LOOPBACK) { -				if (!sprt->rt6i_idev || -				    sprt->rt6i_idev->dev->ifindex != oif) { -					if (flags & RT6_LOOKUP_F_IFACE) -						continue; -					if (local && -					    local->rt6i_idev->dev->ifindex == oif) -						continue; -				} -				local = sprt; -			}  		} else {  			if (ipv6_chk_addr(net, saddr, dev,  					  flags & RT6_LOOKUP_F_IFACE)) @@ -523,15 +490,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,  		}  	} -	if (oif) { -		if (local) -			return local; +	if (oif && flags & RT6_LOOKUP_F_IFACE) +		return net->ipv6.fib6_null_entry; -		if (flags & RT6_LOOKUP_F_IFACE) -			return net->ipv6.ip6_null_entry; -	} - -	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; +	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;  }  #ifdef CONFIG_IPV6_ROUTER_PREF @@ -553,10 +515,13 @@ static void rt6_probe_deferred(struct work_struct *w)  	kfree(work);  } -static void rt6_probe(struct rt6_info *rt) +static void rt6_probe(struct fib6_info *rt)  {  	struct __rt6_probe_work *work; +	const struct in6_addr *nh_gw;  	struct neighbour *neigh; +	struct net_device *dev; +  	/*  	 * Okay, this does not seem to be appropriate  	 * for now, however, we need to check if it @@ -565,20 +530,25 @@ static void rt6_probe(struct rt6_info *rt)  	 * Router Reachability Probe MUST be rate-limited  	 * to no more than one per minute.  	 */ -	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) +	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))  		return; + +	nh_gw = &rt->fib6_nh.nh_gw; +	dev = rt->fib6_nh.nh_dev;  	rcu_read_lock_bh(); -	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); +	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);  	if (neigh) { +		struct inet6_dev *idev; +  		if (neigh->nud_state & NUD_VALID)  			goto out; +		idev = __in6_dev_get(dev);  		work = NULL;  		write_lock(&neigh->lock);  		if (!(neigh->nud_state & NUD_VALID) &&  		    time_after(jiffies, -			       neigh->updated + -			       rt->rt6i_idev->cnf.rtr_probe_interval)) { +			       neigh->updated + idev->cnf.rtr_probe_interval)) {  			work = kmalloc(sizeof(*work), GFP_ATOMIC);  			if (work)  				__neigh_set_probe_once(neigh); @@ -590,9 +560,9 @@ static void rt6_probe(struct rt6_info *rt)  	if (work) {  		INIT_WORK(&work->work, rt6_probe_deferred); -		work->target = rt->rt6i_gateway; -		dev_hold(rt->dst.dev); -		work->dev = rt->dst.dev; +		work->target = *nh_gw; +		dev_hold(dev); +		work->dev = dev;  		schedule_work(&work->work);  	} @@ -600,7 +570,7 @@ out:  	rcu_read_unlock_bh();  }  #else -static inline void rt6_probe(struct rt6_info *rt) +static inline void rt6_probe(struct fib6_info *rt)  {  }  #endif @@ -608,28 +578,27 @@ static inline void rt6_probe(struct rt6_info *rt)  /*   * Default Router Selection (RFC 2461 6.3.6)   */ -static inline int rt6_check_dev(struct rt6_info *rt, int oif) +static inline int rt6_check_dev(struct fib6_info *rt, int oif)  { -	struct net_device *dev = rt->dst.dev; +	const struct net_device *dev = rt->fib6_nh.nh_dev; +  	if (!oif || dev->ifindex == oif)  		return 2; -	if ((dev->flags & IFF_LOOPBACK) && -	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) -		return 1;  	return 0;  } -static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)  { -	struct neighbour *neigh;  	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; +	struct neighbour *neigh; -	if (rt->rt6i_flags & RTF_NONEXTHOP || -	    !(rt->rt6i_flags & RTF_GATEWAY)) +	if (rt->fib6_flags & RTF_NONEXTHOP || +	    !(rt->fib6_flags & RTF_GATEWAY))  		return RT6_NUD_SUCCEED;  	rcu_read_lock_bh(); -	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); +	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, +					  &rt->fib6_nh.nh_gw);  	if (neigh) {  		read_lock(&neigh->lock);  		if (neigh->nud_state & NUD_VALID) @@ -650,8 +619,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)  	return ret;  } -static int rt6_score_route(struct rt6_info *rt, int oif, -			   int strict) +static int rt6_score_route(struct fib6_info *rt, int oif, int strict)  {  	int m; @@ -659,7 +627,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif,  	if (!m && (strict & RT6_LOOKUP_F_IFACE))  		return RT6_NUD_FAIL_HARD;  #ifdef CONFIG_IPV6_ROUTER_PREF -	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; +	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;  #endif  	if (strict & RT6_LOOKUP_F_REACHABLE) {  		int n = rt6_check_neigh(rt); @@ -669,23 +637,37 @@ static int rt6_score_route(struct rt6_info *rt, int oif,  	return m;  } -static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, -				   int *mpri, struct rt6_info *match, +/* called with rc_read_lock held */ +static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) +{ +	const struct net_device *dev = fib6_info_nh_dev(f6i); +	bool rc = false; + +	if (dev) { +		const struct inet6_dev *idev = __in6_dev_get(dev); + +		rc = !!idev->cnf.ignore_routes_with_linkdown; +	} + +	return rc; +} + +static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, +				   int *mpri, struct fib6_info *match,  				   bool *do_rr)  {  	int m;  	bool match_do_rr = false; -	struct inet6_dev *idev = rt->rt6i_idev; -	if (rt->rt6i_nh_flags & RTNH_F_DEAD) +	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)  		goto out; -	if (idev->cnf.ignore_routes_with_linkdown && -	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN && +	if (fib6_ignore_linkdown(rt) && +	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&  	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))  		goto out; -	if (rt6_check_expired(rt)) +	if (fib6_check_expired(rt))  		goto out;  	m = rt6_score_route(rt, oif, strict); @@ -709,19 +691,19 @@ out:  	return match;  } -static struct rt6_info *find_rr_leaf(struct fib6_node *fn, -				     struct rt6_info *leaf, -				     struct rt6_info *rr_head, +static struct fib6_info *find_rr_leaf(struct fib6_node *fn, +				     struct fib6_info *leaf, +				     struct fib6_info *rr_head,  				     u32 metric, int oif, int strict,  				     bool *do_rr)  { -	struct rt6_info *rt, *match, *cont; +	struct fib6_info *rt, *match, *cont;  	int mpri = -1;  	match = NULL;  	cont = NULL; -	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) { -		if (rt->rt6i_metric != metric) { +	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) { +		if (rt->fib6_metric != metric) {  			cont = rt;  			break;  		} @@ -730,8 +712,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,  	}  	for (rt = leaf; rt && rt != rr_head; -	     rt = rcu_dereference(rt->rt6_next)) { -		if (rt->rt6i_metric != metric) { +	     rt = rcu_dereference(rt->fib6_next)) { +		if (rt->fib6_metric != metric) {  			cont = rt;  			break;  		} @@ -742,22 +724,22 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,  	if (match || !cont)  		return match; -	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next)) +	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))  		match = find_match(rt, oif, strict, &mpri, match, do_rr);  	return match;  } -static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, +static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,  				   int oif, int strict)  { -	struct rt6_info *leaf = rcu_dereference(fn->leaf); -	struct rt6_info *match, *rt0; +	struct fib6_info *leaf = rcu_dereference(fn->leaf); +	struct fib6_info *match, *rt0;  	bool do_rr = false;  	int key_plen; -	if (!leaf || leaf == net->ipv6.ip6_null_entry) -		return net->ipv6.ip6_null_entry; +	if (!leaf || leaf == net->ipv6.fib6_null_entry) +		return net->ipv6.fib6_null_entry;  	rt0 = rcu_dereference(fn->rr_ptr);  	if (!rt0) @@ -768,39 +750,39 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,  	 * (This might happen if all routes under fn are deleted from  	 * the tree and fib6_repair_tree() is called on the node.)  	 */ -	key_plen = rt0->rt6i_dst.plen; +	key_plen = rt0->fib6_dst.plen;  #ifdef CONFIG_IPV6_SUBTREES -	if (rt0->rt6i_src.plen) -		key_plen = rt0->rt6i_src.plen; +	if (rt0->fib6_src.plen) +		key_plen = rt0->fib6_src.plen;  #endif  	if (fn->fn_bit != key_plen) -		return net->ipv6.ip6_null_entry; +		return net->ipv6.fib6_null_entry; -	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, +	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,  			     &do_rr);  	if (do_rr) { -		struct rt6_info *next = rcu_dereference(rt0->rt6_next); +		struct fib6_info *next = rcu_dereference(rt0->fib6_next);  		/* no entries matched; do round-robin */ -		if (!next || next->rt6i_metric != rt0->rt6i_metric) +		if (!next || next->fib6_metric != rt0->fib6_metric)  			next = leaf;  		if (next != rt0) { -			spin_lock_bh(&leaf->rt6i_table->tb6_lock); +			spin_lock_bh(&leaf->fib6_table->tb6_lock);  			/* make sure next is not being deleted from the tree */ -			if (next->rt6i_node) +			if (next->fib6_node)  				rcu_assign_pointer(fn->rr_ptr, next); -			spin_unlock_bh(&leaf->rt6i_table->tb6_lock); +			spin_unlock_bh(&leaf->fib6_table->tb6_lock);  		}  	} -	return match ? match : net->ipv6.ip6_null_entry; +	return match ? match : net->ipv6.fib6_null_entry;  } -static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)  { -	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); +	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));  }  #ifdef CONFIG_IPV6_ROUTE_INFO @@ -812,7 +794,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,  	struct in6_addr prefix_buf, *prefix;  	unsigned int pref;  	unsigned long lifetime; -	struct rt6_info *rt; +	struct fib6_info *rt;  	if (len < sizeof(struct route_info)) {  		return -EINVAL; @@ -850,13 +832,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,  	}  	if (rinfo->prefix_len == 0) -		rt = rt6_get_dflt_router(gwaddr, dev); +		rt = rt6_get_dflt_router(net, gwaddr, dev);  	else  		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,  					gwaddr, dev);  	if (rt && !lifetime) { -		ip6_del_rt(rt); +		ip6_del_rt(net, rt);  		rt = NULL;  	} @@ -864,21 +846,162 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,  		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,  					dev, pref);  	else if (rt) -		rt->rt6i_flags = RTF_ROUTEINFO | -				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); +		rt->fib6_flags = RTF_ROUTEINFO | +				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);  	if (rt) {  		if (!addrconf_finite_timeout(lifetime)) -			rt6_clean_expires(rt); +			fib6_clean_expires(rt);  		else -			rt6_set_expires(rt, jiffies + HZ * lifetime); +			fib6_set_expires(rt, jiffies + HZ * lifetime); -		ip6_rt_put(rt); +		fib6_info_release(rt);  	}  	return 0;  }  #endif +/* + *	Misc support functions + */ + +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +{ +	struct net_device *dev = rt->fib6_nh.nh_dev; + +	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { +		/* for copies of local routes, dst->dev needs to be the +		 * device if it is a master device, the master device if +		 * device is enslaved, and the loopback as the default +		 */ +		if (netif_is_l3_slave(dev) && +		    !rt6_need_strict(&rt->fib6_dst.addr)) +			dev = l3mdev_master_dev_rcu(dev); +		else if (!netif_is_l3_master(dev)) +			dev = dev_net(dev)->loopback_dev; +		/* last case is netif_is_l3_master(dev) is true in which +		 * case we want dev returned to be dev +		 */ +	} + +	return dev; +} + +static const int fib6_prop[RTN_MAX + 1] = { +	[RTN_UNSPEC]	= 0, +	[RTN_UNICAST]	= 0, +	[RTN_LOCAL]	= 0, +	[RTN_BROADCAST]	= 0, +	[RTN_ANYCAST]	= 0, +	[RTN_MULTICAST]	= 0, +	[RTN_BLACKHOLE]	= -EINVAL, +	[RTN_UNREACHABLE] = -EHOSTUNREACH, +	[RTN_PROHIBIT]	= -EACCES, +	[RTN_THROW]	= -EAGAIN, +	[RTN_NAT]	= -EINVAL, +	[RTN_XRESOLVE]	= -EINVAL, +}; + +static int ip6_rt_type_to_error(u8 fib6_type) +{ +	return fib6_prop[fib6_type]; +} + +static unsigned short fib6_info_dst_flags(struct fib6_info *rt) +{ +	unsigned short flags = 0; + +	if (rt->dst_nocount) +		flags |= DST_NOCOUNT; +	if (rt->dst_nopolicy) +		flags |= DST_NOPOLICY; +	if (rt->dst_host) +		flags |= DST_HOST; + +	return flags; +} + +static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +{ +	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + +	switch (ort->fib6_type) { +	case RTN_BLACKHOLE: +		rt->dst.output = dst_discard_out; +		rt->dst.input = dst_discard; +		break; +	case RTN_PROHIBIT: +		rt->dst.output = ip6_pkt_prohibit_out; +		rt->dst.input = ip6_pkt_prohibit; +		break; +	case RTN_THROW: +	case RTN_UNREACHABLE: +	default: +		rt->dst.output = ip6_pkt_discard_out; +		rt->dst.input = ip6_pkt_discard; +		break; +	} +} + +static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) +{ +	rt->dst.flags |= fib6_info_dst_flags(ort); + +	if (ort->fib6_flags & RTF_REJECT) { +		ip6_rt_init_dst_reject(rt, ort); +		return; +	} + +	rt->dst.error = 0; +	rt->dst.output = ip6_output; + +	if (ort->fib6_type == RTN_LOCAL) { +		rt->dst.input = ip6_input; +	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { +		rt->dst.input = ip6_mc_input; +	} else { +		rt->dst.input = ip6_forward; +	} + +	if (ort->fib6_nh.nh_lwtstate) { +		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); +		lwtunnel_set_redirect(&rt->dst); +	} + +	rt->dst.lastuse = jiffies; +} + +static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) +{ +	rt->rt6i_flags &= ~RTF_EXPIRES; +	fib6_info_hold(from); +	rcu_assign_pointer(rt->from, from); +	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); +	if (from->fib6_metrics != &dst_default_metrics) { +		rt->dst._metrics |= DST_METRICS_REFCOUNTED; +		refcount_inc(&from->fib6_metrics->refcnt); +	} +} + +static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +{ +	struct net_device *dev = fib6_info_nh_dev(ort); + +	ip6_rt_init_dst(rt, ort); + +	rt->rt6i_dst = ort->fib6_dst; +	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; +	rt->rt6i_gateway = ort->fib6_nh.nh_gw; +	rt->rt6i_flags = ort->fib6_flags; +	rt6_set_from(rt, ort); +#ifdef CONFIG_IPV6_SUBTREES +	rt->rt6i_src = ort->fib6_src; +#endif +	rt->rt6i_prefsrc = ort->fib6_prefsrc; +	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); +} +  static struct fib6_node* fib6_backtrack(struct fib6_node *fn,  					struct in6_addr *saddr)  { @@ -889,7 +1012,7 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,  		pn = rcu_dereference(fn->parent);  		sn = FIB6_SUBTREE(pn);  		if (sn && sn != fn) -			fn = fib6_lookup(sn, NULL, saddr); +			fn = fib6_node_lookup(sn, NULL, saddr);  		else  			fn = pn;  		if (fn->fn_flags & RTN_RTINFO) @@ -914,50 +1037,74 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,  	return false;  } +/* called with rcu_lock held */ +static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +{ +	unsigned short flags = fib6_info_dst_flags(rt); +	struct net_device *dev = rt->fib6_nh.nh_dev; +	struct rt6_info *nrt; + +	nrt = ip6_dst_alloc(dev_net(dev), dev, flags); +	if (nrt) +		ip6_rt_copy_init(nrt, rt); + +	return nrt; +} +  static struct rt6_info *ip6_pol_route_lookup(struct net *net,  					     struct fib6_table *table,  					     struct flowi6 *fl6,  					     const struct sk_buff *skb,  					     int flags)  { -	struct rt6_info *rt, *rt_cache; +	struct fib6_info *f6i;  	struct fib6_node *fn; +	struct rt6_info *rt;  	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)  		flags &= ~RT6_LOOKUP_F_IFACE;  	rcu_read_lock(); -	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); +	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);  restart: -	rt = rcu_dereference(fn->leaf); -	if (!rt) { -		rt = net->ipv6.ip6_null_entry; +	f6i = rcu_dereference(fn->leaf); +	if (!f6i) { +		f6i = net->ipv6.fib6_null_entry;  	} else { -		rt = rt6_device_match(net, rt, &fl6->saddr, +		f6i = rt6_device_match(net, f6i, &fl6->saddr,  				      fl6->flowi6_oif, flags); -		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) -			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, -						  skb, flags); +		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) +			f6i = fib6_multipath_select(net, f6i, fl6, +						    fl6->flowi6_oif, skb, +						    flags);  	} -	if (rt == net->ipv6.ip6_null_entry) { +	if (f6i == net->ipv6.fib6_null_entry) {  		fn = fib6_backtrack(fn, &fl6->saddr);  		if (fn)  			goto restart;  	} -	/* Search through exception table */ -	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); -	if (rt_cache) -		rt = rt_cache; -	if (ip6_hold_safe(net, &rt, true)) -		dst_use_noref(&rt->dst, jiffies); +	trace_fib6_table_lookup(net, f6i, table, fl6); -	rcu_read_unlock(); +	/* Search through exception table */ +	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); +	if (rt) { +		if (ip6_hold_safe(net, &rt, true)) +			dst_use_noref(&rt->dst, jiffies); +	} else if (f6i == net->ipv6.fib6_null_entry) { +		rt = net->ipv6.ip6_null_entry; +		dst_hold(&rt->dst); +	} else { +		rt = ip6_create_rt_rcu(f6i); +		if (!rt) { +			rt = net->ipv6.ip6_null_entry; +			dst_hold(&rt->dst); +		} +	} -	trace_fib6_table_lookup(net, rt, table, fl6); +	rcu_read_unlock();  	return rt; -  }  struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, @@ -999,55 +1146,28 @@ EXPORT_SYMBOL(rt6_lookup);   * Caller must hold dst before calling it.   */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, -			struct mx6_config *mxc, +static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,  			struct netlink_ext_ack *extack)  {  	int err;  	struct fib6_table *table; -	table = rt->rt6i_table; +	table = rt->fib6_table;  	spin_lock_bh(&table->tb6_lock); -	err = fib6_add(&table->tb6_root, rt, info, mxc, extack); +	err = fib6_add(&table->tb6_root, rt, info, extack);  	spin_unlock_bh(&table->tb6_lock);  	return err;  } -int ip6_ins_rt(struct rt6_info *rt) +int ip6_ins_rt(struct net *net, struct fib6_info *rt)  { -	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), }; -	struct mx6_config mxc = { .mx = NULL, }; +	struct nl_info info = {	.nl_net = net, }; -	/* Hold dst to account for the reference from the fib6 tree */ -	dst_hold(&rt->dst); -	return __ip6_ins_rt(rt, &info, &mxc, NULL); -} - -/* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) -{ -	struct net_device *dev = rt->dst.dev; - -	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { -		/* for copies of local routes, dst->dev needs to be the -		 * device if it is a master device, the master device if -		 * device is enslaved, and the loopback as the default -		 */ -		if (netif_is_l3_slave(dev) && -		    !rt6_need_strict(&rt->rt6i_dst.addr)) -			dev = l3mdev_master_dev_rcu(dev); -		else if (!netif_is_l3_master(dev)) -			dev = dev_net(dev)->loopback_dev; -		/* last case is netif_is_l3_master(dev) is true in which -		 * case we want dev returned to be dev -		 */ -	} - -	return dev; +	return __ip6_ins_rt(rt, &info, NULL);  } -static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,  					   const struct in6_addr *daddr,  					   const struct in6_addr *saddr)  { @@ -1058,26 +1178,20 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,  	 *	Clone the route.  	 */ -	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) -		ort = ort->from; - -	rcu_read_lock();  	dev = ip6_rt_get_dev_rcu(ort); -	rt = __ip6_dst_alloc(dev_net(dev), dev, 0); -	rcu_read_unlock(); +	rt = ip6_dst_alloc(dev_net(dev), dev, 0);  	if (!rt)  		return NULL;  	ip6_rt_copy_init(rt, ort);  	rt->rt6i_flags |= RTF_CACHE; -	rt->rt6i_metric = 0;  	rt->dst.flags |= DST_HOST;  	rt->rt6i_dst.addr = *daddr;  	rt->rt6i_dst.plen = 128;  	if (!rt6_is_gw_or_nonexthop(ort)) { -		if (ort->rt6i_dst.plen != 128 && -		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) +		if (ort->fib6_dst.plen != 128 && +		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))  			rt->rt6i_flags |= RTF_ANYCAST;  #ifdef CONFIG_IPV6_SUBTREES  		if (rt->rt6i_src.plen && saddr) { @@ -1090,45 +1204,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,  	return rt;  } -static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)  { +	unsigned short flags = fib6_info_dst_flags(rt);  	struct net_device *dev;  	struct rt6_info *pcpu_rt;  	rcu_read_lock();  	dev = ip6_rt_get_dev_rcu(rt); -	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); +	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);  	rcu_read_unlock();  	if (!pcpu_rt)  		return NULL;  	ip6_rt_copy_init(pcpu_rt, rt); -	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;  	pcpu_rt->rt6i_flags |= RTF_PCPU;  	return pcpu_rt;  }  /* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)  {  	struct rt6_info *pcpu_rt, **p;  	p = this_cpu_ptr(rt->rt6i_pcpu);  	pcpu_rt = *p; -	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) -		rt6_dst_from_metrics_check(pcpu_rt); +	if (pcpu_rt) +		ip6_hold_safe(NULL, &pcpu_rt, false);  	return pcpu_rt;  } -static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_make_pcpu_route(struct net *net, +					    struct fib6_info *rt)  {  	struct rt6_info *pcpu_rt, *prev, **p;  	pcpu_rt = ip6_rt_pcpu_alloc(rt);  	if (!pcpu_rt) { -		struct net *net = dev_net(rt->dst.dev); -  		dst_hold(&net->ipv6.ip6_null_entry->dst);  		return net->ipv6.ip6_null_entry;  	} @@ -1138,7 +1251,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)  	prev = cmpxchg(p, NULL, pcpu_rt);  	BUG_ON(prev); -	rt6_dst_from_metrics_check(pcpu_rt);  	return pcpu_rt;  } @@ -1158,9 +1270,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,  		return;  	net = dev_net(rt6_ex->rt6i->dst.dev); -	rt6_ex->rt6i->rt6i_node = NULL;  	hlist_del_rcu(&rt6_ex->hlist); -	rt6_release(rt6_ex->rt6i); +	dst_release(&rt6_ex->rt6i->dst);  	kfree_rcu(rt6_ex, rcu);  	WARN_ON_ONCE(!bucket->depth);  	bucket->depth--; @@ -1268,20 +1379,36 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,  	return NULL;  } +static unsigned int fib6_mtu(const struct fib6_info *rt) +{ +	unsigned int mtu; + +	if (rt->fib6_pmtu) { +		mtu = rt->fib6_pmtu; +	} else { +		struct net_device *dev = fib6_info_nh_dev(rt); +		struct inet6_dev *idev; + +		rcu_read_lock(); +		idev = __in6_dev_get(dev); +		mtu = idev->cnf.mtu6; +		rcu_read_unlock(); +	} + +	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + +	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); +} +  static int rt6_insert_exception(struct rt6_info *nrt, -				struct rt6_info *ort) +				struct fib6_info *ort)  { -	struct net *net = dev_net(ort->dst.dev); +	struct net *net = dev_net(nrt->dst.dev);  	struct rt6_exception_bucket *bucket;  	struct in6_addr *src_key = NULL;  	struct rt6_exception *rt6_ex;  	int err = 0; -	/* ort can't be a cache or pcpu route */ -	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) -		ort = ort->from; -	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); -  	spin_lock_bh(&rt6_exception_lock);  	if (ort->exception_bucket_flushed) { @@ -1308,19 +1435,19 @@ static int rt6_insert_exception(struct rt6_info *nrt,  	 * Otherwise, the exception table is indexed by  	 * a hash of only rt6i_dst.  	 */ -	if (ort->rt6i_src.plen) +	if (ort->fib6_src.plen)  		src_key = &nrt->rt6i_src.addr;  #endif  	/* Update rt6i_prefsrc as it could be changed  	 * in rt6_remove_prefsrc()  	 */ -	nrt->rt6i_prefsrc = ort->rt6i_prefsrc; +	nrt->rt6i_prefsrc = ort->fib6_prefsrc;  	/* rt6_mtu_change() might lower mtu on ort.  	 * Only insert this exception route if its mtu  	 * is less than ort's mtu value.  	 */ -	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { +	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {  		err = -EINVAL;  		goto out;  	} @@ -1337,8 +1464,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,  	}  	rt6_ex->rt6i = nrt;  	rt6_ex->stamp = jiffies; -	atomic_inc(&nrt->rt6i_ref); -	nrt->rt6i_node = ort->rt6i_node;  	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);  	bucket->depth++;  	net->ipv6.rt6_stats->fib_rt_cache++; @@ -1351,16 +1476,16 @@ out:  	/* Update fn->fn_sernum to invalidate all cached dst */  	if (!err) { -		spin_lock_bh(&ort->rt6i_table->tb6_lock); -		fib6_update_sernum(ort); -		spin_unlock_bh(&ort->rt6i_table->tb6_lock); +		spin_lock_bh(&ort->fib6_table->tb6_lock); +		fib6_update_sernum(net, ort); +		spin_unlock_bh(&ort->fib6_table->tb6_lock);  		fib6_force_start_gc(net);  	}  	return err;  } -void rt6_flush_exceptions(struct rt6_info *rt) +void rt6_flush_exceptions(struct fib6_info *rt)  {  	struct rt6_exception_bucket *bucket;  	struct rt6_exception *rt6_ex; @@ -1390,7 +1515,7 @@ out:  /* Find cached rt in the hash table inside passed in rt   * Caller has to hold rcu_read_lock()   */ -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,  					   struct in6_addr *daddr,  					   struct in6_addr *saddr)  { @@ -1408,7 +1533,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,  	 * Otherwise, the exception table is indexed by  	 * a hash of only rt6i_dst.  	 */ -	if (rt->rt6i_src.plen) +	if (rt->fib6_src.plen)  		src_key = saddr;  #endif  	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); @@ -1420,14 +1545,15 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,  }  /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt)  {  	struct rt6_exception_bucket *bucket; -	struct rt6_info *from = rt->from;  	struct in6_addr *src_key = NULL;  	struct rt6_exception *rt6_ex; +	struct fib6_info *from;  	int err; +	from = rcu_dereference(rt->from);  	if (!from ||  	    !(rt->rt6i_flags & RTF_CACHE))  		return -EINVAL; @@ -1445,7 +1571,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt)  	 * Otherwise, the exception table is indexed by  	 * a hash of only rt6i_dst.  	 */ -	if (from->rt6i_src.plen) +	if (from->fib6_src.plen)  		src_key = &rt->rt6i_src.addr;  #endif  	rt6_ex = __rt6_find_exception_spinlock(&bucket, @@ -1468,7 +1594,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt)  static void rt6_update_exception_stamp_rt(struct rt6_info *rt)  {  	struct rt6_exception_bucket *bucket; -	struct rt6_info *from = rt->from; +	struct fib6_info *from = rt->from;  	struct in6_addr *src_key = NULL;  	struct rt6_exception *rt6_ex; @@ -1486,7 +1612,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)  	 * Otherwise, the exception table is indexed by  	 * a hash of only rt6i_dst.  	 */ -	if (from->rt6i_src.plen) +	if (from->fib6_src.plen)  		src_key = &rt->rt6i_src.addr;  #endif  	rt6_ex = __rt6_find_exception_rcu(&bucket, @@ -1498,7 +1624,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)  	rcu_read_unlock();  } -static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)  {  	struct rt6_exception_bucket *bucket;  	struct rt6_exception *rt6_ex; @@ -1540,7 +1666,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,  }  static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, -				       struct rt6_info *rt, int mtu) +				       struct fib6_info *rt, int mtu)  {  	struct rt6_exception_bucket *bucket;  	struct rt6_exception *rt6_ex; @@ -1557,12 +1683,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,  			struct rt6_info *entry = rt6_ex->rt6i;  			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected -			 * route), the metrics of its rt->dst.from have already +			 * route), the metrics of its rt->from have already  			 * been updated.  			 */ -			if (entry->rt6i_pmtu && +			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&  			    rt6_mtu_change_route_allowed(idev, entry, mtu)) -				entry->rt6i_pmtu = mtu; +				dst_metric_set(&entry->dst, RTAX_MTU, mtu);  		}  		bucket++;  	} @@ -1570,7 +1696,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,  #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE) -static void rt6_exceptions_clean_tohost(struct rt6_info *rt, +static void rt6_exceptions_clean_tohost(struct fib6_info *rt,  					struct in6_addr *gateway)  {  	struct rt6_exception_bucket *bucket; @@ -1649,7 +1775,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,  	gc_args->more++;  } -void rt6_age_exceptions(struct rt6_info *rt, +void rt6_age_exceptions(struct fib6_info *rt,  			struct fib6_gc_args *gc_args,  			unsigned long now)  { @@ -1680,32 +1806,22 @@ void rt6_age_exceptions(struct rt6_info *rt,  	rcu_read_unlock_bh();  } -struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, -			       int oif, struct flowi6 *fl6, -			       const struct sk_buff *skb, int flags) +/* must be called with rcu lock held */ +struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, +				    int oif, struct flowi6 *fl6, int strict)  {  	struct fib6_node *fn, *saved_fn; -	struct rt6_info *rt, *rt_cache; -	int strict = 0; - -	strict |= flags & RT6_LOOKUP_F_IFACE; -	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; -	if (net->ipv6.devconf_all->forwarding == 0) -		strict |= RT6_LOOKUP_F_REACHABLE; - -	rcu_read_lock(); +	struct fib6_info *f6i; -	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); +	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);  	saved_fn = fn;  	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)  		oif = 0;  redo_rt6_select: -	rt = rt6_select(net, fn, oif, strict); -	if (rt->rt6i_nsiblings) -		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); -	if (rt == net->ipv6.ip6_null_entry) { +	f6i = rt6_select(net, fn, oif, strict); +	if (f6i == net->ipv6.fib6_null_entry) {  		fn = fib6_backtrack(fn, &fl6->saddr);  		if (fn)  			goto redo_rt6_select; @@ -1717,45 +1833,57 @@ redo_rt6_select:  		}  	} -	/*Search through exception table */ -	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); -	if (rt_cache) -		rt = rt_cache; +	trace_fib6_table_lookup(net, f6i, table, fl6); -	if (rt == net->ipv6.ip6_null_entry) { +	return f6i; +} + +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, +			       int oif, struct flowi6 *fl6, +			       const struct sk_buff *skb, int flags) +{ +	struct fib6_info *f6i; +	struct rt6_info *rt; +	int strict = 0; + +	strict |= flags & RT6_LOOKUP_F_IFACE; +	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; +	if (net->ipv6.devconf_all->forwarding == 0) +		strict |= RT6_LOOKUP_F_REACHABLE; + +	rcu_read_lock(); + +	f6i = fib6_table_lookup(net, table, oif, fl6, strict); +	if (f6i->fib6_nsiblings) +		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict); + +	if (f6i == net->ipv6.fib6_null_entry) { +		rt = net->ipv6.ip6_null_entry;  		rcu_read_unlock();  		dst_hold(&rt->dst); -		trace_fib6_table_lookup(net, rt, table, fl6);  		return rt; -	} else if (rt->rt6i_flags & RTF_CACHE) { -		if (ip6_hold_safe(net, &rt, true)) { +	} + +	/*Search through exception table */ +	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); +	if (rt) { +		if (ip6_hold_safe(net, &rt, true))  			dst_use_noref(&rt->dst, jiffies); -			rt6_dst_from_metrics_check(rt); -		} +  		rcu_read_unlock(); -		trace_fib6_table_lookup(net, rt, table, fl6);  		return rt;  	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && -			    !(rt->rt6i_flags & RTF_GATEWAY))) { +			    !(f6i->fib6_flags & RTF_GATEWAY))) {  		/* Create a RTF_CACHE clone which will not be  		 * owned by the fib6 tree.  It is for the special case where  		 * the daddr in the skb during the neighbor look-up is different  		 * from the fl6->daddr used to look-up route here.  		 */ -  		struct rt6_info *uncached_rt; -		if (ip6_hold_safe(net, &rt, true)) { -			dst_use_noref(&rt->dst, jiffies); -		} else { -			rcu_read_unlock(); -			uncached_rt = rt; -			goto uncached_rt_out; -		} -		rcu_read_unlock(); +		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); -		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); -		dst_release(&rt->dst); +		rcu_read_unlock();  		if (uncached_rt) {  			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() @@ -1768,36 +1896,21 @@ redo_rt6_select:  			dst_hold(&uncached_rt->dst);  		} -uncached_rt_out: -		trace_fib6_table_lookup(net, uncached_rt, table, fl6);  		return uncached_rt; -  	} else {  		/* Get a percpu copy */  		struct rt6_info *pcpu_rt; -		dst_use_noref(&rt->dst, jiffies);  		local_bh_disable(); -		pcpu_rt = rt6_get_pcpu_route(rt); - -		if (!pcpu_rt) { -			/* atomic_inc_not_zero() is needed when using rcu */ -			if (atomic_inc_not_zero(&rt->rt6i_ref)) { -				/* No dst_hold() on rt is needed because grabbing -				 * rt->rt6i_ref makes sure rt can't be released. -				 */ -				pcpu_rt = rt6_make_pcpu_route(rt); -				rt6_release(rt); -			} else { -				/* rt is already removed from tree */ -				pcpu_rt = net->ipv6.ip6_null_entry; -				dst_hold(&pcpu_rt->dst); -			} -		} +		pcpu_rt = rt6_get_pcpu_route(f6i); + +		if (!pcpu_rt) +			pcpu_rt = rt6_make_pcpu_route(net, f6i); +  		local_bh_enable();  		rcu_read_unlock(); -		trace_fib6_table_lookup(net, pcpu_rt, table, fl6); +  		return pcpu_rt;  	}  } @@ -1868,7 +1981,7 @@ out:  	} else {  		keys->addrs.v6addrs.src = key_iph->saddr;  		keys->addrs.v6addrs.dst = key_iph->daddr; -		keys->tags.flow_label = ip6_flowinfo(key_iph); +		keys->tags.flow_label = ip6_flowlabel(key_iph);  		keys->basic.ip_proto = key_iph->nexthdr;  	}  } @@ -1889,7 +2002,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,  		} else {  			hash_keys.addrs.v6addrs.src = fl6->saddr;  			hash_keys.addrs.v6addrs.dst = fl6->daddr; -			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; +			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);  			hash_keys.basic.ip_proto = fl6->flowi6_proto;  		}  		break; @@ -2020,7 +2133,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori  		rt->rt6i_idev = in6_dev_get(loopback_dev);  		rt->rt6i_gateway = ort->rt6i_gateway;  		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; -		rt->rt6i_metric = 0;  		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));  #ifdef CONFIG_IPV6_SUBTREES @@ -2036,18 +2148,27 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori   *	Destination cache support functions   */ -static void rt6_dst_from_metrics_check(struct rt6_info *rt) +static bool fib6_check(struct fib6_info *f6i, u32 cookie)  { -	if (rt->from && -	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) -		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); +	u32 rt_cookie = 0; + +	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) +		return false; + +	if (fib6_check_expired(f6i)) +		return false; + +	return true;  } -static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) +static struct dst_entry *rt6_check(struct rt6_info *rt, +				   struct fib6_info *from, +				   u32 cookie)  {  	u32 rt_cookie = 0; -	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) +	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || +	    rt_cookie != cookie)  		return NULL;  	if (rt6_check_expired(rt)) @@ -2056,11 +2177,13 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)  	return &rt->dst;  } -static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) +static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, +					    struct fib6_info *from, +					    u32 cookie)  {  	if (!__rt6_check_expired(rt) &&  	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && -	    rt6_check(rt->from, cookie)) +	    fib6_check(from, cookie))  		return &rt->dst;  	else  		return NULL; @@ -2068,22 +2191,30 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)  static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)  { +	struct dst_entry *dst_ret; +	struct fib6_info *from;  	struct rt6_info *rt; -	rt = (struct rt6_info *) dst; +	rt = container_of(dst, struct rt6_info, dst); + +	rcu_read_lock();  	/* All IPV6 dsts are created with ->obsolete set to the value  	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down  	 * into this function always.  	 */ -	rt6_dst_from_metrics_check(rt); +	from = rcu_dereference(rt->from); -	if (rt->rt6i_flags & RTF_PCPU || -	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) -		return rt6_dst_from_check(rt, cookie); +	if (from && (rt->rt6i_flags & RTF_PCPU || +	    unlikely(!list_empty(&rt->rt6i_uncached)))) +		dst_ret = rt6_dst_from_check(rt, from, cookie);  	else -		return rt6_check(rt, cookie); +		dst_ret = rt6_check(rt, from, cookie); + +	rcu_read_unlock(); + +	return dst_ret;  }  static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -2092,10 +2223,12 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)  	if (rt) {  		if (rt->rt6i_flags & RTF_CACHE) { +			rcu_read_lock();  			if (rt6_check_expired(rt)) { -				ip6_del_rt(rt); +				rt6_remove_exception_rt(rt);  				dst = NULL;  			} +			rcu_read_unlock();  		} else {  			dst_release(dst);  			dst = NULL; @@ -2112,35 +2245,60 @@ static void ip6_link_failure(struct sk_buff *skb)  	rt = (struct rt6_info *) skb_dst(skb);  	if (rt) { +		rcu_read_lock();  		if (rt->rt6i_flags & RTF_CACHE) {  			if (dst_hold_safe(&rt->dst)) -				ip6_del_rt(rt); +				rt6_remove_exception_rt(rt);  		} else { +			struct fib6_info *from;  			struct fib6_node *fn; -			rcu_read_lock(); -			fn = rcu_dereference(rt->rt6i_node); -			if (fn && (rt->rt6i_flags & RTF_DEFAULT)) -				fn->fn_sernum = -1; -			rcu_read_unlock(); +			from = rcu_dereference(rt->from); +			if (from) { +				fn = rcu_dereference(from->fib6_node); +				if (fn && (rt->rt6i_flags & RTF_DEFAULT)) +					fn->fn_sernum = -1; +			}  		} +		rcu_read_unlock(); +	} +} + +static void rt6_update_expires(struct rt6_info *rt0, int timeout) +{ +	if (!(rt0->rt6i_flags & RTF_EXPIRES)) { +		struct fib6_info *from; + +		rcu_read_lock(); +		from = rcu_dereference(rt0->from); +		if (from) +			rt0->dst.expires = from->expires; +		rcu_read_unlock();  	} + +	dst_set_expires(&rt0->dst, timeout); +	rt0->rt6i_flags |= RTF_EXPIRES;  }  static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)  {  	struct net *net = dev_net(rt->dst.dev); +	dst_metric_set(&rt->dst, RTAX_MTU, mtu);  	rt->rt6i_flags |= RTF_MODIFIED; -	rt->rt6i_pmtu = mtu;  	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);  }  static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)  { +	bool from_set; + +	rcu_read_lock(); +	from_set = !!rcu_dereference(rt->from); +	rcu_read_unlock(); +  	return !(rt->rt6i_flags & RTF_CACHE) && -		(rt->rt6i_flags & RTF_PCPU || -		 rcu_access_pointer(rt->rt6i_node)); +		(rt->rt6i_flags & RTF_PCPU || from_set);  }  static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2149,9 +2307,6 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,  	const struct in6_addr *daddr, *saddr;  	struct rt6_info *rt6 = (struct rt6_info *)dst; -	if (rt6->rt6i_flags & RTF_LOCAL) -		return; -  	if (dst_metric_locked(dst, RTAX_MTU))  		return; @@ -2176,14 +2331,18 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,  		if (rt6->rt6i_flags & RTF_CACHE)  			rt6_update_exception_stamp_rt(rt6);  	} else if (daddr) { +		struct fib6_info *from;  		struct rt6_info *nrt6; -		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); +		rcu_read_lock(); +		from = rcu_dereference(rt6->from); +		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);  		if (nrt6) {  			rt6_do_update_pmtu(nrt6, mtu); -			if (rt6_insert_exception(nrt6, rt6)) +			if (rt6_insert_exception(nrt6, from))  				dst_release_immediate(&nrt6->dst);  		} +		rcu_read_unlock();  	}  } @@ -2264,7 +2423,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,  					     int flags)  {  	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; -	struct rt6_info *rt, *rt_cache; +	struct rt6_info *ret = NULL, *rt_cache; +	struct fib6_info *rt;  	struct fib6_node *fn;  	/* Get the "current" route for this destination and @@ -2278,32 +2438,32 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,  	 */  	rcu_read_lock(); -	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); +	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);  restart:  	for_each_fib6_node_rt_rcu(fn) { -		if (rt->rt6i_nh_flags & RTNH_F_DEAD) +		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)  			continue; -		if (rt6_check_expired(rt)) +		if (fib6_check_expired(rt))  			continue; -		if (rt->dst.error) +		if (rt->fib6_flags & RTF_REJECT)  			break; -		if (!(rt->rt6i_flags & RTF_GATEWAY)) +		if (!(rt->fib6_flags & RTF_GATEWAY))  			continue; -		if (fl6->flowi6_oif != rt->dst.dev->ifindex) +		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)  			continue;  		/* rt_cache's gateway might be different from its 'parent'  		 * in the case of an ip redirect.  		 * So we keep searching in the exception table if the gateway  		 * is different.  		 */ -		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { +		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {  			rt_cache = rt6_find_cached_rt(rt,  						      &fl6->daddr,  						      &fl6->saddr);  			if (rt_cache &&  			    ipv6_addr_equal(&rdfl->gateway,  					    &rt_cache->rt6i_gateway)) { -				rt = rt_cache; +				ret = rt_cache;  				break;  			}  			continue; @@ -2312,25 +2472,28 @@ restart:  	}  	if (!rt) -		rt = net->ipv6.ip6_null_entry; -	else if (rt->dst.error) { -		rt = net->ipv6.ip6_null_entry; +		rt = net->ipv6.fib6_null_entry; +	else if (rt->fib6_flags & RTF_REJECT) { +		ret = net->ipv6.ip6_null_entry;  		goto out;  	} -	if (rt == net->ipv6.ip6_null_entry) { +	if (rt == net->ipv6.fib6_null_entry) {  		fn = fib6_backtrack(fn, &fl6->saddr);  		if (fn)  			goto restart;  	}  out: -	ip6_hold_safe(net, &rt, true); +	if (ret) +		dst_hold(&ret->dst); +	else +		ret = ip6_create_rt_rcu(rt);  	rcu_read_unlock();  	trace_fib6_table_lookup(net, rt, table, fl6); -	return rt; +	return ret;  };  static struct dst_entry *ip6_route_redirect(struct net *net, @@ -2422,12 +2585,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)  static unsigned int ip6_mtu(const struct dst_entry *dst)  { -	const struct rt6_info *rt = (const struct rt6_info *)dst; -	unsigned int mtu = rt->rt6i_pmtu;  	struct inet6_dev *idev; - -	if (mtu) -		goto out; +	unsigned int mtu;  	mtu = dst_metric_raw(dst, RTAX_MTU);  	if (mtu) @@ -2447,6 +2606,54 @@ out:  	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);  } +/* MTU selection: + * 1. mtu on route is locked - use it + * 2. mtu from nexthop exception + * 3. mtu from egress device + * + * based on ip6_dst_mtu_forward and exception logic of + * rt6_find_cached_rt; called with rcu_read_lock + */ +u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, +		      struct in6_addr *saddr) +{ +	struct rt6_exception_bucket *bucket; +	struct rt6_exception *rt6_ex; +	struct in6_addr *src_key; +	struct inet6_dev *idev; +	u32 mtu = 0; + +	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { +		mtu = f6i->fib6_pmtu; +		if (mtu) +			goto out; +	} + +	src_key = NULL; +#ifdef CONFIG_IPV6_SUBTREES +	if (f6i->fib6_src.plen) +		src_key = saddr; +#endif + +	bucket = rcu_dereference(f6i->rt6i_exception_bucket); +	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); +	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) +		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); + +	if (likely(!mtu)) { +		struct net_device *dev = fib6_info_nh_dev(f6i); + +		mtu = IPV6_MIN_MTU; +		idev = __in6_dev_get(dev); +		if (idev && idev->cnf.mtu6 > mtu) +			mtu = idev->cnf.mtu6; +	} + +	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); +out: +	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu); +} +  struct dst_entry *icmp6_dst_alloc(struct net_device *dev,  				  struct flowi6 *fl6)  { @@ -2511,60 +2718,22 @@ out:  	return entries > rt_max_size;  } -static int ip6_convert_metrics(struct mx6_config *mxc, -			       const struct fib6_config *cfg) +static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, +			       struct fib6_config *cfg)  { -	struct net *net = cfg->fc_nlinfo.nl_net; -	bool ecn_ca = false; -	struct nlattr *nla; -	int remaining; -	u32 *mp; +	struct dst_metrics *p;  	if (!cfg->fc_mx)  		return 0; -	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); -	if (unlikely(!mp)) +	p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL); +	if (unlikely(!p))  		return -ENOMEM; -	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { -		int type = nla_type(nla); -		u32 val; - -		if (!type) -			continue; -		if (unlikely(type > RTAX_MAX)) -			goto err; - -		if (type == RTAX_CC_ALGO) { -			char tmp[TCP_CA_NAME_MAX]; - -			nla_strlcpy(tmp, nla, sizeof(tmp)); -			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); -			if (val == TCP_CA_UNSPEC) -				goto err; -		} else { -			val = nla_get_u32(nla); -		} -		if (type == RTAX_HOPLIMIT && val > 255) -			val = 255; -		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) -			goto err; - -		mp[type - 1] = val; -		__set_bit(type - 1, mxc->mx_valid); -	} - -	if (ecn_ca) { -		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid); -		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; -	} +	refcount_set(&p->refcnt, 1); +	rt->fib6_metrics = p; -	mxc->mx = mp; -	return 0; - err: -	kfree(mp); -	return -EINVAL; +	return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);  }  static struct rt6_info *ip6_nh_lookup_table(struct net *net, @@ -2750,11 +2919,12 @@ out:  	return err;  } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, +static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, +					      gfp_t gfp_flags,  					      struct netlink_ext_ack *extack)  {  	struct net *net = cfg->fc_nlinfo.nl_net; -	struct rt6_info *rt = NULL; +	struct fib6_info *rt = NULL;  	struct net_device *dev = NULL;  	struct inet6_dev *idev = NULL;  	struct fib6_table *table; @@ -2773,6 +2943,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  		goto out;  	} +	if (cfg->fc_type > RTN_MAX) { +		NL_SET_ERR_MSG(extack, "Invalid route type"); +		goto out; +	} +  	if (cfg->fc_dst_len > 128) {  		NL_SET_ERR_MSG(extack, "Invalid prefix length");  		goto out; @@ -2831,35 +3006,30 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  	if (!table)  		goto out; -	rt = ip6_dst_alloc(net, NULL, -			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); +	err = -ENOMEM; +	rt = fib6_info_alloc(gfp_flags); +	if (!rt) +		goto out; -	if (!rt) { -		err = -ENOMEM; +	if (cfg->fc_flags & RTF_ADDRCONF) +		rt->dst_nocount = true; + +	err = ip6_convert_metrics(net, rt, cfg); +	if (err < 0)  		goto out; -	}  	if (cfg->fc_flags & RTF_EXPIRES) -		rt6_set_expires(rt, jiffies + +		fib6_set_expires(rt, jiffies +  				clock_t_to_jiffies(cfg->fc_expires));  	else -		rt6_clean_expires(rt); +		fib6_clean_expires(rt);  	if (cfg->fc_protocol == RTPROT_UNSPEC)  		cfg->fc_protocol = RTPROT_BOOT; -	rt->rt6i_protocol = cfg->fc_protocol; +	rt->fib6_protocol = cfg->fc_protocol;  	addr_type = ipv6_addr_type(&cfg->fc_dst); -	if (addr_type & IPV6_ADDR_MULTICAST) -		rt->dst.input = ip6_mc_input; -	else if (cfg->fc_flags & RTF_LOCAL) -		rt->dst.input = ip6_input; -	else -		rt->dst.input = ip6_forward; - -	rt->dst.output = ip6_output; -  	if (cfg->fc_encap) {  		struct lwtunnel_state *lwtstate; @@ -2868,22 +3038,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  					   &lwtstate, extack);  		if (err)  			goto out; -		rt->dst.lwtstate = lwtstate_get(lwtstate); -		lwtunnel_set_redirect(&rt->dst); +		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);  	} -	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); -	rt->rt6i_dst.plen = cfg->fc_dst_len; -	if (rt->rt6i_dst.plen == 128) -		rt->dst.flags |= DST_HOST; +	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); +	rt->fib6_dst.plen = cfg->fc_dst_len; +	if (rt->fib6_dst.plen == 128) +		rt->dst_host = true;  #ifdef CONFIG_IPV6_SUBTREES -	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); -	rt->rt6i_src.plen = cfg->fc_src_len; +	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); +	rt->fib6_src.plen = cfg->fc_src_len;  #endif -	rt->rt6i_metric = cfg->fc_metric; -	rt->rt6i_nh_weight = 1; +	rt->fib6_metric = cfg->fc_metric; +	rt->fib6_nh.nh_weight = 1; + +	rt->fib6_type = cfg->fc_type;  	/* We cannot add true routes via loopback here,  	   they would result in kernel looping; promote them to reject routes @@ -2906,28 +3077,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  				goto out;  			}  		} -		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; -		switch (cfg->fc_type) { -		case RTN_BLACKHOLE: -			rt->dst.error = -EINVAL; -			rt->dst.output = dst_discard_out; -			rt->dst.input = dst_discard; -			break; -		case RTN_PROHIBIT: -			rt->dst.error = -EACCES; -			rt->dst.output = ip6_pkt_prohibit_out; -			rt->dst.input = ip6_pkt_prohibit; -			break; -		case RTN_THROW: -		case RTN_UNREACHABLE: -		default: -			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN -					: (cfg->fc_type == RTN_UNREACHABLE) -					? -EHOSTUNREACH : -ENETUNREACH; -			rt->dst.output = ip6_pkt_discard_out; -			rt->dst.input = ip6_pkt_discard; -			break; -		} +		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;  		goto install_route;  	} @@ -2936,7 +3086,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  		if (err)  			goto out; -		rt->rt6i_gateway = cfg->fc_gateway; +		rt->fib6_nh.nh_gw = cfg->fc_gateway;  	}  	err = -ENODEV; @@ -2961,96 +3111,82 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  			err = -EINVAL;  			goto out;  		} -		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; -		rt->rt6i_prefsrc.plen = 128; +		rt->fib6_prefsrc.addr = cfg->fc_prefsrc; +		rt->fib6_prefsrc.plen = 128;  	} else -		rt->rt6i_prefsrc.plen = 0; +		rt->fib6_prefsrc.plen = 0; -	rt->rt6i_flags = cfg->fc_flags; +	rt->fib6_flags = cfg->fc_flags;  install_route: -	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && +	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&  	    !netif_carrier_ok(dev)) -		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; -	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); -	rt->dst.dev = dev; -	rt->rt6i_idev = idev; -	rt->rt6i_table = table; +		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; +	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); +	rt->fib6_nh.nh_dev = dev; +	rt->fib6_table = table;  	cfg->fc_nlinfo.nl_net = dev_net(dev); +	if (idev) +		in6_dev_put(idev); +  	return rt;  out:  	if (dev)  		dev_put(dev);  	if (idev)  		in6_dev_put(idev); -	if (rt) -		dst_release_immediate(&rt->dst); +	fib6_info_release(rt);  	return ERR_PTR(err);  } -int ip6_route_add(struct fib6_config *cfg, +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,  		  struct netlink_ext_ack *extack)  { -	struct mx6_config mxc = { .mx = NULL, }; -	struct rt6_info *rt; +	struct fib6_info *rt;  	int err; -	rt = ip6_route_info_create(cfg, extack); -	if (IS_ERR(rt)) { -		err = PTR_ERR(rt); -		rt = NULL; -		goto out; -	} - -	err = ip6_convert_metrics(&mxc, cfg); -	if (err) -		goto out; - -	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); +	rt = ip6_route_info_create(cfg, gfp_flags, extack); +	if (IS_ERR(rt)) +		return PTR_ERR(rt); -	kfree(mxc.mx); - -	return err; -out: -	if (rt) -		dst_release_immediate(&rt->dst); +	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); +	fib6_info_release(rt);  	return err;  } -static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)  { -	int err; +	struct net *net = info->nl_net;  	struct fib6_table *table; -	struct net *net = dev_net(rt->dst.dev); +	int err; -	if (rt == net->ipv6.ip6_null_entry) { +	if (rt == net->ipv6.fib6_null_entry) {  		err = -ENOENT;  		goto out;  	} -	table = rt->rt6i_table; +	table = rt->fib6_table;  	spin_lock_bh(&table->tb6_lock);  	err = fib6_del(rt, info);  	spin_unlock_bh(&table->tb6_lock);  out: -	ip6_rt_put(rt); +	fib6_info_release(rt);  	return err;  } -int ip6_del_rt(struct rt6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt)  { -	struct nl_info info = { -		.nl_net = dev_net(rt->dst.dev), -	}; +	struct nl_info info = { .nl_net = net }; +  	return __ip6_del_rt(rt, &info);  } -static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)  {  	struct nl_info *info = &cfg->fc_nlinfo;  	struct net *net = info->nl_net; @@ -3058,20 +3194,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)  	struct fib6_table *table;  	int err = -ENOENT; -	if (rt == net->ipv6.ip6_null_entry) +	if (rt == net->ipv6.fib6_null_entry)  		goto out_put; -	table = rt->rt6i_table; +	table = rt->fib6_table;  	spin_lock_bh(&table->tb6_lock); -	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { -		struct rt6_info *sibling, *next_sibling; +	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { +		struct fib6_info *sibling, *next_sibling;  		/* prefer to send a single notification with all hops */  		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());  		if (skb) {  			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; -			if (rt6_fill_node(net, skb, rt, +			if (rt6_fill_node(net, skb, rt, NULL,  					  NULL, NULL, 0, RTM_DELROUTE,  					  info->portid, seq, 0) < 0) {  				kfree_skb(skb); @@ -3081,8 +3217,8 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)  		}  		list_for_each_entry_safe(sibling, next_sibling, -					 &rt->rt6i_siblings, -					 rt6i_siblings) { +					 &rt->fib6_siblings, +					 fib6_siblings) {  			err = fib6_del(sibling, info);  			if (err)  				goto out_unlock; @@ -3093,7 +3229,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)  out_unlock:  	spin_unlock_bh(&table->tb6_lock);  out_put: -	ip6_rt_put(rt); +	fib6_info_release(rt);  	if (skb) {  		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, @@ -3102,11 +3238,28 @@ out_put:  	return err;  } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ +	int rc = -ESRCH; + +	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) +		goto out; + +	if (cfg->fc_flags & RTF_GATEWAY && +	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) +		goto out; +	if (dst_hold_safe(&rt->dst)) +		rc = rt6_remove_exception_rt(rt); +out: +	return rc; +} +  static int ip6_route_del(struct fib6_config *cfg,  			 struct netlink_ext_ack *extack)  { -	struct rt6_info *rt, *rt_cache; +	struct rt6_info *rt_cache;  	struct fib6_table *table; +	struct fib6_info *rt;  	struct fib6_node *fn;  	int err = -ESRCH; @@ -3126,25 +3279,31 @@ static int ip6_route_del(struct fib6_config *cfg,  	if (fn) {  		for_each_fib6_node_rt_rcu(fn) {  			if (cfg->fc_flags & RTF_CACHE) { +				int rc; +  				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,  							      &cfg->fc_src); -				if (!rt_cache) -					continue; -				rt = rt_cache; +				if (rt_cache) { +					rc = ip6_del_cached_rt(rt_cache, cfg); +					if (rc != -ESRCH) { +						rcu_read_unlock(); +						return rc; +					} +				} +				continue;  			}  			if (cfg->fc_ifindex && -			    (!rt->dst.dev || -			     rt->dst.dev->ifindex != cfg->fc_ifindex)) +			    (!rt->fib6_nh.nh_dev || +			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))  				continue;  			if (cfg->fc_flags & RTF_GATEWAY && -			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) +			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))  				continue; -			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) +			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)  				continue; -			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) +			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)  				continue; -			if (!dst_hold_safe(&rt->dst)) -				break; +			fib6_info_hold(rt);  			rcu_read_unlock();  			/* if gateway was specified only delete the one hop */ @@ -3166,6 +3325,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu  	struct ndisc_options ndopts;  	struct inet6_dev *in6_dev;  	struct neighbour *neigh; +	struct fib6_info *from;  	struct rd_msg *msg;  	int optlen, on_link;  	u8 *lladdr; @@ -3247,7 +3407,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu  				     NEIGH_UPDATE_F_ISROUTER)),  		     NDISC_REDIRECT, &ndopts); -	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); +	rcu_read_lock(); +	from = rcu_dereference(rt->from); +	fib6_info_hold(from); +	rcu_read_unlock(); + +	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);  	if (!nrt)  		goto out; @@ -3255,14 +3420,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu  	if (on_link)  		nrt->rt6i_flags &= ~RTF_GATEWAY; -	nrt->rt6i_protocol = RTPROT_REDIRECT;  	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;  	/* No need to remove rt from the exception table if rt is  	 * a cached route because rt6_insert_exception() will  	 * takes care of it  	 */ -	if (rt6_insert_exception(nrt, rt)) { +	if (rt6_insert_exception(nrt, from)) {  		dst_release_immediate(&nrt->dst);  		goto out;  	} @@ -3274,47 +3438,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu  	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);  out: +	fib6_info_release(from);  	neigh_release(neigh);  } -/* - *	Misc support functions - */ - -static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ -	BUG_ON(from->from); - -	rt->rt6i_flags &= ~RTF_EXPIRES; -	dst_hold(&from->dst); -	rt->from = from; -	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); -} - -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) -{ -	rt->dst.input = ort->dst.input; -	rt->dst.output = ort->dst.output; -	rt->rt6i_dst = ort->rt6i_dst; -	rt->dst.error = ort->dst.error; -	rt->rt6i_idev = ort->rt6i_idev; -	if (rt->rt6i_idev) -		in6_dev_hold(rt->rt6i_idev); -	rt->dst.lastuse = jiffies; -	rt->rt6i_gateway = ort->rt6i_gateway; -	rt->rt6i_flags = ort->rt6i_flags; -	rt6_set_from(rt, ort); -	rt->rt6i_metric = ort->rt6i_metric; -#ifdef CONFIG_IPV6_SUBTREES -	rt->rt6i_src = ort->rt6i_src; -#endif -	rt->rt6i_prefsrc = ort->rt6i_prefsrc; -	rt->rt6i_table = ort->rt6i_table; -	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); -} -  #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net,  					   const struct in6_addr *prefix, int prefixlen,  					   const struct in6_addr *gwaddr,  					   struct net_device *dev) @@ -3322,7 +3451,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,  	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;  	int ifindex = dev->ifindex;  	struct fib6_node *fn; -	struct rt6_info *rt = NULL; +	struct fib6_info *rt = NULL;  	struct fib6_table *table;  	table = fib6_get_table(net, tb_id); @@ -3335,13 +3464,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net,  		goto out;  	for_each_fib6_node_rt_rcu(fn) { -		if (rt->dst.dev->ifindex != ifindex) +		if (rt->fib6_nh.nh_dev->ifindex != ifindex)  			continue; -		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) +		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))  			continue; -		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) +		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))  			continue; -		ip6_hold_safe(NULL, &rt, false); +		fib6_info_hold(rt);  		break;  	}  out: @@ -3349,7 +3478,7 @@ out:  	return rt;  } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net,  					   const struct in6_addr *prefix, int prefixlen,  					   const struct in6_addr *gwaddr,  					   struct net_device *dev, @@ -3362,6 +3491,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,  		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |  				  RTF_UP | RTF_PREF(pref),  		.fc_protocol = RTPROT_RA, +		.fc_type = RTN_UNICAST,  		.fc_nlinfo.portid = 0,  		.fc_nlinfo.nlh = NULL,  		.fc_nlinfo.nl_net = net, @@ -3375,36 +3505,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net,  	if (!prefixlen)  		cfg.fc_flags |= RTF_DEFAULT; -	ip6_route_add(&cfg, NULL); +	ip6_route_add(&cfg, GFP_ATOMIC, NULL);  	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);  }  #endif -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) +struct fib6_info *rt6_get_dflt_router(struct net *net, +				     const struct in6_addr *addr, +				     struct net_device *dev)  {  	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; -	struct rt6_info *rt; +	struct fib6_info *rt;  	struct fib6_table *table; -	table = fib6_get_table(dev_net(dev), tb_id); +	table = fib6_get_table(net, tb_id);  	if (!table)  		return NULL;  	rcu_read_lock();  	for_each_fib6_node_rt_rcu(&table->tb6_root) { -		if (dev == rt->dst.dev && -		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && -		    ipv6_addr_equal(&rt->rt6i_gateway, addr)) +		if (dev == rt->fib6_nh.nh_dev && +		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && +		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))  			break;  	}  	if (rt) -		ip6_hold_safe(NULL, &rt, false); +		fib6_info_hold(rt);  	rcu_read_unlock();  	return rt;  } -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, +				     const struct in6_addr *gwaddr,  				     struct net_device *dev,  				     unsigned int pref)  { @@ -3415,14 +3548,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,  		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |  				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),  		.fc_protocol = RTPROT_RA, +		.fc_type = RTN_UNICAST,  		.fc_nlinfo.portid = 0,  		.fc_nlinfo.nlh = NULL, -		.fc_nlinfo.nl_net = dev_net(dev), +		.fc_nlinfo.nl_net = net,  	};  	cfg.fc_gateway = *gwaddr; -	if (!ip6_route_add(&cfg, NULL)) { +	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {  		struct fib6_table *table;  		table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -3430,24 +3564,25 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,  			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;  	} -	return rt6_get_dflt_router(gwaddr, dev); +	return rt6_get_dflt_router(net, gwaddr, dev);  } -static void __rt6_purge_dflt_routers(struct fib6_table *table) +static void __rt6_purge_dflt_routers(struct net *net, +				     struct fib6_table *table)  { -	struct rt6_info *rt; +	struct fib6_info *rt;  restart:  	rcu_read_lock();  	for_each_fib6_node_rt_rcu(&table->tb6_root) { -		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && -		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { -			if (dst_hold_safe(&rt->dst)) { -				rcu_read_unlock(); -				ip6_del_rt(rt); -			} else { -				rcu_read_unlock(); -			} +		struct net_device *dev = fib6_info_nh_dev(rt); +		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL; + +		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && +		    (!idev || idev->cnf.accept_ra != 2)) { +			fib6_info_hold(rt); +			rcu_read_unlock(); +			ip6_del_rt(net, rt);  			goto restart;  		}  	} @@ -3468,7 +3603,7 @@ void rt6_purge_dflt_routers(struct net *net)  		head = &net->ipv6.fib_table_hash[h];  		hlist_for_each_entry_rcu(table, head, tb6_hlist) {  			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) -				__rt6_purge_dflt_routers(table); +				__rt6_purge_dflt_routers(net, table);  		}  	} @@ -3489,6 +3624,7 @@ static void rtmsg_to_fib6_config(struct net *net,  	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;  	cfg->fc_src_len = rtmsg->rtmsg_src_len;  	cfg->fc_flags = rtmsg->rtmsg_flags; +	cfg->fc_type = rtmsg->rtmsg_type;  	cfg->fc_nlinfo.nl_net = net; @@ -3518,7 +3654,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)  		rtnl_lock();  		switch (cmd) {  		case SIOCADDRT: -			err = ip6_route_add(&cfg, NULL); +			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);  			break;  		case SIOCDELRT:  			err = ip6_route_del(&cfg, NULL); @@ -3546,7 +3682,8 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)  	case IPSTATS_MIB_INNOROUTES:  		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);  		if (type == IPV6_ADDR_ANY) { -			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), +			IP6_INC_STATS(dev_net(dst->dev), +				      __in6_dev_get_safely(skb->dev),  				      IPSTATS_MIB_INADDRERRORS);  			break;  		} @@ -3587,40 +3724,40 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff   *	Allocate a dst for local (unicast / anycast) address.   */ -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, -				    const struct in6_addr *addr, -				    bool anycast) +struct fib6_info *addrconf_f6i_alloc(struct net *net, +				     struct inet6_dev *idev, +				     const struct in6_addr *addr, +				     bool anycast, gfp_t gfp_flags)  {  	u32 tb_id; -	struct net *net = dev_net(idev->dev);  	struct net_device *dev = idev->dev; -	struct rt6_info *rt; +	struct fib6_info *f6i; -	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); -	if (!rt) +	f6i = fib6_info_alloc(gfp_flags); +	if (!f6i)  		return ERR_PTR(-ENOMEM); -	in6_dev_hold(idev); - -	rt->dst.flags |= DST_HOST; -	rt->dst.input = ip6_input; -	rt->dst.output = ip6_output; -	rt->rt6i_idev = idev; - -	rt->rt6i_protocol = RTPROT_KERNEL; -	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; -	if (anycast) -		rt->rt6i_flags |= RTF_ANYCAST; -	else -		rt->rt6i_flags |= RTF_LOCAL; +	f6i->dst_nocount = true; +	f6i->dst_host = true; +	f6i->fib6_protocol = RTPROT_KERNEL; +	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; +	if (anycast) { +		f6i->fib6_type = RTN_ANYCAST; +		f6i->fib6_flags |= RTF_ANYCAST; +	} else { +		f6i->fib6_type = RTN_LOCAL; +		f6i->fib6_flags |= RTF_LOCAL; +	} -	rt->rt6i_gateway  = *addr; -	rt->rt6i_dst.addr = *addr; -	rt->rt6i_dst.plen = 128; +	f6i->fib6_nh.nh_gw = *addr; +	dev_hold(dev); +	f6i->fib6_nh.nh_dev = dev; +	f6i->fib6_dst.addr = *addr; +	f6i->fib6_dst.plen = 128;  	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; -	rt->rt6i_table = fib6_get_table(net, tb_id); +	f6i->fib6_table = fib6_get_table(net, tb_id); -	return rt; +	return f6i;  }  /* remove deleted ip from prefsrc entries */ @@ -3630,18 +3767,18 @@ struct arg_dev_net_ip {  	struct in6_addr *addr;  }; -static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) +static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)  {  	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;  	struct net *net = ((struct arg_dev_net_ip *)arg)->net;  	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; -	if (((void *)rt->dst.dev == dev || !dev) && -	    rt != net->ipv6.ip6_null_entry && -	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { +	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && +	    rt != net->ipv6.fib6_null_entry && +	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {  		spin_lock_bh(&rt6_exception_lock);  		/* remove prefsrc entry */ -		rt->rt6i_prefsrc.plen = 0; +		rt->fib6_prefsrc.plen = 0;  		/* need to update cache as well */  		rt6_exceptions_remove_prefsrc(rt);  		spin_unlock_bh(&rt6_exception_lock); @@ -3663,12 +3800,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)  #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)  /* Remove routers and update dst entries when gateway turn into host. */ -static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +static int fib6_clean_tohost(struct fib6_info *rt, void *arg)  {  	struct in6_addr *gateway = (struct in6_addr *)arg; -	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && -	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { +	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && +	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {  		return -1;  	} @@ -3694,85 +3831,85 @@ struct arg_netdev_event {  	};  }; -static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)  { -	struct rt6_info *iter; +	struct fib6_info *iter;  	struct fib6_node *fn; -	fn = rcu_dereference_protected(rt->rt6i_node, -			lockdep_is_held(&rt->rt6i_table->tb6_lock)); +	fn = rcu_dereference_protected(rt->fib6_node, +			lockdep_is_held(&rt->fib6_table->tb6_lock));  	iter = rcu_dereference_protected(fn->leaf, -			lockdep_is_held(&rt->rt6i_table->tb6_lock)); +			lockdep_is_held(&rt->fib6_table->tb6_lock));  	while (iter) { -		if (iter->rt6i_metric == rt->rt6i_metric && +		if (iter->fib6_metric == rt->fib6_metric &&  		    rt6_qualify_for_ecmp(iter))  			return iter; -		iter = rcu_dereference_protected(iter->rt6_next, -				lockdep_is_held(&rt->rt6i_table->tb6_lock)); +		iter = rcu_dereference_protected(iter->fib6_next, +				lockdep_is_held(&rt->fib6_table->tb6_lock));  	}  	return NULL;  } -static bool rt6_is_dead(const struct rt6_info *rt) +static bool rt6_is_dead(const struct fib6_info *rt)  { -	if (rt->rt6i_nh_flags & RTNH_F_DEAD || -	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && -	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) +	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || +	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && +	     fib6_ignore_linkdown(rt)))  		return true;  	return false;  } -static int rt6_multipath_total_weight(const struct rt6_info *rt) +static int rt6_multipath_total_weight(const struct fib6_info *rt)  { -	struct rt6_info *iter; +	struct fib6_info *iter;  	int total = 0;  	if (!rt6_is_dead(rt)) -		total += rt->rt6i_nh_weight; +		total += rt->fib6_nh.nh_weight; -	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { +	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {  		if (!rt6_is_dead(iter)) -			total += iter->rt6i_nh_weight; +			total += iter->fib6_nh.nh_weight;  	}  	return total;  } -static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)  {  	int upper_bound = -1;  	if (!rt6_is_dead(rt)) { -		*weight += rt->rt6i_nh_weight; +		*weight += rt->fib6_nh.nh_weight;  		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,  						    total) - 1;  	} -	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); +	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);  } -static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)  { -	struct rt6_info *iter; +	struct fib6_info *iter;  	int weight = 0;  	rt6_upper_bound_set(rt, &weight, total); -	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)  		rt6_upper_bound_set(iter, &weight, total);  } -void rt6_multipath_rebalance(struct rt6_info *rt) +void rt6_multipath_rebalance(struct fib6_info *rt)  { -	struct rt6_info *first; +	struct fib6_info *first;  	int total;  	/* In case the entire multipath route was marked for flushing,  	 * then there is no need to rebalance upon the removal of every  	 * sibling route.  	 */ -	if (!rt->rt6i_nsiblings || rt->should_flush) +	if (!rt->fib6_nsiblings || rt->should_flush)  		return;  	/* During lookup routes are evaluated in order, so we need to @@ -3787,14 +3924,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt)  	rt6_multipath_upper_bound_set(first, total);  } -static int fib6_ifup(struct rt6_info *rt, void *p_arg) +static int fib6_ifup(struct fib6_info *rt, void *p_arg)  {  	const struct arg_netdev_event *arg = p_arg; -	const struct net *net = dev_net(arg->dev); +	struct net *net = dev_net(arg->dev); -	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { -		rt->rt6i_nh_flags &= ~arg->nh_flags; -		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); +	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { +		rt->fib6_nh.nh_flags &= ~arg->nh_flags; +		fib6_update_sernum_upto_root(net, rt);  		rt6_multipath_rebalance(rt);  	} @@ -3816,95 +3953,96 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)  	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);  } -static bool rt6_multipath_uses_dev(const struct rt6_info *rt, +static bool rt6_multipath_uses_dev(const struct fib6_info *rt,  				   const struct net_device *dev)  { -	struct rt6_info *iter; +	struct fib6_info *iter; -	if (rt->dst.dev == dev) +	if (rt->fib6_nh.nh_dev == dev)  		return true; -	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) -		if (iter->dst.dev == dev) +	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) +		if (iter->fib6_nh.nh_dev == dev)  			return true;  	return false;  } -static void rt6_multipath_flush(struct rt6_info *rt) +static void rt6_multipath_flush(struct fib6_info *rt)  { -	struct rt6_info *iter; +	struct fib6_info *iter;  	rt->should_flush = 1; -	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)  		iter->should_flush = 1;  } -static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, +static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,  					     const struct net_device *down_dev)  { -	struct rt6_info *iter; +	struct fib6_info *iter;  	unsigned int dead = 0; -	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) +	if (rt->fib6_nh.nh_dev == down_dev || +	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)  		dead++; -	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) -		if (iter->dst.dev == down_dev || -		    iter->rt6i_nh_flags & RTNH_F_DEAD) +	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) +		if (iter->fib6_nh.nh_dev == down_dev || +		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)  			dead++;  	return dead;  } -static void rt6_multipath_nh_flags_set(struct rt6_info *rt, +static void rt6_multipath_nh_flags_set(struct fib6_info *rt,  				       const struct net_device *dev,  				       unsigned int nh_flags)  { -	struct rt6_info *iter; +	struct fib6_info *iter; -	if (rt->dst.dev == dev) -		rt->rt6i_nh_flags |= nh_flags; -	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) -		if (iter->dst.dev == dev) -			iter->rt6i_nh_flags |= nh_flags; +	if (rt->fib6_nh.nh_dev == dev) +		rt->fib6_nh.nh_flags |= nh_flags; +	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) +		if (iter->fib6_nh.nh_dev == dev) +			iter->fib6_nh.nh_flags |= nh_flags;  }  /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *p_arg) +static int fib6_ifdown(struct fib6_info *rt, void *p_arg)  {  	const struct arg_netdev_event *arg = p_arg;  	const struct net_device *dev = arg->dev; -	const struct net *net = dev_net(dev); +	struct net *net = dev_net(dev); -	if (rt == net->ipv6.ip6_null_entry) +	if (rt == net->ipv6.fib6_null_entry)  		return 0;  	switch (arg->event) {  	case NETDEV_UNREGISTER: -		return rt->dst.dev == dev ? -1 : 0; +		return rt->fib6_nh.nh_dev == dev ? -1 : 0;  	case NETDEV_DOWN:  		if (rt->should_flush)  			return -1; -		if (!rt->rt6i_nsiblings) -			return rt->dst.dev == dev ? -1 : 0; +		if (!rt->fib6_nsiblings) +			return rt->fib6_nh.nh_dev == dev ? -1 : 0;  		if (rt6_multipath_uses_dev(rt, dev)) {  			unsigned int count;  			count = rt6_multipath_dead_count(rt, dev); -			if (rt->rt6i_nsiblings + 1 == count) { +			if (rt->fib6_nsiblings + 1 == count) {  				rt6_multipath_flush(rt);  				return -1;  			}  			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |  						   RTNH_F_LINKDOWN); -			fib6_update_sernum(rt); +			fib6_update_sernum(net, rt);  			rt6_multipath_rebalance(rt);  		}  		return -2;  	case NETDEV_CHANGE: -		if (rt->dst.dev != dev || -		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) +		if (rt->fib6_nh.nh_dev != dev || +		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))  			break; -		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; +		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;  		rt6_multipath_rebalance(rt);  		break;  	} @@ -3936,7 +4074,7 @@ struct rt6_mtu_change_arg {  	unsigned int mtu;  }; -static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) +static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)  {  	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;  	struct inet6_dev *idev; @@ -3956,12 +4094,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)  	   Since RFC 1981 doesn't include administrative MTU increase  	   update PMTU increase is a MUST. (i.e. jumbo frame)  	 */ -	if (rt->dst.dev == arg->dev && -	    !dst_metric_locked(&rt->dst, RTAX_MTU)) { +	if (rt->fib6_nh.nh_dev == arg->dev && +	    !fib6_metric_locked(rt, RTAX_MTU)) { +		u32 mtu = rt->fib6_pmtu; + +		if (mtu >= arg->mtu || +		    (mtu < arg->mtu && mtu == idev->cnf.mtu6)) +			fib6_metric_set(rt, RTAX_MTU, arg->mtu); +  		spin_lock_bh(&rt6_exception_lock); -		if (dst_metric_raw(&rt->dst, RTAX_MTU) && -		    rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) -			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);  		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);  		spin_unlock_bh(&rt6_exception_lock);  	} @@ -3993,6 +4134,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {  	[RTA_UID]		= { .type = NLA_U32 },  	[RTA_MARK]		= { .type = NLA_U32 },  	[RTA_TABLE]		= { .type = NLA_U32 }, +	[RTA_IP_PROTO]		= { .type = NLA_U8 }, +	[RTA_SPORT]		= { .type = NLA_U16 }, +	[RTA_DPORT]		= { .type = NLA_U16 },  };  static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -4122,9 +4266,8 @@ errout:  }  struct rt6_nh { -	struct rt6_info *rt6_info; +	struct fib6_info *fib6_info;  	struct fib6_config r_cfg; -	struct mx6_config mxc;  	struct list_head next;  }; @@ -4139,23 +4282,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)  	}  } -static int ip6_route_info_append(struct list_head *rt6_nh_list, -				 struct rt6_info *rt, struct fib6_config *r_cfg) +static int ip6_route_info_append(struct net *net, +				 struct list_head *rt6_nh_list, +				 struct fib6_info *rt, +				 struct fib6_config *r_cfg)  {  	struct rt6_nh *nh;  	int err = -EEXIST;  	list_for_each_entry(nh, rt6_nh_list, next) { -		/* check if rt6_info already exists */ -		if (rt6_duplicate_nexthop(nh->rt6_info, rt)) +		/* check if fib6_info already exists */ +		if (rt6_duplicate_nexthop(nh->fib6_info, rt))  			return err;  	}  	nh = kzalloc(sizeof(*nh), GFP_KERNEL);  	if (!nh)  		return -ENOMEM; -	nh->rt6_info = rt; -	err = ip6_convert_metrics(&nh->mxc, r_cfg); +	nh->fib6_info = rt; +	err = ip6_convert_metrics(net, rt, r_cfg);  	if (err) {  		kfree(nh);  		return err; @@ -4166,8 +4311,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,  	return 0;  } -static void ip6_route_mpath_notify(struct rt6_info *rt, -				   struct rt6_info *rt_last, +static void ip6_route_mpath_notify(struct fib6_info *rt, +				   struct fib6_info *rt_last,  				   struct nl_info *info,  				   __u16 nlflags)  { @@ -4177,10 +4322,10 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,  	 * nexthop. Since sibling routes are always added at the end of  	 * the list, find the first sibling of the last route appended  	 */ -	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { -		rt = list_first_entry(&rt_last->rt6i_siblings, -				      struct rt6_info, -				      rt6i_siblings); +	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { +		rt = list_first_entry(&rt_last->fib6_siblings, +				      struct fib6_info, +				      fib6_siblings);  	}  	if (rt) @@ -4190,11 +4335,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,  static int ip6_route_multipath_add(struct fib6_config *cfg,  				   struct netlink_ext_ack *extack)  { -	struct rt6_info *rt_notif = NULL, *rt_last = NULL; +	struct fib6_info *rt_notif = NULL, *rt_last = NULL;  	struct nl_info *info = &cfg->fc_nlinfo;  	struct fib6_config r_cfg;  	struct rtnexthop *rtnh; -	struct rt6_info *rt; +	struct fib6_info *rt;  	struct rt6_nh *err_nh;  	struct rt6_nh *nh, *nh_safe;  	__u16 nlflags; @@ -4214,7 +4359,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,  	rtnh = (struct rtnexthop *)cfg->fc_mp;  	/* Parse a Multipath Entry and build a list (rt6_nh_list) of -	 * rt6_info structs per nexthop +	 * fib6_info structs per nexthop  	 */  	while (rtnh_ok(rtnh, remaining)) {  		memcpy(&r_cfg, cfg, sizeof(*cfg)); @@ -4237,18 +4382,26 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,  		}  		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); -		rt = ip6_route_info_create(&r_cfg, extack); +		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);  		if (IS_ERR(rt)) {  			err = PTR_ERR(rt);  			rt = NULL;  			goto cleanup;  		} +		if (!rt6_qualify_for_ecmp(rt)) { +			err = -EINVAL; +			NL_SET_ERR_MSG(extack, +				       "Device only routes can not be added for IPv6 using the multipath API."); +			fib6_info_release(rt); +			goto cleanup; +		} -		rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; +		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; -		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); +		err = ip6_route_info_append(info->nl_net, &rt6_nh_list, +					    rt, &r_cfg);  		if (err) { -			dst_release_immediate(&rt->dst); +			fib6_info_release(rt);  			goto cleanup;  		} @@ -4263,14 +4416,20 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,  	err_nh = NULL;  	list_for_each_entry(nh, &rt6_nh_list, next) { -		rt_last = nh->rt6_info; -		err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); -		/* save reference to first route for notification */ -		if (!rt_notif && !err) -			rt_notif = nh->rt6_info; - -		/* nh->rt6_info is used or freed at this point, reset to NULL*/ -		nh->rt6_info = NULL; +		err = __ip6_ins_rt(nh->fib6_info, info, extack); +		fib6_info_release(nh->fib6_info); + +		if (!err) { +			/* save reference to last route successfully inserted */ +			rt_last = nh->fib6_info; + +			/* save reference to first route for notification */ +			if (!rt_notif) +				rt_notif = nh->fib6_info; +		} + +		/* nh->fib6_info is used or freed at this point, reset to NULL*/ +		nh->fib6_info = NULL;  		if (err) {  			if (replace && nhn)  				ip6_print_replace_route_err(&rt6_nh_list); @@ -4311,9 +4470,8 @@ add_errout:  cleanup:  	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { -		if (nh->rt6_info) -			dst_release_immediate(&nh->rt6_info->dst); -		kfree(nh->mxc.mx); +		if (nh->fib6_info) +			fib6_info_release(nh->fib6_info);  		list_del(&nh->next);  		kfree(nh);  	} @@ -4390,20 +4548,20 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (cfg.fc_mp)  		return ip6_route_multipath_add(&cfg, extack);  	else -		return ip6_route_add(&cfg, extack); +		return ip6_route_add(&cfg, GFP_KERNEL, extack);  } -static size_t rt6_nlmsg_size(struct rt6_info *rt) +static size_t rt6_nlmsg_size(struct fib6_info *rt)  {  	int nexthop_len = 0; -	if (rt->rt6i_nsiblings) { +	if (rt->fib6_nsiblings) {  		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */  			    + NLA_ALIGN(sizeof(struct rtnexthop))  			    + nla_total_size(16) /* RTA_GATEWAY */ -			    + lwtunnel_get_encap_size(rt->dst.lwtstate); +			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); -		nexthop_len *= rt->rt6i_nsiblings; +		nexthop_len *= rt->fib6_nsiblings;  	}  	return NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -4419,38 +4577,41 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)  	       + nla_total_size(sizeof(struct rta_cacheinfo))  	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */  	       + nla_total_size(1) /* RTA_PREF */ -	       + lwtunnel_get_encap_size(rt->dst.lwtstate) +	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)  	       + nexthop_len;  } -static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,  			    unsigned int *flags, bool skip_oif)  { -	if (rt->rt6i_nh_flags & RTNH_F_DEAD) +	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)  		*flags |= RTNH_F_DEAD; -	if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { +	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {  		*flags |= RTNH_F_LINKDOWN; -		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) + +		rcu_read_lock(); +		if (fib6_ignore_linkdown(rt))  			*flags |= RTNH_F_DEAD; +		rcu_read_unlock();  	} -	if (rt->rt6i_flags & RTF_GATEWAY) { -		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) +	if (rt->fib6_flags & RTF_GATEWAY) { +		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)  			goto nla_put_failure;  	} -	*flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); -	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) +	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); +	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)  		*flags |= RTNH_F_OFFLOAD;  	/* not needed for multipath encoding b/c it has a rtnexthop struct */ -	if (!skip_oif && rt->dst.dev && -	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) +	if (!skip_oif && rt->fib6_nh.nh_dev && +	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))  		goto nla_put_failure; -	if (rt->dst.lwtstate && -	    lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) +	if (rt->fib6_nh.nh_lwtstate && +	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)  		goto nla_put_failure;  	return 0; @@ -4460,8 +4621,9 @@ nla_put_failure:  }  /* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) +static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)  { +	const struct net_device *dev = rt->fib6_nh.nh_dev;  	struct rtnexthop *rtnh;  	unsigned int flags = 0; @@ -4469,8 +4631,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)  	if (!rtnh)  		goto nla_put_failure; -	rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; -	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; +	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; +	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;  	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)  		goto nla_put_failure; @@ -4486,16 +4648,16 @@ nla_put_failure:  	return -EMSGSIZE;  } -static int rt6_fill_node(struct net *net, -			 struct sk_buff *skb, struct rt6_info *rt, -			 struct in6_addr *dst, struct in6_addr *src, +static int rt6_fill_node(struct net *net, struct sk_buff *skb, +			 struct fib6_info *rt, struct dst_entry *dst, +			 struct in6_addr *dest, struct in6_addr *src,  			 int iif, int type, u32 portid, u32 seq,  			 unsigned int flags)  { -	u32 metrics[RTAX_MAX];  	struct rtmsg *rtm;  	struct nlmsghdr *nlh; -	long expires; +	long expires = 0; +	u32 *pmetrics;  	u32 table;  	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); @@ -4504,53 +4666,31 @@ static int rt6_fill_node(struct net *net,  	rtm = nlmsg_data(nlh);  	rtm->rtm_family = AF_INET6; -	rtm->rtm_dst_len = rt->rt6i_dst.plen; -	rtm->rtm_src_len = rt->rt6i_src.plen; +	rtm->rtm_dst_len = rt->fib6_dst.plen; +	rtm->rtm_src_len = rt->fib6_src.plen;  	rtm->rtm_tos = 0; -	if (rt->rt6i_table) -		table = rt->rt6i_table->tb6_id; +	if (rt->fib6_table) +		table = rt->fib6_table->tb6_id;  	else  		table = RT6_TABLE_UNSPEC;  	rtm->rtm_table = table;  	if (nla_put_u32(skb, RTA_TABLE, table))  		goto nla_put_failure; -	if (rt->rt6i_flags & RTF_REJECT) { -		switch (rt->dst.error) { -		case -EINVAL: -			rtm->rtm_type = RTN_BLACKHOLE; -			break; -		case -EACCES: -			rtm->rtm_type = RTN_PROHIBIT; -			break; -		case -EAGAIN: -			rtm->rtm_type = RTN_THROW; -			break; -		default: -			rtm->rtm_type = RTN_UNREACHABLE; -			break; -		} -	} -	else if (rt->rt6i_flags & RTF_LOCAL) -		rtm->rtm_type = RTN_LOCAL; -	else if (rt->rt6i_flags & RTF_ANYCAST) -		rtm->rtm_type = RTN_ANYCAST; -	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) -		rtm->rtm_type = RTN_LOCAL; -	else -		rtm->rtm_type = RTN_UNICAST; + +	rtm->rtm_type = rt->fib6_type;  	rtm->rtm_flags = 0;  	rtm->rtm_scope = RT_SCOPE_UNIVERSE; -	rtm->rtm_protocol = rt->rt6i_protocol; +	rtm->rtm_protocol = rt->fib6_protocol; -	if (rt->rt6i_flags & RTF_CACHE) +	if (rt->fib6_flags & RTF_CACHE)  		rtm->rtm_flags |= RTM_F_CLONED; -	if (dst) { -		if (nla_put_in6_addr(skb, RTA_DST, dst)) +	if (dest) { +		if (nla_put_in6_addr(skb, RTA_DST, dest))  			goto nla_put_failure;  		rtm->rtm_dst_len = 128;  	} else if (rtm->rtm_dst_len) -		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) +		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))  			goto nla_put_failure;  #ifdef CONFIG_IPV6_SUBTREES  	if (src) { @@ -4558,12 +4698,12 @@ static int rt6_fill_node(struct net *net,  			goto nla_put_failure;  		rtm->rtm_src_len = 128;  	} else if (rtm->rtm_src_len && -		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) +		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))  		goto nla_put_failure;  #endif  	if (iif) {  #ifdef CONFIG_IPV6_MROUTE -		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { +		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {  			int err = ip6mr_get_route(net, skb, rtm, portid);  			if (err == 0) @@ -4574,34 +4714,32 @@ static int rt6_fill_node(struct net *net,  #endif  			if (nla_put_u32(skb, RTA_IIF, iif))  				goto nla_put_failure; -	} else if (dst) { +	} else if (dest) {  		struct in6_addr saddr_buf; -		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && +		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&  		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))  			goto nla_put_failure;  	} -	if (rt->rt6i_prefsrc.plen) { +	if (rt->fib6_prefsrc.plen) {  		struct in6_addr saddr_buf; -		saddr_buf = rt->rt6i_prefsrc.addr; +		saddr_buf = rt->fib6_prefsrc.addr;  		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))  			goto nla_put_failure;  	} -	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); -	if (rt->rt6i_pmtu) -		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; -	if (rtnetlink_put_metrics(skb, metrics) < 0) +	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; +	if (rtnetlink_put_metrics(skb, pmetrics) < 0)  		goto nla_put_failure; -	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) +	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))  		goto nla_put_failure;  	/* For multipath routes, walk the siblings list and add  	 * each as a nexthop within RTA_MULTIPATH.  	 */ -	if (rt->rt6i_nsiblings) { -		struct rt6_info *sibling, *next_sibling; +	if (rt->fib6_nsiblings) { +		struct fib6_info *sibling, *next_sibling;  		struct nlattr *mp;  		mp = nla_nest_start(skb, RTA_MULTIPATH); @@ -4612,7 +4750,7 @@ static int rt6_fill_node(struct net *net,  			goto nla_put_failure;  		list_for_each_entry_safe(sibling, next_sibling, -					 &rt->rt6i_siblings, rt6i_siblings) { +					 &rt->fib6_siblings, fib6_siblings) {  			if (rt6_add_nexthop(skb, sibling) < 0)  				goto nla_put_failure;  		} @@ -4623,12 +4761,15 @@ static int rt6_fill_node(struct net *net,  			goto nla_put_failure;  	} -	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; +	if (rt->fib6_flags & RTF_EXPIRES) { +		expires = dst ? dst->expires : rt->expires; +		expires -= jiffies; +	} -	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) +	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)  		goto nla_put_failure; -	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) +	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))  		goto nla_put_failure; @@ -4640,12 +4781,12 @@ nla_put_failure:  	return -EMSGSIZE;  } -int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct fib6_info *rt, void *p_arg)  {  	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;  	struct net *net = arg->net; -	if (rt == net->ipv6.ip6_null_entry) +	if (rt == net->ipv6.fib6_null_entry)  		return 0;  	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { @@ -4653,16 +4794,15 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)  		/* user wants prefix routes only */  		if (rtm->rtm_flags & RTM_F_PREFIX && -		    !(rt->rt6i_flags & RTF_PREFIX_RT)) { +		    !(rt->fib6_flags & RTF_PREFIX_RT)) {  			/* success since this is not a prefix route */  			return 1;  		}  	} -	return rt6_fill_node(net, -		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, -		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, -		     NLM_F_MULTI); +	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, +			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, +			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);  }  static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, @@ -4671,6 +4811,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,  	struct net *net = sock_net(in_skb->sk);  	struct nlattr *tb[RTA_MAX+1];  	int err, iif = 0, oif = 0; +	struct fib6_info *from;  	struct dst_entry *dst;  	struct rt6_info *rt;  	struct sk_buff *skb; @@ -4718,6 +4859,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,  	else  		fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); +	if (tb[RTA_SPORT]) +		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]); + +	if (tb[RTA_DPORT]) +		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]); + +	if (tb[RTA_IP_PROTO]) { +		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], +						  &fl6.flowi6_proto, extack); +		if (err) +			goto errout; +	} +  	if (iif) {  		struct net_device *dev;  		int flags = 0; @@ -4759,14 +4913,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,  		goto errout;  	} -	if (fibmatch && rt->from) { -		struct rt6_info *ort = rt->from; - -		dst_hold(&ort->dst); -		ip6_rt_put(rt); -		rt = ort; -	} -  	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);  	if (!skb) {  		ip6_rt_put(rt); @@ -4775,14 +4921,21 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,  	}  	skb_dst_set(skb, &rt->dst); + +	rcu_read_lock(); +	from = rcu_dereference(rt->from); +  	if (fibmatch) -		err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, +		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,  				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,  				    nlh->nlmsg_seq, 0);  	else -		err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, -				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid, -				    nlh->nlmsg_seq, 0); +		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, +				    &fl6.saddr, iif, RTM_NEWROUTE, +				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, +				    0); +	rcu_read_unlock(); +  	if (err < 0) {  		kfree_skb(skb);  		goto errout; @@ -4793,7 +4946,7 @@ errout:  	return err;  } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,  		     unsigned int nlm_flags)  {  	struct sk_buff *skb; @@ -4808,8 +4961,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,  	if (!skb)  		goto errout; -	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, -				event, info->portid, seq, nlm_flags); +	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, +			    event, info->portid, seq, nlm_flags);  	if (err < 0) {  		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */  		WARN_ON(err == -EMSGSIZE); @@ -4834,6 +4987,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,  		return NOTIFY_OK;  	if (event == NETDEV_REGISTER) { +		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;  		net->ipv6.ip6_null_entry->dst.dev = dev;  		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);  #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -4862,14 +5016,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,   */  #ifdef CONFIG_PROC_FS - -static const struct file_operations ipv6_route_proc_fops = { -	.open		= ipv6_route_open, -	.read		= seq_read, -	.llseek		= seq_lseek, -	.release	= seq_release_net, -}; -  static int rt6_stats_seq_show(struct seq_file *seq, void *v)  {  	struct net *net = (struct net *)seq->private; @@ -4884,18 +5030,6 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)  	return 0;  } - -static int rt6_stats_seq_open(struct inode *inode, struct file *file) -{ -	return single_open_net(inode, file, rt6_stats_seq_show); -} - -static const struct file_operations rt6_stats_seq_fops = { -	.open	 = rt6_stats_seq_open, -	.read	 = seq_read, -	.llseek	 = seq_lseek, -	.release = single_release_net, -};  #endif	/* CONFIG_PROC_FS */  #ifdef CONFIG_SYSCTL @@ -5030,11 +5164,17 @@ static int __net_init ip6_route_net_init(struct net *net)  	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)  		goto out_ip6_dst_ops; +	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, +					    sizeof(*net->ipv6.fib6_null_entry), +					    GFP_KERNEL); +	if (!net->ipv6.fib6_null_entry) +		goto out_ip6_dst_entries; +  	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,  					   sizeof(*net->ipv6.ip6_null_entry),  					   GFP_KERNEL);  	if (!net->ipv6.ip6_null_entry) -		goto out_ip6_dst_entries; +		goto out_fib6_null_entry;  	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;  	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,  			 ip6_template_metrics, true); @@ -5081,6 +5221,8 @@ out_ip6_prohibit_entry:  out_ip6_null_entry:  	kfree(net->ipv6.ip6_null_entry);  #endif +out_fib6_null_entry: +	kfree(net->ipv6.fib6_null_entry);  out_ip6_dst_entries:  	dst_entries_destroy(&net->ipv6.ip6_dst_ops);  out_ip6_dst_ops: @@ -5089,6 +5231,7 @@ out_ip6_dst_ops:  static void __net_exit ip6_route_net_exit(struct net *net)  { +	kfree(net->ipv6.fib6_null_entry);  	kfree(net->ipv6.ip6_null_entry);  #ifdef CONFIG_IPV6_MULTIPLE_TABLES  	kfree(net->ipv6.ip6_prohibit_entry); @@ -5100,8 +5243,10 @@ static void __net_exit ip6_route_net_exit(struct net *net)  static int __net_init ip6_route_net_init_late(struct net *net)  {  #ifdef CONFIG_PROC_FS -	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); -	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops); +	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, +			sizeof(struct ipv6_route_iter)); +	proc_create_net_single("rt6_stats", 0444, net->proc_net, +			rt6_stats_seq_show, NULL);  #endif  	return 0;  } @@ -5159,6 +5304,7 @@ void __init ip6_route_init_special_entries(void)  	/* Registering of the loopback is done before this portion of code,  	 * the loopback reference in rt6_info will not be taken, do it  	 * manually for init_net */ +	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;  	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;  	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);    #ifdef CONFIG_IPV6_MULTIPLE_TABLES  |