diff options
Diffstat (limited to 'net/ipv6/route.c')
| -rw-r--r-- | net/ipv6/route.c | 567 | 
1 files changed, 433 insertions, 134 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0458b761f3c5..fb2d251c0500 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)  static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)  { -	return dst_metrics_write_ptr(rt->dst.from); +	return dst_metrics_write_ptr(&rt->from->dst);  }  static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) @@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)  {  	struct rt6_info *rt = (struct rt6_info *)dst;  	struct rt6_exception_bucket *bucket; -	struct dst_entry *from = dst->from; +	struct rt6_info *from = rt->from;  	struct inet6_dev *idev;  	dst_destroy_metrics_generic(dst); @@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)  		kfree(bucket);  	} -	dst->from = NULL; -	dst_release(from); +	rt->from = NULL; +	dst_release(&from->dst);  }  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt)  	if (rt->rt6i_flags & RTF_EXPIRES) {  		if (time_after(jiffies, rt->dst.expires))  			return true; -	} else if (rt->dst.from) { +	} else if (rt->from) {  		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || -		       rt6_check_expired((struct rt6_info *)rt->dst.from); +			rt6_check_expired(rt->from);  	}  	return false;  } @@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,  					     int strict)  {  	struct rt6_info *sibling, *next_sibling; -	int route_choosen;  	/* We might have already computed the hash for ICMPv6 errors. In such  	 * case it will always be non-zero. Otherwise now is the time to do it. @@ -463,26 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,  	if (!fl6->mp_hash)  		fl6->mp_hash = rt6_multipath_hash(fl6, NULL); -	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1); -	/* Don't change the route, if route_choosen == 0 -	 * (siblings does not include ourself) -	 */ -	if (route_choosen) -		list_for_each_entry_safe(sibling, next_sibling, -				&match->rt6i_siblings, rt6i_siblings) { -			route_choosen--; -			if (route_choosen == 0) { -				struct inet6_dev *idev = sibling->rt6i_idev; - -				if (!netif_carrier_ok(sibling->dst.dev) && -				    idev->cnf.ignore_routes_with_linkdown) -					break; -				if (rt6_score_route(sibling, oif, strict) < 0) -					break; -				match = sibling; -				break; -			} -		} +	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) +		return match; + +	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, +				 rt6i_siblings) { +		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) +			continue; +		if (rt6_score_route(sibling, oif, strict) < 0) +			break; +		match = sibling; +		break; +	} +  	return match;  } @@ -499,12 +491,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net,  	struct rt6_info *local = NULL;  	struct rt6_info *sprt; -	if (!oif && ipv6_addr_any(saddr)) -		goto out; +	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) +		return rt; -	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) { +	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {  		struct net_device *dev = sprt->dst.dev; +		if (sprt->rt6i_nh_flags & RTNH_F_DEAD) +			continue; +  		if (oif) {  			if (dev->ifindex == oif)  				return sprt; @@ -533,8 +528,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net,  		if (flags & RT6_LOOKUP_F_IFACE)  			return net->ipv6.ip6_null_entry;  	} -out: -	return rt; + +	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;  }  #ifdef CONFIG_IPV6_ROUTER_PREF @@ -679,10 +674,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,  	int m;  	bool match_do_rr = false;  	struct inet6_dev *idev = rt->rt6i_idev; -	struct net_device *dev = rt->dst.dev; -	if (dev && !netif_carrier_ok(dev) && -	    idev->cnf.ignore_routes_with_linkdown && +	if (rt->rt6i_nh_flags & RTNH_F_DEAD) +		goto out; + +	if (idev->cnf.ignore_routes_with_linkdown && +	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&  	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))  		goto out; @@ -721,7 +718,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,  	match = NULL;  	cont = NULL; -	for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) { +	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {  		if (rt->rt6i_metric != metric) {  			cont = rt;  			break; @@ -731,7 +728,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,  	}  	for (rt = leaf; rt && rt != rr_head; -	     rt = rcu_dereference(rt->dst.rt6_next)) { +	     rt = rcu_dereference(rt->rt6_next)) {  		if (rt->rt6i_metric != metric) {  			cont = rt;  			break; @@ -743,7 +740,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,  	if (match || !cont)  		return match; -	for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next)) +	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))  		match = find_match(rt, oif, strict, &mpri, match, do_rr);  	return match; @@ -781,7 +778,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,  			     &do_rr);  	if (do_rr) { -		struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next); +		struct rt6_info *next = rcu_dereference(rt0->rt6_next);  		/* no entries matched; do round-robin */  		if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -1054,7 +1051,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,  	 */  	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) -		ort = (struct rt6_info *)ort->dst.from; +		ort = ort->from;  	rcu_read_lock();  	dev = ip6_rt_get_dev_rcu(ort); @@ -1274,7 +1271,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,  	/* ort can't be a cache or pcpu route */  	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) -		ort = (struct rt6_info *)ort->dst.from; +		ort = ort->from;  	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));  	spin_lock_bh(&rt6_exception_lock); @@ -1346,7 +1343,9 @@ out:  	/* Update fn->fn_sernum to invalidate all cached dst */  	if (!err) { +		spin_lock_bh(&ort->rt6i_table->tb6_lock);  		fib6_update_sernum(ort); +		spin_unlock_bh(&ort->rt6i_table->tb6_lock);  		fib6_force_start_gc(net);  	} @@ -1415,8 +1414,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,  /* Remove the passed in cached rt from the hash table that contains it */  int rt6_remove_exception_rt(struct rt6_info *rt)  { -	struct rt6_info *from = (struct rt6_info *)rt->dst.from;  	struct rt6_exception_bucket *bucket; +	struct rt6_info *from = rt->from;  	struct in6_addr *src_key = NULL;  	struct rt6_exception *rt6_ex;  	int err; @@ -1460,8 +1459,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt)   */  static void rt6_update_exception_stamp_rt(struct rt6_info *rt)  { -	struct rt6_info *from = (struct rt6_info *)rt->dst.from;  	struct rt6_exception_bucket *bucket; +	struct rt6_info *from = rt->from;  	struct in6_addr *src_key = NULL;  	struct rt6_exception *rt6_ex; @@ -1586,12 +1585,19 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,  	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when  	 * expired, independently from their aging, as per RFC 8201 section 4  	 */ -	if (!(rt->rt6i_flags & RTF_EXPIRES) && -	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { -		RT6_TRACE("aging clone %p\n", rt); +	if (!(rt->rt6i_flags & RTF_EXPIRES)) { +		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { +			RT6_TRACE("aging clone %p\n", rt); +			rt6_remove_exception(bucket, rt6_ex); +			return; +		} +	} else if (time_after(jiffies, rt->dst.expires)) { +		RT6_TRACE("purging expired route %p\n", rt);  		rt6_remove_exception(bucket, rt6_ex);  		return; -	} else if (rt->rt6i_flags & RTF_GATEWAY) { +	} + +	if (rt->rt6i_flags & RTF_GATEWAY) {  		struct neighbour *neigh;  		__u8 neigh_flags = 0; @@ -1606,11 +1612,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,  			rt6_remove_exception(bucket, rt6_ex);  			return;  		} -	} else if (__rt6_check_expired(rt)) { -		RT6_TRACE("purging expired route %p\n", rt); -		rt6_remove_exception(bucket, rt6_ex); -		return;  	} +  	gc_args->more++;  } @@ -1824,10 +1827,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)  	if (skb) {  		ip6_multipath_l3_keys(skb, &hash_keys); -		return flow_hash_from_keys(&hash_keys); +		return flow_hash_from_keys(&hash_keys) >> 1;  	} -	return get_hash_from_flowi6(fl6); +	return get_hash_from_flowi6(fl6) >> 1;  }  void ip6_route_input(struct sk_buff *skb) @@ -1929,9 +1932,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori  static void rt6_dst_from_metrics_check(struct rt6_info *rt)  { -	if (rt->dst.from && -	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) -		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +	if (rt->from && +	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) +		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);  }  static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) @@ -1951,7 +1954,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)  {  	if (!__rt6_check_expired(rt) &&  	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && -	    rt6_check((struct rt6_info *)(rt->dst.from), cookie)) +	    rt6_check(rt->from, cookie))  		return &rt->dst;  	else  		return NULL; @@ -1971,7 +1974,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)  	rt6_dst_from_metrics_check(rt);  	if (rt->rt6i_flags & RTF_PCPU || -	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) +	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))  		return rt6_dst_from_check(rt, cookie);  	else  		return rt6_check(rt, cookie); @@ -2154,6 +2157,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,  	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);  restart:  	for_each_fib6_node_rt_rcu(fn) { +		if (rt->rt6i_nh_flags & RTNH_F_DEAD) +			continue;  		if (rt6_check_expired(rt))  			continue;  		if (rt->dst.error) @@ -2344,7 +2349,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,  	rt->rt6i_idev     = idev;  	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); -	/* Add this dst into uncached_list so that rt6_ifdown() can +	/* Add this dst into uncached_list so that rt6_disable_ip() can  	 * do proper release of the net_device  	 */  	rt6_uncached_list_add(rt); @@ -2439,7 +2444,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,  static struct rt6_info *ip6_nh_lookup_table(struct net *net,  					    struct fib6_config *cfg, -					    const struct in6_addr *gw_addr) +					    const struct in6_addr *gw_addr, +					    u32 tbid, int flags)  {  	struct flowi6 fl6 = {  		.flowi6_oif = cfg->fc_ifindex, @@ -2448,15 +2454,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,  	};  	struct fib6_table *table;  	struct rt6_info *rt; -	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; -	table = fib6_get_table(net, cfg->fc_table); +	table = fib6_get_table(net, tbid);  	if (!table)  		return NULL;  	if (!ipv6_addr_any(&cfg->fc_prefsrc))  		flags |= RT6_LOOKUP_F_HAS_SADDR; +	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;  	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);  	/* if table lookup failed, fall back to full lookup */ @@ -2468,6 +2474,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,  	return rt;  } +static int ip6_route_check_nh_onlink(struct net *net, +				     struct fib6_config *cfg, +				     struct net_device *dev, +				     struct netlink_ext_ack *extack) +{ +	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; +	const struct in6_addr *gw_addr = &cfg->fc_gateway; +	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; +	struct rt6_info *grt; +	int err; + +	err = 0; +	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); +	if (grt) { +		if (grt->rt6i_flags & flags || dev != grt->dst.dev) { +			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); +			err = -EINVAL; +		} + +		ip6_rt_put(grt); +	} + +	return err; +} + +static int ip6_route_check_nh(struct net *net, +			      struct fib6_config *cfg, +			      struct net_device **_dev, +			      struct inet6_dev **idev) +{ +	const struct in6_addr *gw_addr = &cfg->fc_gateway; +	struct net_device *dev = _dev ? *_dev : NULL; +	struct rt6_info *grt = NULL; +	int err = -EHOSTUNREACH; + +	if (cfg->fc_table) { +		int flags = RT6_LOOKUP_F_IFACE; + +		grt = ip6_nh_lookup_table(net, cfg, gw_addr, +					  cfg->fc_table, flags); +		if (grt) { +			if (grt->rt6i_flags & RTF_GATEWAY || +			    (dev && dev != grt->dst.dev)) { +				ip6_rt_put(grt); +				grt = NULL; +			} +		} +	} + +	if (!grt) +		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + +	if (!grt) +		goto out; + +	if (dev) { +		if (dev != grt->dst.dev) { +			ip6_rt_put(grt); +			goto out; +		} +	} else { +		*_dev = dev = grt->dst.dev; +		*idev = grt->rt6i_idev; +		dev_hold(dev); +		in6_dev_hold(grt->rt6i_idev); +	} + +	if (!(grt->rt6i_flags & RTF_GATEWAY)) +		err = 0; + +	ip6_rt_put(grt); + +out: +	return err; +} +  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  					      struct netlink_ext_ack *extack)  { @@ -2519,6 +2601,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  	if (cfg->fc_metric == 0)  		cfg->fc_metric = IP6_RT_PRIO_USER; +	if (cfg->fc_flags & RTNH_F_ONLINK) { +		if (!dev) { +			NL_SET_ERR_MSG(extack, +				       "Nexthop device required for onlink"); +			err = -ENODEV; +			goto out; +		} + +		if (!(dev->flags & IFF_UP)) { +			NL_SET_ERR_MSG(extack, "Nexthop device is not up"); +			err = -ENETDOWN; +			goto out; +		} +	} +  	err = -ENOBUFS;  	if (cfg->fc_nlinfo.nlh &&  	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { @@ -2593,6 +2690,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  #endif  	rt->rt6i_metric = cfg->fc_metric; +	rt->rt6i_nh_weight = 1;  	/* We cannot add true routes via loopback here,  	   they would result in kernel looping; promote them to reject routes @@ -2662,8 +2760,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  		rt->rt6i_gateway = *gw_addr;  		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { -			struct rt6_info *grt = NULL; -  			/* IPv6 strictly inhibits using not link-local  			   addresses as nexthop address.  			   Otherwise, router will not able to send redirects. @@ -2680,40 +2776,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  				goto out;  			} -			if (cfg->fc_table) { -				grt = ip6_nh_lookup_table(net, cfg, gw_addr); - -				if (grt) { -					if (grt->rt6i_flags & RTF_GATEWAY || -					    (dev && dev != grt->dst.dev)) { -						ip6_rt_put(grt); -						grt = NULL; -					} -				} -			} - -			if (!grt) -				grt = rt6_lookup(net, gw_addr, NULL, -						 cfg->fc_ifindex, 1); - -			err = -EHOSTUNREACH; -			if (!grt) -				goto out; -			if (dev) { -				if (dev != grt->dst.dev) { -					ip6_rt_put(grt); -					goto out; -				} +			if (cfg->fc_flags & RTNH_F_ONLINK) { +				err = ip6_route_check_nh_onlink(net, cfg, dev, +								extack);  			} else { -				dev = grt->dst.dev; -				idev = grt->rt6i_idev; -				dev_hold(dev); -				in6_dev_hold(grt->rt6i_idev); +				err = ip6_route_check_nh(net, cfg, &dev, &idev);  			} -			if (!(grt->rt6i_flags & RTF_GATEWAY)) -				err = 0; -			ip6_rt_put(grt); -  			if (err)  				goto out;  		} @@ -2732,6 +2800,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  	if (!dev)  		goto out; +	if (!(dev->flags & IFF_UP)) { +		NL_SET_ERR_MSG(extack, "Nexthop device is not up"); +		err = -ENETDOWN; +		goto out; +	} +  	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {  			NL_SET_ERR_MSG(extack, "Invalid source address"); @@ -2746,6 +2820,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,  	rt->rt6i_flags = cfg->fc_flags;  install_route: +	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && +	    !netif_carrier_ok(dev)) +		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; +	rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);  	rt->dst.dev = dev;  	rt->rt6i_idev = idev;  	rt->rt6i_table = table; @@ -3056,11 +3134,11 @@ out:  static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)  { -	BUG_ON(from->dst.from); +	BUG_ON(from->from);  	rt->rt6i_flags &= ~RTF_EXPIRES;  	dst_hold(&from->dst); -	rt->dst.from = &from->dst; +	rt->from = from;  	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);  } @@ -3459,37 +3537,249 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)  	fib6_clean_all(net, fib6_clean_tohost, gateway);  } -struct arg_dev_net { -	struct net_device *dev; -	struct net *net; +struct arg_netdev_event { +	const struct net_device *dev; +	union { +		unsigned int nh_flags; +		unsigned long event; +	};  }; +static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +{ +	struct rt6_info *iter; +	struct fib6_node *fn; + +	fn = rcu_dereference_protected(rt->rt6i_node, +			lockdep_is_held(&rt->rt6i_table->tb6_lock)); +	iter = rcu_dereference_protected(fn->leaf, +			lockdep_is_held(&rt->rt6i_table->tb6_lock)); +	while (iter) { +		if (iter->rt6i_metric == rt->rt6i_metric && +		    rt6_qualify_for_ecmp(iter)) +			return iter; +		iter = rcu_dereference_protected(iter->rt6_next, +				lockdep_is_held(&rt->rt6i_table->tb6_lock)); +	} + +	return NULL; +} + +static bool rt6_is_dead(const struct rt6_info *rt) +{ +	if (rt->rt6i_nh_flags & RTNH_F_DEAD || +	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && +	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) +		return true; + +	return false; +} + +static int rt6_multipath_total_weight(const struct rt6_info *rt) +{ +	struct rt6_info *iter; +	int total = 0; + +	if (!rt6_is_dead(rt)) +		total += rt->rt6i_nh_weight; + +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { +		if (!rt6_is_dead(iter)) +			total += iter->rt6i_nh_weight; +	} + +	return total; +} + +static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +{ +	int upper_bound = -1; + +	if (!rt6_is_dead(rt)) { +		*weight += rt->rt6i_nh_weight; +		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, +						    total) - 1; +	} +	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); +} + +static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +{ +	struct rt6_info *iter; +	int weight = 0; + +	rt6_upper_bound_set(rt, &weight, total); + +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +		rt6_upper_bound_set(iter, &weight, total); +} + +void rt6_multipath_rebalance(struct rt6_info *rt) +{ +	struct rt6_info *first; +	int total; + +	/* In case the entire multipath route was marked for flushing, +	 * then there is no need to rebalance upon the removal of every +	 * sibling route. +	 */ +	if (!rt->rt6i_nsiblings || rt->should_flush) +		return; + +	/* During lookup routes are evaluated in order, so we need to +	 * make sure upper bounds are assigned from the first sibling +	 * onwards. +	 */ +	first = rt6_multipath_first_sibling(rt); +	if (WARN_ON_ONCE(!first)) +		return; + +	total = rt6_multipath_total_weight(first); +	rt6_multipath_upper_bound_set(first, total); +} + +static int fib6_ifup(struct rt6_info *rt, void *p_arg) +{ +	const struct arg_netdev_event *arg = p_arg; +	const struct net *net = dev_net(arg->dev); + +	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { +		rt->rt6i_nh_flags &= ~arg->nh_flags; +		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); +		rt6_multipath_rebalance(rt); +	} + +	return 0; +} + +void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) +{ +	struct arg_netdev_event arg = { +		.dev = dev, +		{ +			.nh_flags = nh_flags, +		}, +	}; + +	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) +		arg.nh_flags |= RTNH_F_LINKDOWN; + +	fib6_clean_all(dev_net(dev), fib6_ifup, &arg); +} + +static bool rt6_multipath_uses_dev(const struct rt6_info *rt, +				   const struct net_device *dev) +{ +	struct rt6_info *iter; + +	if (rt->dst.dev == dev) +		return true; +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +		if (iter->dst.dev == dev) +			return true; + +	return false; +} + +static void rt6_multipath_flush(struct rt6_info *rt) +{ +	struct rt6_info *iter; + +	rt->should_flush = 1; +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +		iter->should_flush = 1; +} + +static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, +					     const struct net_device *down_dev) +{ +	struct rt6_info *iter; +	unsigned int dead = 0; + +	if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) +		dead++; +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +		if (iter->dst.dev == down_dev || +		    iter->rt6i_nh_flags & RTNH_F_DEAD) +			dead++; + +	return dead; +} + +static void rt6_multipath_nh_flags_set(struct rt6_info *rt, +				       const struct net_device *dev, +				       unsigned int nh_flags) +{ +	struct rt6_info *iter; + +	if (rt->dst.dev == dev) +		rt->rt6i_nh_flags |= nh_flags; +	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) +		if (iter->dst.dev == dev) +			iter->rt6i_nh_flags |= nh_flags; +} +  /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *arg) +static int fib6_ifdown(struct rt6_info *rt, void *p_arg)  { -	const struct arg_dev_net *adn = arg; -	const struct net_device *dev = adn->dev; +	const struct arg_netdev_event *arg = p_arg; +	const struct net_device *dev = arg->dev; +	const struct net *net = dev_net(dev); -	if ((rt->dst.dev == dev || !dev) && -	    rt != adn->net->ipv6.ip6_null_entry && -	    (rt->rt6i_nsiblings == 0 || -	     (dev && netdev_unregistering(dev)) || -	     !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) -		return -1; +	if (rt == net->ipv6.ip6_null_entry) +		return 0; + +	switch (arg->event) { +	case NETDEV_UNREGISTER: +		return rt->dst.dev == dev ? -1 : 0; +	case NETDEV_DOWN: +		if (rt->should_flush) +			return -1; +		if (!rt->rt6i_nsiblings) +			return rt->dst.dev == dev ? -1 : 0; +		if (rt6_multipath_uses_dev(rt, dev)) { +			unsigned int count; + +			count = rt6_multipath_dead_count(rt, dev); +			if (rt->rt6i_nsiblings + 1 == count) { +				rt6_multipath_flush(rt); +				return -1; +			} +			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | +						   RTNH_F_LINKDOWN); +			fib6_update_sernum(rt); +			rt6_multipath_rebalance(rt); +		} +		return -2; +	case NETDEV_CHANGE: +		if (rt->dst.dev != dev || +		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) +			break; +		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; +		rt6_multipath_rebalance(rt); +		break; +	}  	return 0;  } -void rt6_ifdown(struct net *net, struct net_device *dev) +void rt6_sync_down_dev(struct net_device *dev, unsigned long event)  { -	struct arg_dev_net adn = { +	struct arg_netdev_event arg = {  		.dev = dev, -		.net = net, +		{ +			.event = event, +		},  	}; -	fib6_clean_all(net, fib6_ifdown, &adn); -	if (dev) -		rt6_uncached_list_flush_dev(net, dev); +	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg); +} + +void rt6_disable_ip(struct net_device *dev, unsigned long event) +{ +	rt6_sync_down_dev(dev, event); +	rt6_uncached_list_flush_dev(dev_net(dev), dev); +	neigh_ifdown(&nd_tbl, dev);  }  struct rt6_mtu_change_arg { @@ -3603,6 +3893,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (rtm->rtm_flags & RTM_F_CLONED)  		cfg->fc_flags |= RTF_CACHE; +	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); +  	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;  	cfg->fc_nlinfo.nlh = nlh;  	cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -3812,6 +4104,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,  			goto cleanup;  		} +		rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; +  		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);  		if (err) {  			dst_release_immediate(&rt->dst); @@ -3992,7 +4286,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)  static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,  			    unsigned int *flags, bool skip_oif)  { -	if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { +	if (rt->rt6i_nh_flags & RTNH_F_DEAD) +		*flags |= RTNH_F_DEAD; + +	if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {  		*flags |= RTNH_F_LINKDOWN;  		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)  			*flags |= RTNH_F_DEAD; @@ -4003,6 +4300,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,  			goto nla_put_failure;  	} +	*flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);  	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)  		*flags |= RTNH_F_OFFLOAD; @@ -4031,7 +4329,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)  	if (!rtnh)  		goto nla_put_failure; -	rtnh->rtnh_hops = 0; +	rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;  	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;  	if (rt6_nexthop_info(skb, rt, &flags, true) < 0) @@ -4321,9 +4619,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,  		goto errout;  	} -	if (fibmatch && rt->dst.from) { -		struct rt6_info *ort = container_of(rt->dst.from, -						    struct rt6_info, dst); +	if (fibmatch && rt->from) { +		struct rt6_info *ort = rt->from;  		dst_hold(&ort->dst);  		ip6_rt_put(rt); @@ -4427,7 +4724,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,  #ifdef CONFIG_PROC_FS  static const struct file_operations ipv6_route_proc_fops = { -	.owner		= THIS_MODULE,  	.open		= ipv6_route_open,  	.read		= seq_read,  	.llseek		= seq_lseek, @@ -4455,7 +4751,6 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)  }  static const struct file_operations rt6_stats_seq_fops = { -	.owner	 = THIS_MODULE,  	.open	 = rt6_stats_seq_open,  	.read	 = seq_read,  	.llseek	 = seq_lseek, @@ -4600,8 +4895,6 @@ static int __net_init ip6_route_net_init(struct net *net)  					   GFP_KERNEL);  	if (!net->ipv6.ip6_null_entry)  		goto out_ip6_dst_entries; -	net->ipv6.ip6_null_entry->dst.path = -		(struct dst_entry *)net->ipv6.ip6_null_entry;  	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;  	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,  			 ip6_template_metrics, true); @@ -4613,8 +4906,6 @@ static int __net_init ip6_route_net_init(struct net *net)  					       GFP_KERNEL);  	if (!net->ipv6.ip6_prohibit_entry)  		goto out_ip6_null_entry; -	net->ipv6.ip6_prohibit_entry->dst.path = -		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;  	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;  	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,  			 ip6_template_metrics, true); @@ -4624,8 +4915,6 @@ static int __net_init ip6_route_net_init(struct net *net)  					       GFP_KERNEL);  	if (!net->ipv6.ip6_blk_hole_entry)  		goto out_ip6_prohibit_entry; -	net->ipv6.ip6_blk_hole_entry->dst.path = -		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;  	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;  	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,  			 ip6_template_metrics, true); @@ -4782,11 +5071,20 @@ int __init ip6_route_init(void)  	if (ret)  		goto fib6_rules_init; -	ret = -ENOBUFS; -	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || -	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || -	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, -			    RTNL_FLAG_DOIT_UNLOCKED)) +	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, +				   inet6_rtm_newroute, NULL, 0); +	if (ret < 0) +		goto out_register_late_subsys; + +	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, +				   inet6_rtm_delroute, NULL, 0); +	if (ret < 0) +		goto out_register_late_subsys; + +	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, +				   inet6_rtm_getroute, NULL, +				   RTNL_FLAG_DOIT_UNLOCKED); +	if (ret < 0)  		goto out_register_late_subsys;  	ret = register_netdevice_notifier(&ip6_route_dev_notifier); @@ -4804,6 +5102,7 @@ out:  	return ret;  out_register_late_subsys: +	rtnl_unregister_all(PF_INET6);  	unregister_pernet_subsys(&ip6_route_net_late_ops);  fib6_rules_init:  	fib6_rules_cleanup();  |