Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  886
1 file changed, 471 insertions, 415 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 6666b28b6815..6372117f653f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -139,7 +139,6 @@  #include <linux/errqueue.h>  #include <linux/hrtimer.h>  #include <linux/netfilter_ingress.h> -#include <linux/sctp.h>  #include <linux/crash_dump.h>  #include "net-sysfs.h" @@ -1944,37 +1943,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)  	}  } +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) +{ +	if (dev->num_tc) { +		struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; +		int i; + +		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { +			if ((txq - tc->offset) < tc->count) +				return i; +		} + +		return -1; +	} + +	return 0; +} +  #ifdef CONFIG_XPS  static DEFINE_MUTEX(xps_map_mutex);  #define xmap_dereference(P)		\  	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex)) -static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps, -					int cpu, u16 index) +static bool remove_xps_queue(struct xps_dev_maps *dev_maps, +			     int tci, u16 index)  {  	struct xps_map *map = NULL;  	int pos;  	if (dev_maps) -		map = xmap_dereference(dev_maps->cpu_map[cpu]); +		map = xmap_dereference(dev_maps->cpu_map[tci]); +	if (!map) +		return false; -	for (pos = 0; map && pos < map->len; pos++) { -		if (map->queues[pos] == index) { -			if (map->len > 1) { -				map->queues[pos] = map->queues[--map->len]; -			} else { -				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL); -				kfree_rcu(map, rcu); -				map = NULL; -			} +	for (pos = map->len; pos--;) { +		if (map->queues[pos] != index) +			continue; + +		if (map->len > 1) { +			map->queues[pos] = map->queues[--map->len];  			break;  		} + +		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL); +		kfree_rcu(map, rcu); +		return false; +	} + +	return true; +} + +static bool remove_xps_queue_cpu(struct net_device *dev, +				 struct xps_dev_maps *dev_maps, +				 int cpu, u16 offset, u16 count) +{ +	int num_tc = dev->num_tc ? 
: 1; +	bool active = false; +	int tci; + +	for (tci = cpu * num_tc; num_tc--; tci++) { +		int i, j; + +		for (i = count, j = offset; i--; j++) { +			if (!remove_xps_queue(dev_maps, cpu, j)) +				break; +		} + +		active |= i < 0;  	} -	return map; +	return active;  } -static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +static void netif_reset_xps_queues(struct net_device *dev, u16 offset, +				   u16 count)  {  	struct xps_dev_maps *dev_maps;  	int cpu, i; @@ -1986,21 +2028,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)  	if (!dev_maps)  		goto out_no_maps; -	for_each_possible_cpu(cpu) { -		for (i = index; i < dev->num_tx_queues; i++) { -			if (!remove_xps_queue(dev_maps, cpu, i)) -				break; -		} -		if (i == dev->num_tx_queues) -			active = true; -	} +	for_each_possible_cpu(cpu) +		active |= remove_xps_queue_cpu(dev, dev_maps, cpu, +					       offset, count);  	if (!active) {  		RCU_INIT_POINTER(dev->xps_maps, NULL);  		kfree_rcu(dev_maps, rcu);  	} -	for (i = index; i < dev->num_tx_queues; i++) +	for (i = offset + (count - 1); count--; i--)  		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),  					     NUMA_NO_NODE); @@ -2008,6 +2045,11 @@ out_no_maps:  	mutex_unlock(&xps_map_mutex);  } +static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) +{ +	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index); +} +  static struct xps_map *expand_xps_map(struct xps_map *map,  				      int cpu, u16 index)  { @@ -2047,20 +2089,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,  			u16 index)  {  	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; +	int i, cpu, tci, numa_node_id = -2; +	int maps_sz, num_tc = 1, tc = 0;  	struct xps_map *map, *new_map; -	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); -	int cpu, numa_node_id = -2;  	bool active = false; +	if (dev->num_tc) { +		num_tc = dev->num_tc; +		tc = netdev_txq_to_tc(dev, index); +		if (tc < 0) +			return -EINVAL; +	} + +	maps_sz = XPS_DEV_MAPS_SIZE(num_tc); +	if (maps_sz < L1_CACHE_BYTES) +		maps_sz = L1_CACHE_BYTES; +  	mutex_lock(&xps_map_mutex);  	dev_maps = xmap_dereference(dev->xps_maps);  	/* allocate memory for queue storage */ -	for_each_online_cpu(cpu) { -		if (!cpumask_test_cpu(cpu, mask)) -			continue; - +	for_each_cpu_and(cpu, cpu_online_mask, mask) {  		if (!new_dev_maps)  			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);  		if (!new_dev_maps) { @@ -2068,25 +2118,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,  			return -ENOMEM;  		} -		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : +		tci = cpu * num_tc + tc; +		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :  				 NULL;  		map = expand_xps_map(map, cpu, index);  		if (!map)  			goto error; -		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); +		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);  	}  	if (!new_dev_maps)  		goto out_no_new_maps;  	for_each_possible_cpu(cpu) { +		/* copy maps belonging to foreign traffic classes */ +		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) { +			/* fill in the new device map from the old device map */ +			map = xmap_dereference(dev_maps->cpu_map[tci]); +			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); +		} + +		/* We need to explicitly update tci as prevous loop +		 * could break out early if dev_maps is NULL. 
+		 */ +		tci = cpu * num_tc + tc; +  		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {  			/* add queue to CPU maps */  			int pos = 0; -			map = xmap_dereference(new_dev_maps->cpu_map[cpu]); +			map = xmap_dereference(new_dev_maps->cpu_map[tci]);  			while ((pos < map->len) && (map->queues[pos] != index))  				pos++; @@ -2100,26 +2163,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,  #endif  		} else if (dev_maps) {  			/* fill in the new device map from the old device map */ -			map = xmap_dereference(dev_maps->cpu_map[cpu]); -			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); +			map = xmap_dereference(dev_maps->cpu_map[tci]); +			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);  		} +		/* copy maps belonging to foreign traffic classes */ +		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { +			/* fill in the new device map from the old device map */ +			map = xmap_dereference(dev_maps->cpu_map[tci]); +			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); +		}  	}  	rcu_assign_pointer(dev->xps_maps, new_dev_maps);  	/* Cleanup old maps */ -	if (dev_maps) { -		for_each_possible_cpu(cpu) { -			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); -			map = xmap_dereference(dev_maps->cpu_map[cpu]); +	if (!dev_maps) +		goto out_no_old_maps; + +	for_each_possible_cpu(cpu) { +		for (i = num_tc, tci = cpu * num_tc; i--; tci++) { +			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); +			map = xmap_dereference(dev_maps->cpu_map[tci]);  			if (map && map != new_map)  				kfree_rcu(map, rcu);  		} - -		kfree_rcu(dev_maps, rcu);  	} +	kfree_rcu(dev_maps, rcu); + +out_no_old_maps:  	dev_maps = new_dev_maps;  	active = true; @@ -2134,11 +2207,12 @@ out_no_new_maps:  	/* removes queue from unused CPUs */  	for_each_possible_cpu(cpu) { -		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) -			continue; - -		if (remove_xps_queue(dev_maps, cpu, index)) -			active = true; +		for (i = tc, tci = cpu * num_tc; i--; tci++) +			active |= remove_xps_queue(dev_maps, tci, index); +		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu)) +			active |= remove_xps_queue(dev_maps, tci, index); +		for (i = num_tc - tc, tci++; --i; tci++) +			active |= remove_xps_queue(dev_maps, tci, index);  	}  	/* free map if not active */ @@ -2154,11 +2228,14 @@ out_no_maps:  error:  	/* remove any maps that we added */  	for_each_possible_cpu(cpu) { -		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); -		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : -				 NULL; -		if (new_map && new_map != map) -			kfree(new_map); +		for (i = num_tc, tci = cpu * num_tc; i--; tci++) { +			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); +			map = dev_maps ? 
+			      xmap_dereference(dev_maps->cpu_map[tci]) : +			      NULL; +			if (new_map && new_map != map) +				kfree(new_map); +		}  	}  	mutex_unlock(&xps_map_mutex); @@ -2169,6 +2246,44 @@ error:  EXPORT_SYMBOL(netif_set_xps_queue);  #endif +void netdev_reset_tc(struct net_device *dev) +{ +#ifdef CONFIG_XPS +	netif_reset_xps_queues_gt(dev, 0); +#endif +	dev->num_tc = 0; +	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); +	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); +} +EXPORT_SYMBOL(netdev_reset_tc); + +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) +{ +	if (tc >= dev->num_tc) +		return -EINVAL; + +#ifdef CONFIG_XPS +	netif_reset_xps_queues(dev, offset, count); +#endif +	dev->tc_to_txq[tc].count = count; +	dev->tc_to_txq[tc].offset = offset; +	return 0; +} +EXPORT_SYMBOL(netdev_set_tc_queue); + +int netdev_set_num_tc(struct net_device *dev, u8 num_tc) +{ +	if (num_tc > TC_MAX_QUEUE) +		return -EINVAL; + +#ifdef CONFIG_XPS +	netif_reset_xps_queues_gt(dev, 0); +#endif +	dev->num_tc = num_tc; +	return 0; +} +EXPORT_SYMBOL(netdev_set_num_tc); +  /*   * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues   * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. @@ -2487,141 +2602,6 @@ out:  }  EXPORT_SYMBOL(skb_checksum_help); -/* skb_csum_offload_check - Driver helper function to determine if a device - * with limited checksum offload capabilities is able to offload the checksum - * for a given packet. - * - * Arguments: - *   skb - sk_buff for the packet in question - *   spec - contains the description of what device can offload - *   csum_encapped - returns true if the checksum being offloaded is - *	      encpasulated. That is it is checksum for the transport header - *	      in the inner headers. - *   checksum_help - when set indicates that helper function should - *	      call skb_checksum_help if offload checks fail - * - * Returns: - *   true: Packet has passed the checksum checks and should be offloadable to - *	   the device (a driver may still need to check for additional - *	   restrictions of its device) - *   false: Checksum is not offloadable. If checksum_help was set then - *	   skb_checksum_help was called to resolve checksum for non-GSO - *	   packets and when IP protocol is not SCTP - */ -bool __skb_csum_offload_chk(struct sk_buff *skb, -			    const struct skb_csum_offl_spec *spec, -			    bool *csum_encapped, -			    bool csum_help) -{ -	struct iphdr *iph; -	struct ipv6hdr *ipv6; -	void *nhdr; -	int protocol; -	u8 ip_proto; - -	if (skb->protocol == htons(ETH_P_8021Q) || -	    skb->protocol == htons(ETH_P_8021AD)) { -		if (!spec->vlan_okay) -			goto need_help; -	} - -	/* We check whether the checksum refers to a transport layer checksum in -	 * the outermost header or an encapsulated transport layer checksum that -	 * corresponds to the inner headers of the skb. If the checksum is for -	 * something else in the packet we need help. 
-	 */ -	if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { -		/* Non-encapsulated checksum */ -		protocol = eproto_to_ipproto(vlan_get_protocol(skb)); -		nhdr = skb_network_header(skb); -		*csum_encapped = false; -		if (spec->no_not_encapped) -			goto need_help; -	} else if (skb->encapsulation && spec->encap_okay && -		   skb_checksum_start_offset(skb) == -		   skb_inner_transport_offset(skb)) { -		/* Encapsulated checksum */ -		*csum_encapped = true; -		switch (skb->inner_protocol_type) { -		case ENCAP_TYPE_ETHER: -			protocol = eproto_to_ipproto(skb->inner_protocol); -			break; -		case ENCAP_TYPE_IPPROTO: -			protocol = skb->inner_protocol; -			break; -		} -		nhdr = skb_inner_network_header(skb); -	} else { -		goto need_help; -	} - -	switch (protocol) { -	case IPPROTO_IP: -		if (!spec->ipv4_okay) -			goto need_help; -		iph = nhdr; -		ip_proto = iph->protocol; -		if (iph->ihl != 5 && !spec->ip_options_okay) -			goto need_help; -		break; -	case IPPROTO_IPV6: -		if (!spec->ipv6_okay) -			goto need_help; -		if (spec->no_encapped_ipv6 && *csum_encapped) -			goto need_help; -		ipv6 = nhdr; -		nhdr += sizeof(*ipv6); -		ip_proto = ipv6->nexthdr; -		break; -	default: -		goto need_help; -	} - -ip_proto_again: -	switch (ip_proto) { -	case IPPROTO_TCP: -		if (!spec->tcp_okay || -		    skb->csum_offset != offsetof(struct tcphdr, check)) -			goto need_help; -		break; -	case IPPROTO_UDP: -		if (!spec->udp_okay || -		    skb->csum_offset != offsetof(struct udphdr, check)) -			goto need_help; -		break; -	case IPPROTO_SCTP: -		if (!spec->sctp_okay || -		    skb->csum_offset != offsetof(struct sctphdr, checksum)) -			goto cant_help; -		break; -	case NEXTHDR_HOP: -	case NEXTHDR_ROUTING: -	case NEXTHDR_DEST: { -		u8 *opthdr = nhdr; - -		if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) -			goto need_help; - -		ip_proto = opthdr[0]; -		nhdr += (opthdr[1] + 1) << 3; - -		goto ip_proto_again; -	} -	default: -		goto need_help; -	} - -	/* Passed the tests for offloading checksum */ -	return true; - -need_help: -	if (csum_help && !skb_shinfo(skb)->gso_size) -		skb_checksum_help(skb); -cant_help: -	return false; -} -EXPORT_SYMBOL(__skb_csum_offload_chk); -  __be16 skb_network_protocol(struct sk_buff *skb, int *depth)  {  	__be16 type = skb->protocol; @@ -3216,8 +3196,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)  	rcu_read_lock();  	dev_maps = rcu_dereference(dev->xps_maps);  	if (dev_maps) { -		map = rcu_dereference( -		    dev_maps->cpu_map[skb->sender_cpu - 1]); +		unsigned int tci = skb->sender_cpu - 1; + +		if (dev->num_tc) { +			tci *= dev->num_tc; +			tci += netdev_get_prio_tc_map(dev, skb->priority); +		} + +		map = rcu_dereference(dev_maps->cpu_map[tci]);  		if (map) {  			if (map->len == 1)  				queue_index = map->queues[0]; @@ -3461,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask);  struct static_key rps_needed __read_mostly;  EXPORT_SYMBOL(rps_needed); +struct static_key rfs_needed __read_mostly; +EXPORT_SYMBOL(rfs_needed);  static struct rps_dev_flow *  set_rps_cpu(struct net_device *dev, struct sk_buff *skb, @@ -4491,7 +4479,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff  	if (!(skb->dev->features & NETIF_F_GRO))  		goto normal; -	if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) +	if (skb->csum_bad)  		goto normal;  	gro_list_prepare(napi, skb); @@ -4504,7 +4492,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff  		skb_set_network_header(skb, skb_gro_offset(skb));  		
skb_reset_mac_len(skb);  		NAPI_GRO_CB(skb)->same_flow = 0; -		NAPI_GRO_CB(skb)->flush = 0; +		NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);  		NAPI_GRO_CB(skb)->free = 0;  		NAPI_GRO_CB(skb)->encap_mark = 0;  		NAPI_GRO_CB(skb)->recursion_counter = 0; @@ -4912,26 +4900,36 @@ void __napi_schedule_irqoff(struct napi_struct *n)  }  EXPORT_SYMBOL(__napi_schedule_irqoff); -void __napi_complete(struct napi_struct *n) +bool __napi_complete(struct napi_struct *n)  {  	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); +	/* Some drivers call us directly, instead of calling +	 * napi_complete_done(). +	 */ +	if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state))) +		return false; +  	list_del_init(&n->poll_list);  	smp_mb__before_atomic();  	clear_bit(NAPI_STATE_SCHED, &n->state); +	return true;  }  EXPORT_SYMBOL(__napi_complete); -void napi_complete_done(struct napi_struct *n, int work_done) +bool napi_complete_done(struct napi_struct *n, int work_done)  {  	unsigned long flags;  	/* -	 * don't let napi dequeue from the cpu poll list -	 * just in case its running on a different cpu +	 * 1) Don't let napi dequeue from the cpu poll list +	 *    just in case its running on a different cpu. +	 * 2) If we are busy polling, do nothing here, we have +	 *    the guarantee we will be called later.  	 */ -	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) -		return; +	if (unlikely(n->state & (NAPIF_STATE_NPSVC | +				 NAPIF_STATE_IN_BUSY_POLL))) +		return false;  	if (n->gro_list) {  		unsigned long timeout = 0; @@ -4953,6 +4951,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)  		__napi_complete(n);  		local_irq_restore(flags);  	} +	return true;  }  EXPORT_SYMBOL(napi_complete_done); @@ -4970,13 +4969,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)  }  #if defined(CONFIG_NET_RX_BUSY_POLL) +  #define BUSY_POLL_BUDGET 8 + +static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) +{ +	int rc; + +	clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); + +	local_bh_disable(); + +	/* All we really want here is to re-enable device interrupts. +	 * Ideally, a new ndo_busy_poll_stop() could avoid another round. +	 */ +	rc = napi->poll(napi, BUSY_POLL_BUDGET); +	netpoll_poll_unlock(have_poll_lock); +	if (rc == BUSY_POLL_BUDGET) +		__napi_schedule(napi); +	local_bh_enable(); +	if (local_softirq_pending()) +		do_softirq(); +} +  bool sk_busy_loop(struct sock *sk, int nonblock)  {  	unsigned long end_time = !nonblock ? 
sk_busy_loop_end_time(sk) : 0; +	int (*napi_poll)(struct napi_struct *napi, int budget);  	int (*busy_poll)(struct napi_struct *dev); +	void *have_poll_lock = NULL;  	struct napi_struct *napi; -	int rc = false; +	int rc; + +restart: +	rc = false; +	napi_poll = NULL;  	rcu_read_lock(); @@ -4987,24 +5014,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)  	/* Note: ndo_busy_poll method is optional in linux-4.5 */  	busy_poll = napi->dev->netdev_ops->ndo_busy_poll; -	do { +	preempt_disable(); +	for (;;) {  		rc = 0;  		local_bh_disable();  		if (busy_poll) {  			rc = busy_poll(napi); -		} else if (napi_schedule_prep(napi)) { -			void *have = netpoll_poll_lock(napi); - -			if (test_bit(NAPI_STATE_SCHED, &napi->state)) { -				rc = napi->poll(napi, BUSY_POLL_BUDGET); -				trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); -				if (rc == BUSY_POLL_BUDGET) { -					napi_complete_done(napi, rc); -					napi_schedule(napi); -				} -			} -			netpoll_poll_unlock(have); +			goto count; +		} +		if (!napi_poll) { +			unsigned long val = READ_ONCE(napi->state); + +			/* If multiple threads are competing for this napi, +			 * we avoid dirtying napi->state as much as we can. +			 */ +			if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED | +				   NAPIF_STATE_IN_BUSY_POLL)) +				goto count; +			if (cmpxchg(&napi->state, val, +				    val | NAPIF_STATE_IN_BUSY_POLL | +					  NAPIF_STATE_SCHED) != val) +				goto count; +			have_poll_lock = netpoll_poll_lock(napi); +			napi_poll = napi->poll;  		} +		rc = napi_poll(napi, BUSY_POLL_BUDGET); +		trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); +count:  		if (rc > 0)  			__NET_ADD_STATS(sock_net(sk),  					LINUX_MIB_BUSYPOLLRXPACKETS, rc); @@ -5013,10 +5049,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)  		if (rc == LL_FLUSH_FAILED)  			break; /* permanent failure */ -		cpu_relax(); -	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && -		 !need_resched() && !busy_loop_timeout(end_time)); +		if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || +		    busy_loop_timeout(end_time)) +			break; +		if (unlikely(need_resched())) { +			if (napi_poll) +				busy_poll_stop(napi, have_poll_lock); +			preempt_enable(); +			rcu_read_unlock(); +			cond_resched(); +			rc = !skb_queue_empty(&sk->sk_receive_queue); +			if (rc || busy_loop_timeout(end_time)) +				return rc; +			goto restart; +		} +		cpu_relax(); +	} +	if (napi_poll) +		busy_poll_stop(napi, have_poll_lock); +	preempt_enable();  	rc = !skb_queue_empty(&sk->sk_receive_queue);  out:  	rcu_read_unlock(); @@ -5026,7 +5078,7 @@ EXPORT_SYMBOL(sk_busy_loop);  #endif /* CONFIG_NET_RX_BUSY_POLL */ -void napi_hash_add(struct napi_struct *napi) +static void napi_hash_add(struct napi_struct *napi)  {  	if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||  	    test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) @@ -5046,7 +5098,6 @@ void napi_hash_add(struct napi_struct *napi)  	spin_unlock(&napi_hash_lock);  } -EXPORT_SYMBOL_GPL(napi_hash_add);  /* Warning : caller is responsible to make sure rcu grace period   * is respected before freeing memory containing @napi @@ -5094,7 +5145,6 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,  	list_add(&napi->dev_list, &dev->napi_list);  	napi->dev = dev;  #ifdef CONFIG_NETPOLL -	spin_lock_init(&napi->poll_lock);  	napi->poll_owner = -1;  #endif  	set_bit(NAPI_STATE_SCHED, &napi->state); @@ -5212,7 +5262,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)  		if (list_empty(&list)) {  			if (!sd_has_rps_ipi_waiting(sd) && 
list_empty(&repoll)) -				return; +				goto out;  			break;  		} @@ -5230,7 +5280,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)  		}  	} -	__kfree_skb_flush();  	local_irq_disable();  	list_splice_tail_init(&sd->poll_list, &list); @@ -5240,6 +5289,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)  		__raise_softirq_irqoff(NET_RX_SOFTIRQ);  	net_rps_action_and_irq_enable(sd); +out: +	__kfree_skb_flush();  }  struct netdev_adjacent { @@ -5270,6 +5321,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,  	return NULL;  } +static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +{ +	struct net_device *dev = data; + +	return upper_dev == dev; +} +  /**   * netdev_has_upper_dev - Check if device is linked to an upper device   * @dev: device @@ -5284,11 +5342,30 @@ bool netdev_has_upper_dev(struct net_device *dev,  {  	ASSERT_RTNL(); -	return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); +	return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, +					     upper_dev);  }  EXPORT_SYMBOL(netdev_has_upper_dev);  /** + * netdev_has_upper_dev_all - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks the entire upper device chain. + * The caller must hold rcu lock. + */ + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, +				  struct net_device *upper_dev) +{ +	return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, +					       upper_dev); +} +EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); + +/**   * netdev_has_any_upper_dev - Check if device is linked to some device   * @dev: device   * @@ -5299,7 +5376,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev)  {  	ASSERT_RTNL(); -	return !list_empty(&dev->all_adj_list.upper); +	return !list_empty(&dev->adj_list.upper);  }  /** @@ -5326,6 +5403,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)  }  EXPORT_SYMBOL(netdev_master_upper_dev_get); +/** + * netdev_has_any_lower_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to a lower device and return true in case + * it is. The caller must hold the RTNL lock. + */ +static bool netdev_has_any_lower_dev(struct net_device *dev) +{ +	ASSERT_RTNL(); + +	return !list_empty(&dev->adj_list.lower); +} +  void *netdev_adjacent_get_private(struct list_head *adj_list)  {  	struct netdev_adjacent *adj; @@ -5362,16 +5453,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,  }  EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -/** - * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next device from the dev's upper list, starting from iter - * position. The caller must hold RCU read lock. 
- */ -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, -						     struct list_head **iter) +static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, +						    struct list_head **iter)  {  	struct netdev_adjacent *upper; @@ -5379,14 +5462,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,  	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); -	if (&upper->list == &dev->all_adj_list.upper) +	if (&upper->list == &dev->adj_list.upper)  		return NULL;  	*iter = &upper->list;  	return upper->dev;  } -EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); + +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, +				  int (*fn)(struct net_device *dev, +					    void *data), +				  void *data) +{ +	struct net_device *udev; +	struct list_head *iter; +	int ret; + +	for (iter = &dev->adj_list.upper, +	     udev = netdev_next_upper_dev_rcu(dev, &iter); +	     udev; +	     udev = netdev_next_upper_dev_rcu(dev, &iter)) { +		/* first is the upper device itself */ +		ret = fn(udev, data); +		if (ret) +			return ret; + +		/* then look at all of its upper devices */ +		ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); +		if (ret) +			return ret; +	} + +	return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);  /**   * netdev_lower_get_next_private - Get the next ->private from the @@ -5469,55 +5579,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)  }  EXPORT_SYMBOL(netdev_lower_get_next); -/** - * netdev_all_lower_get_next - Get the next device from all lower neighbour list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RTNL lock or - * its own locking that guarantees that the neighbour all lower - * list will remain unchanged. - */ -struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +static struct net_device *netdev_next_lower_dev(struct net_device *dev, +						struct list_head **iter)  {  	struct netdev_adjacent *lower; -	lower = list_entry(*iter, struct netdev_adjacent, list); +	lower = list_entry((*iter)->next, struct netdev_adjacent, list); -	if (&lower->list == &dev->all_adj_list.lower) +	if (&lower->list == &dev->adj_list.lower)  		return NULL; -	*iter = lower->list.next; +	*iter = &lower->list;  	return lower->dev;  } -EXPORT_SYMBOL(netdev_all_lower_get_next); -/** - * netdev_all_lower_get_next_rcu - Get the next device from all - *				   lower neighbour list, RCU variant - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RCU read lock. 
- */ -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, -						 struct list_head **iter) +int netdev_walk_all_lower_dev(struct net_device *dev, +			      int (*fn)(struct net_device *dev, +					void *data), +			      void *data) +{ +	struct net_device *ldev; +	struct list_head *iter; +	int ret; + +	for (iter = &dev->adj_list.lower, +	     ldev = netdev_next_lower_dev(dev, &iter); +	     ldev; +	     ldev = netdev_next_lower_dev(dev, &iter)) { +		/* first is the lower device itself */ +		ret = fn(ldev, data); +		if (ret) +			return ret; + +		/* then look at all of its lower devices */ +		ret = netdev_walk_all_lower_dev(ldev, fn, data); +		if (ret) +			return ret; +	} + +	return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); + +static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, +						    struct list_head **iter)  {  	struct netdev_adjacent *lower;  	lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - -	if (&lower->list == &dev->all_adj_list.lower) +	if (&lower->list == &dev->adj_list.lower)  		return NULL;  	*iter = &lower->list;  	return lower->dev;  } -EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, +				  int (*fn)(struct net_device *dev, +					    void *data), +				  void *data) +{ +	struct net_device *ldev; +	struct list_head *iter; +	int ret; + +	for (iter = &dev->adj_list.lower, +	     ldev = netdev_next_lower_dev_rcu(dev, &iter); +	     ldev; +	     ldev = netdev_next_lower_dev_rcu(dev, &iter)) { +		/* first is the lower device itself */ +		ret = fn(ldev, data); +		if (ret) +			return ret; + +		/* then look at all of its lower devices */ +		ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); +		if (ret) +			return ret; +	} + +	return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);  /**   * netdev_lower_get_first_private_rcu - Get the first ->private from the @@ -5590,7 +5735,6 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,  static int __netdev_adjacent_dev_insert(struct net_device *dev,  					struct net_device *adj_dev, -					u16 ref_nr,  					struct list_head *dev_list,  					void *private, bool master)  { @@ -5600,7 +5744,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,  	adj = __netdev_find_adj(adj_dev, dev_list);  	if (adj) { -		adj->ref_nr += ref_nr; +		adj->ref_nr += 1; +		pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n", +			 dev->name, adj_dev->name, adj->ref_nr); +  		return 0;  	} @@ -5610,12 +5757,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,  	adj->dev = adj_dev;  	adj->master = master; -	adj->ref_nr = ref_nr; +	adj->ref_nr = 1;  	adj->private = private;  	dev_hold(adj_dev); -	pr_debug("dev_hold for %s, because of link added from %s to %s\n", -		 adj_dev->name, dev->name, adj_dev->name); +	pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", +		 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);  	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {  		ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); @@ -5654,17 +5801,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,  {  	struct netdev_adjacent *adj; +	pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n", +		 dev->name, adj_dev->name, ref_nr); +  	adj = __netdev_find_adj(adj_dev, dev_list);  	if (!adj) { -		pr_err("tried to remove device %s from %s\n", +		pr_err("Adjacency does not exist for device %s from %s\n",  		     
  dev->name, adj_dev->name); -		BUG(); +		WARN_ON(1); +		return;  	}  	if (adj->ref_nr > ref_nr) { -		pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, -			 ref_nr, adj->ref_nr-ref_nr); +		pr_debug("adjacency: %s to %s ref_nr - %d = %d\n", +			 dev->name, adj_dev->name, ref_nr, +			 adj->ref_nr - ref_nr);  		adj->ref_nr -= ref_nr;  		return;  	} @@ -5676,7 +5828,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,  		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);  	list_del_rcu(&adj->list); -	pr_debug("dev_put for %s, because link removed from %s to %s\n", +	pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",  		 adj_dev->name, dev->name, adj_dev->name);  	dev_put(adj_dev);  	kfree_rcu(adj, rcu); @@ -5684,38 +5836,27 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,  static int __netdev_adjacent_dev_link_lists(struct net_device *dev,  					    struct net_device *upper_dev, -					    u16 ref_nr,  					    struct list_head *up_list,  					    struct list_head *down_list,  					    void *private, bool master)  {  	int ret; -	ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, +	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,  					   private, master);  	if (ret)  		return ret; -	ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, +	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,  					   private, false);  	if (ret) { -		__netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); +		__netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);  		return ret;  	}  	return 0;  } -static int __netdev_adjacent_dev_link(struct net_device *dev, -				      struct net_device *upper_dev, -				      u16 ref_nr) -{ -	return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, -						&dev->all_adj_list.upper, -						&upper_dev->all_adj_list.lower, -						NULL, false); -} -  static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,  					       struct net_device *upper_dev,  					       u16 ref_nr, @@ -5726,40 +5867,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,  	__netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);  } -static void __netdev_adjacent_dev_unlink(struct net_device *dev, -					 struct net_device *upper_dev, -					 u16 ref_nr) -{ -	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, -					   &dev->all_adj_list.upper, -					   &upper_dev->all_adj_list.lower); -} -  static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,  						struct net_device *upper_dev,  						void *private, bool master)  { -	int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); - -	if (ret) -		return ret; - -	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, -					       &dev->adj_list.upper, -					       &upper_dev->adj_list.lower, -					       private, master); -	if (ret) { -		__netdev_adjacent_dev_unlink(dev, upper_dev, 1); -		return ret; -	} - -	return 0; +	return __netdev_adjacent_dev_link_lists(dev, upper_dev, +						&dev->adj_list.upper, +						&upper_dev->adj_list.lower, +						private, master);  }  static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,  						   struct net_device *upper_dev)  { -	__netdev_adjacent_dev_unlink(dev, upper_dev, 1);  	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,  					   &dev->adj_list.upper,  					   &upper_dev->adj_list.lower); @@ -5770,7 +5890,6 @@ static int __netdev_upper_dev_link(struct net_device *dev,  				   void 
*upper_priv, void *upper_info)  {  	struct netdev_notifier_changeupper_info changeupper_info; -	struct netdev_adjacent *i, *j, *to_i, *to_j;  	int ret = 0;  	ASSERT_RTNL(); @@ -5779,10 +5898,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,  		return -EBUSY;  	/* To prevent loops, check if dev is not upper device to upper_dev. */ -	if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) +	if (netdev_has_upper_dev(upper_dev, dev))  		return -EBUSY; -	if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) +	if (netdev_has_upper_dev(dev, upper_dev))  		return -EEXIST;  	if (master && netdev_master_upper_dev_get(dev)) @@ -5804,80 +5923,15 @@ static int __netdev_upper_dev_link(struct net_device *dev,  	if (ret)  		return ret; -	/* Now that we linked these devs, make all the upper_dev's -	 * all_adj_list.upper visible to every dev's all_adj_list.lower an -	 * versa, and don't forget the devices itself. All of these -	 * links are non-neighbours. -	 */ -	list_for_each_entry(i, &dev->all_adj_list.lower, list) { -		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { -			pr_debug("Interlinking %s with %s, non-neighbour\n", -				 i->dev->name, j->dev->name); -			ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); -			if (ret) -				goto rollback_mesh; -		} -	} - -	/* add dev to every upper_dev's upper device */ -	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { -		pr_debug("linking %s's upper device %s with %s\n", -			 upper_dev->name, i->dev->name, dev->name); -		ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); -		if (ret) -			goto rollback_upper_mesh; -	} - -	/* add upper_dev to every dev's lower device */ -	list_for_each_entry(i, &dev->all_adj_list.lower, list) { -		pr_debug("linking %s's lower device %s with %s\n", dev->name, -			 i->dev->name, upper_dev->name); -		ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); -		if (ret) -			goto rollback_lower_mesh; -	} -  	ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,  					    &changeupper_info.info);  	ret = notifier_to_errno(ret);  	if (ret) -		goto rollback_lower_mesh; +		goto rollback;  	return 0; -rollback_lower_mesh: -	to_i = i; -	list_for_each_entry(i, &dev->all_adj_list.lower, list) { -		if (i == to_i) -			break; -		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); -	} - -	i = NULL; - -rollback_upper_mesh: -	to_i = i; -	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { -		if (i == to_i) -			break; -		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); -	} - -	i = j = NULL; - -rollback_mesh: -	to_i = i; -	to_j = j; -	list_for_each_entry(i, &dev->all_adj_list.lower, list) { -		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { -			if (i == to_i && j == to_j) -				break; -			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); -		} -		if (i == to_i) -			break; -	} - +rollback:  	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);  	return ret; @@ -5934,7 +5988,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,  			     struct net_device *upper_dev)  {  	struct netdev_notifier_changeupper_info changeupper_info; -	struct netdev_adjacent *i, *j;  	ASSERT_RTNL();  	changeupper_info.upper_dev = upper_dev; @@ -5946,23 +5999,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,  	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); -	/* Here is the tricky part. We must remove all dev's lower -	 * devices from all upper_dev's upper devices and vice -	 * versa, to maintain the graph relationship. 
-	 */ -	list_for_each_entry(i, &dev->all_adj_list.lower, list) -		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) -			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - -	/* remove also the devices itself from lower/upper device -	 * list -	 */ -	list_for_each_entry(i, &dev->all_adj_list.lower, list) -		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - -	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) -		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); -  	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,  				      &changeupper_info.info);  } @@ -6500,9 +6536,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)  	if (new_mtu == dev->mtu)  		return 0; -	/*	MTU must be positive.	 */ -	if (new_mtu < 0) +	/* MTU must be positive, and in range */ +	if (new_mtu < 0 || new_mtu < dev->min_mtu) { +		net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", +				    dev->name, new_mtu, dev->min_mtu);  		return -EINVAL; +	} + +	if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { +		net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", +				    dev->name, new_mtu, dev->max_mtu); +		return -EINVAL; +	}  	if (!netif_device_present(dev))  		return -ENODEV; @@ -6649,26 +6694,42 @@ EXPORT_SYMBOL(dev_change_proto_down);   *	dev_change_xdp_fd - set or clear a bpf program for a device rx path   *	@dev: device   *	@fd: new program fd or negative value to clear + *	@flags: xdp-related flags   *   *	Set or clear a bpf program for a device   */ -int dev_change_xdp_fd(struct net_device *dev, int fd) +int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)  {  	const struct net_device_ops *ops = dev->netdev_ops;  	struct bpf_prog *prog = NULL; -	struct netdev_xdp xdp = {}; +	struct netdev_xdp xdp;  	int err; +	ASSERT_RTNL(); +  	if (!ops->ndo_xdp)  		return -EOPNOTSUPP;  	if (fd >= 0) { +		if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { +			memset(&xdp, 0, sizeof(xdp)); +			xdp.command = XDP_QUERY_PROG; + +			err = ops->ndo_xdp(dev, &xdp); +			if (err < 0) +				return err; +			if (xdp.prog_attached) +				return -EBUSY; +		} +  		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);  		if (IS_ERR(prog))  			return PTR_ERR(prog);  	} +	memset(&xdp, 0, sizeof(xdp));  	xdp.command = XDP_SETUP_PROG;  	xdp.prog = prog; +  	err = ops->ndo_xdp(dev, &xdp);  	if (err < 0 && prog)  		bpf_prog_put(prog); @@ -6777,6 +6838,7 @@ static void rollback_registered_many(struct list_head *head)  		/* Notifier chain MUST detach us all upper devices. 
*/  		WARN_ON(netdev_has_any_upper_dev(dev)); +		WARN_ON(netdev_has_any_lower_dev(dev));  		/* Remove entries from kobject tree */  		netdev_unregister_kobject(dev); @@ -7655,8 +7717,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,  	INIT_LIST_HEAD(&dev->link_watch_list);  	INIT_LIST_HEAD(&dev->adj_list.upper);  	INIT_LIST_HEAD(&dev->adj_list.lower); -	INIT_LIST_HEAD(&dev->all_adj_list.upper); -	INIT_LIST_HEAD(&dev->all_adj_list.lower);  	INIT_LIST_HEAD(&dev->ptype_all);  	INIT_LIST_HEAD(&dev->ptype_specific);  #ifdef CONFIG_NET_SCHED @@ -7667,7 +7727,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,  	if (!dev->tx_queue_len) {  		dev->priv_flags |= IFF_NO_QUEUE; -		dev->tx_queue_len = 1; +		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;  	}  	dev->num_tx_queues = txqs; @@ -7948,18 +8008,13 @@ out:  }  EXPORT_SYMBOL_GPL(dev_change_net_namespace); -static int dev_cpu_callback(struct notifier_block *nfb, -			    unsigned long action, -			    void *ocpu) +static int dev_cpu_dead(unsigned int oldcpu)  {  	struct sk_buff **list_skb;  	struct sk_buff *skb; -	unsigned int cpu, oldcpu = (unsigned long)ocpu; +	unsigned int cpu;  	struct softnet_data *sd, *oldsd; -	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) -		return NOTIFY_OK; -  	local_irq_disable();  	cpu = smp_processor_id();  	sd = &per_cpu(softnet_data, cpu); @@ -8009,10 +8064,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,  		input_queue_head_incr(oldsd);  	} -	return NOTIFY_OK; +	return 0;  } -  /**   *	netdev_increment_features - increment feature set by one   *	@all: current feature set @@ -8346,7 +8400,9 @@ static int __init net_dev_init(void)  	open_softirq(NET_TX_SOFTIRQ, net_tx_action);  	open_softirq(NET_RX_SOFTIRQ, net_rx_action); -	hotcpu_notifier(dev_cpu_callback, 0); +	rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead", +				       NULL, dev_cpu_dead); +	WARN_ON(rc < 0);  	dst_subsys_init();  	rc = 0;  out:  |
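
For reference, the sketch below (not part of this commit) shows one way a multiqueue driver could use the traffic-class helpers added above — netdev_set_num_tc(), netdev_set_tc_queue() and netdev_reset_tc() — to split its TX queues evenly across classes. The function name example_setup_tc and the even, contiguous queue split are illustrative assumptions, not code from this patch.

#include <linux/netdevice.h>

/* Hypothetical driver helper (illustration only): partition the device's
 * real TX queues into num_tc equal, contiguous traffic classes.
 */
static int example_setup_tc(struct net_device *dev, u8 num_tc)
{
	u16 count;
	int i, err;

	if (!num_tc) {
		netdev_reset_tc(dev);
		return 0;
	}

	/* Declare the number of classes first; per-class queue ranges are
	 * only accepted for tc < dev->num_tc.
	 */
	err = netdev_set_num_tc(dev, num_tc);
	if (err)
		return err;

	/* Give each class an equal, contiguous slice of the TX queues. */
	count = dev->real_num_tx_queues / num_tc;

	for (i = 0; i < num_tc; i++) {
		err = netdev_set_tc_queue(dev, i, count, i * count);
		if (err) {
			netdev_reset_tc(dev);
			return err;
		}
	}

	return 0;
}

With such a mapping in place, the XPS changes in this patch index cpu_map by cpu * num_tc plus the class returned by netdev_get_prio_tc_map(), so each traffic class keeps its own CPU-to-queue map, and netdev_txq_to_tc() recovers the class for a given queue index.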