Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c	147
1 file changed, 93 insertions, 54 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 253584777101..735096d42c1d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -160,8 +160,6 @@ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 
 static int netif_rx_internal(struct sk_buff *skb);
-static int call_netdevice_notifiers_info(unsigned long val,
-					 struct netdev_notifier_info *info);
 static int call_netdevice_notifiers_extack(unsigned long val,
 					   struct net_device *dev,
 					   struct netlink_ext_ack *extack);
@@ -1919,8 +1917,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev,
  *	are as for raw_notifier_call_chain().
  */
 
-static int call_netdevice_notifiers_info(unsigned long val,
-					 struct netdev_notifier_info *info)
+int call_netdevice_notifiers_info(unsigned long val,
+				  struct netdev_notifier_info *info)
 {
 	struct net *net = dev_net(info->dev);
 	int ret;
@@ -2535,6 +2533,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	struct xps_map *map, *new_map;
 	unsigned int nr_ids;
 
+	WARN_ON_ONCE(index >= dev->num_tx_queues);
+
 	if (dev->num_tc) {
 		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
@@ -3075,7 +3075,7 @@ void __netif_schedule(struct Qdisc *q)
 EXPORT_SYMBOL(__netif_schedule);
 
 struct dev_kfree_skb_cb {
-	enum skb_free_reason reason;
+	enum skb_drop_reason reason;
 };
 
 static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
@@ -3108,7 +3108,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 }
 EXPORT_SYMBOL(netif_tx_wake_queue);
 
-void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason)
 {
 	unsigned long flags;
 
@@ -3128,18 +3128,16 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL(__dev_kfree_skb_irq);
+EXPORT_SYMBOL(dev_kfree_skb_irq_reason);
 
-void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
+void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason)
 {
 	if (in_hardirq() || irqs_disabled())
-		__dev_kfree_skb_irq(skb, reason);
-	else if (unlikely(reason == SKB_REASON_DROPPED))
-		kfree_skb(skb);
+		dev_kfree_skb_irq_reason(skb, reason);
 	else
-		consume_skb(skb);
+		kfree_skb_reason(skb, reason);
 }
-EXPORT_SYMBOL(__dev_kfree_skb_any);
+EXPORT_SYMBOL(dev_kfree_skb_any_reason);
 
 
 /**
@@ -3199,6 +3197,7 @@ static u16 skb_tx_hash(const struct net_device *dev,
 	}
 
 	if (skb_rx_queue_recorded(skb)) {
+		DEBUG_NET_WARN_ON_ONCE(qcount == 0);
 		hash = skb_get_rx_queue(skb);
 		if (hash >= qoffset)
 			hash -= qoffset;
@@ -3316,8 +3315,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
 						  skb->len - start, ~(__u32)0,
 						  crc32c_csum_stub));
 	*(__le32 *)(skb->data + offset) = crc32c_csum;
-	skb->ip_summed = CHECKSUM_NONE;
-	skb->csum_not_inet = 0;
+	skb_reset_csum_not_inet(skb);
 out:
 	return ret;
 }
@@ -3735,25 +3733,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
 	 * we add to pkt_len the headers size of all segments
 	 */
 	if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
-		unsigned int hdr_len;
 		u16 gso_segs = shinfo->gso_segs;
+		unsigned int hdr_len;
 
 		/* mac layer + network layer */
-		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+		hdr_len = skb_transport_offset(skb);
 
 		/* + transport layer */
 		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
 			const struct tcphdr *th;
 			struct tcphdr _tcphdr;
 
-			th = skb_header_pointer(skb, skb_transport_offset(skb),
+			th = skb_header_pointer(skb, hdr_len,
 						sizeof(_tcphdr), &_tcphdr);
 			if (likely(th))
 				hdr_len += __tcp_hdrlen(th);
 		} else {
 			struct udphdr _udphdr;
 
-			if (skb_header_pointer(skb, skb_transport_offset(skb),
+			if (skb_header_pointer(skb, hdr_len,
 					       sizeof(_udphdr), &_udphdr))
 				hdr_len += sizeof(struct udphdr);
 		}
@@ -4360,7 +4358,12 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	}
 
 	list_add_tail(&napi->poll_list, &sd->poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	WRITE_ONCE(napi->list_owner, smp_processor_id());
+	/* If not called from net_rx_action()
+	 * we have to raise NET_RX_SOFTIRQ.
+	 */
+	if (!sd->in_net_rx_action)
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
 #ifdef CONFIG_RPS
@@ -4582,11 +4585,16 @@ static void trigger_rx_softirq(void *data)
 }
 
 /*
- * Check if this softnet_data structure is another cpu one
- * If yes, queue it to our IPI list and return 1
- * If no, return 0
+ * After we queued a packet into sd->input_pkt_queue,
+ * we need to make sure this queue is serviced soon.
+ *
+ * - If this is another cpu queue, link it to our rps_ipi_list,
+ *   and make sure we will process rps_ipi_list from net_rx_action().
+ *
+ * - If this is our own queue, NAPI schedule our backlog.
+ *   Note that this also raises NET_RX_SOFTIRQ.
  */
-static int napi_schedule_rps(struct softnet_data *sd)
+static void napi_schedule_rps(struct softnet_data *sd)
 {
 	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
@@ -4595,12 +4603,15 @@ static int napi_schedule_rps(struct softnet_data *sd)
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
-		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-		return 1;
+		/* If not called from net_rx_action() or napi_threaded_poll()
+		 * we have to raise NET_RX_SOFTIRQ.
+		 */
+		if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll)
+			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return;
 	}
 #endif /* CONFIG_RPS */
 	__napi_schedule_irqoff(&mysd->backlog);
-	return 0;
 }
 
 #ifdef CONFIG_NET_FLOW_LIMIT
@@ -5020,16 +5031,17 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 			clist = clist->next;
 
 			WARN_ON(refcount_read(&skb->users));
-			if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+			if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
 				trace_consume_skb(skb, net_tx_action);
 			else
 				trace_kfree_skb(skb, net_tx_action,
-						SKB_DROP_REASON_NOT_SPECIFIED);
+						get_kfree_skb_cb(skb)->reason);
 
 			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
 				__kfree_skb(skb);
 			else
-				__kfree_skb_defer(skb);
+				__napi_kfree_skb(skb,
						 get_kfree_skb_cb(skb)->reason);
 		}
 	}
 
@@ -6058,6 +6070,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 		list_del_init(&n->poll_list);
 		local_irq_restore(flags);
 	}
+	WRITE_ONCE(n->list_owner, -1);
 
 	val = READ_ONCE(n->state);
 	do {
@@ -6373,6 +6386,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 #ifdef CONFIG_NETPOLL
 	napi->poll_owner = -1;
 #endif
+	napi->list_owner = -1;
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
 	list_add_rcu(&napi->dev_list, &dev->napi_list);
@@ -6584,9 +6598,31 @@ static int napi_thread_wait(struct napi_struct *napi)
 	return -1;
 }
 
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+	struct sk_buff *skb, *next;
+
+	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+	if (!READ_ONCE(sd->defer_list))
+		return;
+
+	spin_lock(&sd->defer_lock);
+	skb = sd->defer_list;
+	sd->defer_list = NULL;
+	sd->defer_count = 0;
+	spin_unlock(&sd->defer_lock);
+
+	while (skb != NULL) {
+		next = skb->next;
+		napi_consume_skb(skb, 1);
+		skb = next;
+	}
+}
+
 static int napi_threaded_poll(void *data)
 {
 	struct napi_struct *napi = data;
+	struct softnet_data *sd;
 	void *have;
 
 	while (!napi_thread_wait(napi)) {
@@ -6594,11 +6630,21 @@ static int napi_threaded_poll(void *data)
 			bool repoll = false;
 
 			local_bh_disable();
+			sd = this_cpu_ptr(&softnet_data);
+			sd->in_napi_threaded_poll = true;
 
 			have = netpoll_poll_lock(napi);
 			__napi_poll(napi, &repoll);
 			netpoll_poll_unlock(have);
 
+			sd->in_napi_threaded_poll = false;
+			barrier();
+
+			if (sd_has_rps_ipi_waiting(sd)) {
+				local_irq_disable();
+				net_rps_action_and_irq_enable(sd);
+			}
+			skb_defer_free_flush(sd);
 			local_bh_enable();
 
 			if (!repoll)
@@ -6610,27 +6656,6 @@ static int napi_threaded_poll(void *data)
 	return 0;
 }
 
-static void skb_defer_free_flush(struct softnet_data *sd)
-{
-	struct sk_buff *skb, *next;
-
-	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
-	if (!READ_ONCE(sd->defer_list))
-		return;
-
-	spin_lock_irq(&sd->defer_lock);
-	skb = sd->defer_list;
-	sd->defer_list = NULL;
-	sd->defer_count = 0;
-	spin_unlock_irq(&sd->defer_lock);
-
-	while (skb != NULL) {
-		next = skb->next;
-		napi_consume_skb(skb, 1);
-		skb = next;
-	}
-}
-
 static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6640,6 +6665,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
 
+start:
+	sd->in_net_rx_action = true;
 	local_irq_disable();
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
@@ -6650,8 +6677,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 		skb_defer_free_flush(sd);
 
 		if (list_empty(&list)) {
-			if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
-				goto end;
+			if (list_empty(&repoll)) {
+				sd->in_net_rx_action = false;
+				barrier();
+				/* We need to check if ____napi_schedule()
+				 * had refilled poll_list while
+				 * sd->in_net_rx_action was true.
+				 */
+				if (!list_empty(&sd->poll_list))
+					goto start;
+				if (!sd_has_rps_ipi_waiting(sd))
+					goto end;
+			}
 			break;
 		}
 
@@ -6676,6 +6713,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	list_splice(&list, &sd->poll_list);
 	if (!list_empty(&sd->poll_list))
 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	else
+		sd->in_net_rx_action = false;
 
 	net_rps_action_and_irq_enable(sd);
 end:;
@@ -10846,7 +10885,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
 						     GFP_KERNEL, NULL, 0,
-						     portid, nlmsg_seq(nlh));
+						     portid, nlh);
 
 		/*
 		 *	Flush the unicast and multicast chains
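
Note on the dev_kfree_skb rename above: enum skb_free_reason is dropped in favour of the generic enum skb_drop_reason, and the exported helpers become dev_kfree_skb_irq_reason() / dev_kfree_skb_any_reason(). The matching include/linux/netdevice.h change is not part of this file's diff; the sketch below is only an assumption of how the legacy wrappers would map onto the renamed helpers, based on the SKB_CONSUMED and SKB_DROP_REASON_NOT_SPECIFIED values visible in the hunks above.

/* Hypothetical header-side sketch (not taken from this diff): legacy
 * wrappers expressed in terms of the reason-aware helpers exported by
 * net/core/dev.c.
 */
static inline void dev_kfree_skb_irq(struct sk_buff *skb)
{
	dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}

static inline void dev_consume_skb_irq(struct sk_buff *skb)
{
	dev_kfree_skb_irq_reason(skb, SKB_CONSUMED);
}

static inline void dev_kfree_skb_any(struct sk_buff *skb)
{
	dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}

static inline void dev_consume_skb_any(struct sk_buff *skb)
{
	dev_kfree_skb_any_reason(skb, SKB_CONSUMED);
}

Under that assumption, existing callers keep their drop/consume semantics: a consumed skb still ends up in consume_skb()/trace_consume_skb(), while a dropped one now carries a reason into kfree_skb_reason() and into the kfree_skb tracepoint emitted by net_tx_action().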
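For drivers that want to use the new export directly, a context-safe free can now name the reason a packet is being dropped. A minimal, hypothetical transmit fragment follows; the foo_* helpers, foo_priv structure and the chosen reason are illustrative only and not part of this commit.

/* Hypothetical driver sketch: drop with an explicit reason.
 * dev_kfree_skb_any_reason() is safe from any context: from hard IRQ (or
 * with IRQs disabled) it defers the skb to the per-cpu completion queue
 * and net_tx_action() later traces the stored reason; otherwise it calls
 * kfree_skb_reason() directly.
 */
static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	if (!foo_tx_ring_has_room(priv)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_FULL_RING);
		return NETDEV_TX_OK;
	}

	foo_queue_tx(priv, skb);
	return NETDEV_TX_OK;
}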