diff options
Diffstat (limited to 'net/core/dev.c')
| -rw-r--r-- | net/core/dev.c | 288 | 
1 files changed, 198 insertions, 90 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index be9924f60ec3..bcc490cc9452 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1113,19 +1113,7 @@ void dev_load(struct net *net, const char *name)  }  EXPORT_SYMBOL(dev_load); -/** - *	dev_open	- prepare an interface for use. - *	@dev:	device to open - * - *	Takes a device from down to up state. The device's private open - *	function is invoked and then the multicast lists are loaded. Finally - *	the device is moved into the up state and a %NETDEV_UP message is - *	sent to the netdev notifier chain. - * - *	Calling this function on an active interface is a nop. On a failure - *	a negative errno code is returned. - */ -int dev_open(struct net_device *dev) +static int __dev_open(struct net_device *dev)  {  	const struct net_device_ops *ops = dev->netdev_ops;  	int ret; @@ -1133,13 +1121,6 @@ int dev_open(struct net_device *dev)  	ASSERT_RTNL();  	/* -	 *	Is it already up? -	 */ - -	if (dev->flags & IFF_UP) -		return 0; - -	/*  	 *	Is it even present?  	 */  	if (!netif_device_present(dev)) @@ -1187,36 +1168,57 @@ int dev_open(struct net_device *dev)  		 *	Wakeup transmit queue engine  		 */  		dev_activate(dev); - -		/* -		 *	... and announce new interface. -		 */ -		call_netdevice_notifiers(NETDEV_UP, dev);  	}  	return ret;  } -EXPORT_SYMBOL(dev_open);  /** - *	dev_close - shutdown an interface. - *	@dev: device to shutdown + *	dev_open	- prepare an interface for use. + *	@dev:	device to open   * - *	This function moves an active device into down state. A - *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - *	chain. + *	Takes a device from down to up state. The device's private open + *	function is invoked and then the multicast lists are loaded. Finally + *	the device is moved into the up state and a %NETDEV_UP message is + *	sent to the netdev notifier chain. + * + *	Calling this function on an active interface is a nop. On a failure + *	a negative errno code is returned.   */ -int dev_close(struct net_device *dev) +int dev_open(struct net_device *dev) +{ +	int ret; + +	/* +	 *	Is it already up? +	 */ +	if (dev->flags & IFF_UP) +		return 0; + +	/* +	 *	Open device +	 */ +	ret = __dev_open(dev); +	if (ret < 0) +		return ret; + +	/* +	 *	... and announce new interface. +	 */ +	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); +	call_netdevice_notifiers(NETDEV_UP, dev); + +	return ret; +} +EXPORT_SYMBOL(dev_open); + +static int __dev_close(struct net_device *dev)  {  	const struct net_device_ops *ops = dev->netdev_ops; -	ASSERT_RTNL(); +	ASSERT_RTNL();  	might_sleep(); -	if (!(dev->flags & IFF_UP)) -		return 0; -  	/*  	 *	Tell people we are going down, so that they can  	 *	prepare to death, when device is still operating. @@ -1252,14 +1254,34 @@ int dev_close(struct net_device *dev)  	dev->flags &= ~IFF_UP;  	/* -	 * Tell people we are down +	 *	Shutdown NET_DMA  	 */ -	call_netdevice_notifiers(NETDEV_DOWN, dev); +	net_dmaengine_put(); + +	return 0; +} + +/** + *	dev_close - shutdown an interface. + *	@dev: device to shutdown + * + *	This function moves an active device into down state. A + *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device + *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier + *	chain. + */ +int dev_close(struct net_device *dev) +{ +	if (!(dev->flags & IFF_UP)) +		return 0; + +	__dev_close(dev);  	/* -	 *	Shutdown NET_DMA +	 * Tell people we are down  	 */ -	net_dmaengine_put(); +	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); +	call_netdevice_notifiers(NETDEV_DOWN, dev);  	return 0;  } @@ -1448,13 +1470,10 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)  	if (skb->len > (dev->mtu + dev->hard_header_len))  		return NET_RX_DROP; -	skb_dst_drop(skb); +	skb_set_dev(skb, dev);  	skb->tstamp.tv64 = 0;  	skb->pkt_type = PACKET_HOST;  	skb->protocol = eth_type_trans(skb, dev); -	skb->mark = 0; -	secpath_reset(skb); -	nf_reset(skb);  	return netif_rx(skb);  }  EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1614,6 +1633,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)  	return false;  } +/** + * skb_dev_set -- assign a new device to a buffer + * @skb: buffer for the new device + * @dev: network device + * + * If an skb is owned by a device already, we have to reset + * all data private to the namespace a device belongs to + * before assigning it a new device. + */ +#ifdef CONFIG_NET_NS +void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ +	skb_dst_drop(skb); +	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { +		secpath_reset(skb); +		nf_reset(skb); +		skb_init_secmark(skb); +		skb->mark = 0; +		skb->priority = 0; +		skb->nf_trace = 0; +		skb->ipvs_property = 0; +#ifdef CONFIG_NET_SCHED +		skb->tc_index = 0; +#endif +	} +	skb->dev = dev; +} +EXPORT_SYMBOL(skb_set_dev); +#endif /* CONFIG_NET_NS */ +  /*   * Invalidate hardware checksum when packet is to be mangled, and   * complete checksum manually on outgoing path. @@ -1853,6 +1902,14 @@ gso:  		skb->next = nskb->next;  		nskb->next = NULL; + +		/* +		 * If device doesnt need nskb->dst, release it right now while +		 * its hot in this cpu cache +		 */ +		if (dev->priv_flags & IFF_XMIT_DST_RELEASE) +			skb_dst_drop(nskb); +  		rc = ops->ndo_start_xmit(nskb, dev);  		if (unlikely(rc != NETDEV_TX_OK)) {  			if (rc & ~NETDEV_TX_MASK) @@ -1974,6 +2031,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,  	return rc;  } +/* + * Returns true if either: + *	1. skb has frag_list and the device doesn't support FRAGLIST, or + *	2. skb is fragmented and the device does not support SG, or if + *	   at least one of fragments is in highmem and device does not + *	   support DMA from it. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, +				      struct net_device *dev) +{ +	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || +	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || +					      illegal_highdma(dev, skb))); +} +  /**   *	dev_queue_xmit - transmit a buffer   *	@skb: buffer to transmit @@ -2010,18 +2082,8 @@ int dev_queue_xmit(struct sk_buff *skb)  	if (netif_needs_gso(dev, skb))  		goto gso; -	if (skb_has_frags(skb) && -	    !(dev->features & NETIF_F_FRAGLIST) && -	    __skb_linearize(skb)) -		goto out_kfree_skb; - -	/* Fragmented skb is linearized if device does not support SG, -	 * or if at least one of fragments is in highmem and device -	 * does not support DMA from it. -	 */ -	if (skb_shinfo(skb)->nr_frags && -	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && -	    __skb_linearize(skb)) +	/* Convert a paged skb to linear, if required */ +	if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))  		goto out_kfree_skb;  	/* If packet is not checksummed and device does not support @@ -2041,7 +2103,7 @@ gso:  	rcu_read_lock_bh();  	txq = dev_pick_tx(dev, skb); -	q = rcu_dereference(txq->qdisc); +	q = rcu_dereference_bh(txq->qdisc);  #ifdef CONFIG_NET_CLS_ACT  	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); @@ -2422,6 +2484,7 @@ int netif_receive_skb(struct sk_buff *skb)  	struct packet_type *ptype, *pt_prev;  	struct net_device *orig_dev;  	struct net_device *null_or_orig; +	struct net_device *null_or_bond;  	int ret = NET_RX_DROP;  	__be16 type; @@ -2487,12 +2550,24 @@ ncls:  	if (!skb)  		goto out; +	/* +	 * Make sure frames received on VLAN interfaces stacked on +	 * bonding interfaces still make their way to any base bonding +	 * device that may have registered for a specific ptype.  The +	 * handler may have to adjust skb->dev and orig_dev. +	 */ +	null_or_bond = NULL; +	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && +	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { +		null_or_bond = vlan_dev_real_dev(skb->dev); +	} +  	type = skb->protocol;  	list_for_each_entry_rcu(ptype,  			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { -		if (ptype->type == type && -		    (ptype->dev == null_or_orig || ptype->dev == skb->dev || -		     ptype->dev == orig_dev)) { +		if (ptype->type == type && (ptype->dev == null_or_orig || +		     ptype->dev == skb->dev || ptype->dev == orig_dev || +		     ptype->dev == null_or_bond)) {  			if (pt_prev)  				ret = deliver_skb(skb, pt_prev, orig_dev);  			pt_prev = ptype; @@ -2561,7 +2636,7 @@ out:  	return netif_receive_skb(skb);  } -void napi_gro_flush(struct napi_struct *napi) +static void napi_gro_flush(struct napi_struct *napi)  {  	struct sk_buff *skb, *next; @@ -2574,7 +2649,6 @@ void napi_gro_flush(struct napi_struct *napi)  	napi->gro_count = 0;  	napi->gro_list = NULL;  } -EXPORT_SYMBOL(napi_gro_flush);  enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)  { @@ -2761,7 +2835,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,  	switch (ret) {  	case GRO_NORMAL:  	case GRO_HELD: -		skb->protocol = eth_type_trans(skb, napi->dev); +		skb->protocol = eth_type_trans(skb, skb->dev);  		if (ret == GRO_HELD)  			skb_gro_pull(skb, -ETH_HLEN); @@ -2966,7 +3040,7 @@ static void net_rx_action(struct softirq_action *h)  		 * entries to the tail of this list, and only ->poll()  		 * calls can remove this head entry from the list.  		 */ -		n = list_entry(list->next, struct napi_struct, poll_list); +		n = list_first_entry(list, struct napi_struct, poll_list);  		have = netpoll_poll_lock(n); @@ -3185,7 +3259,7 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)  {  	const struct net_device_stats *stats = dev_get_stats(dev); -	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " +	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "  		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",  		   dev->name, stats->rx_bytes, stats->rx_packets,  		   stats->rx_errors, @@ -3640,10 +3714,10 @@ void __dev_set_rx_mode(struct net_device *dev)  		/* Unicast addresses changes may only happen under the rtnl,  		 * therefore calling __dev_set_promiscuity here is safe.  		 */ -		if (dev->uc.count > 0 && !dev->uc_promisc) { +		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {  			__dev_set_promiscuity(dev, 1);  			dev->uc_promisc = 1; -		} else if (dev->uc.count == 0 && dev->uc_promisc) { +		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {  			__dev_set_promiscuity(dev, -1);  			dev->uc_promisc = 0;  		} @@ -4211,7 +4285,7 @@ static void dev_addr_discard(struct net_device *dev)  	netif_addr_lock_bh(dev);  	__dev_addr_discard(&dev->mc_list); -	dev->mc_count = 0; +	netdev_mc_count(dev) = 0;  	netif_addr_unlock_bh(dev);  } @@ -4247,18 +4321,10 @@ unsigned dev_get_flags(const struct net_device *dev)  }  EXPORT_SYMBOL(dev_get_flags); -/** - *	dev_change_flags - change device settings - *	@dev: device - *	@flags: device state flags - * - *	Change settings on device based state flags. The flags are - *	in the userspace exported format. - */ -int dev_change_flags(struct net_device *dev, unsigned flags) +int __dev_change_flags(struct net_device *dev, unsigned int flags)  { -	int ret, changes;  	int old_flags = dev->flags; +	int ret;  	ASSERT_RTNL(); @@ -4289,17 +4355,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags)  	ret = 0;  	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */ -		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); +		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);  		if (!ret)  			dev_set_rx_mode(dev);  	} -	if (dev->flags & IFF_UP && -	    ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | -					  IFF_VOLATILE))) -		call_netdevice_notifiers(NETDEV_CHANGE, dev); -  	if ((flags ^ dev->gflags) & IFF_PROMISC) {  		int inc = (flags & IFF_PROMISC) ? 1 : -1; @@ -4318,11 +4379,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags)  		dev_set_allmulti(dev, inc);  	} -	/* Exclude state transition flags, already notified */ -	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING); +	return ret; +} + +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) +{ +	unsigned int changes = dev->flags ^ old_flags; + +	if (changes & IFF_UP) { +		if (dev->flags & IFF_UP) +			call_netdevice_notifiers(NETDEV_UP, dev); +		else +			call_netdevice_notifiers(NETDEV_DOWN, dev); +	} + +	if (dev->flags & IFF_UP && +	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) +		call_netdevice_notifiers(NETDEV_CHANGE, dev); +} + +/** + *	dev_change_flags - change device settings + *	@dev: device + *	@flags: device state flags + * + *	Change settings on device based state flags. The flags are + *	in the userspace exported format. + */ +int dev_change_flags(struct net_device *dev, unsigned flags) +{ +	int ret, changes; +	int old_flags = dev->flags; + +	ret = __dev_change_flags(dev, flags); +	if (ret < 0) +		return ret; + +	changes = old_flags ^ dev->flags;  	if (changes)  		rtmsg_ifinfo(RTM_NEWLINK, dev, changes); +	__dev_notify_flags(dev, old_flags);  	return ret;  }  EXPORT_SYMBOL(dev_change_flags); @@ -4813,6 +4910,10 @@ static void rollback_registered_many(struct list_head *head)  		*/  		call_netdevice_notifiers(NETDEV_UNREGISTER, dev); +		if (!dev->rtnl_link_ops || +		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED) +			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); +  		/*  		 *	Flush the unicast and multicast chains  		 */ @@ -4830,7 +4931,7 @@ static void rollback_registered_many(struct list_head *head)  	}  	/* Process any work delayed until the end of the batch */ -	dev = list_entry(head->next, struct net_device, unreg_list); +	dev = list_first_entry(head, struct net_device, unreg_list);  	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);  	synchronize_net(); @@ -5039,7 +5140,9 @@ int register_netdevice(struct net_device *dev)  	 *	Prevent userspace races by waiting until the network  	 *	device is fully setup before sending notifications.  	 */ -	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); +	if (!dev->rtnl_link_ops || +	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED) +		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);  out:  	return ret; @@ -5216,7 +5319,7 @@ void netdev_run_todo(void)  	while (!list_empty(&list)) {  		struct net_device *dev -			= list_entry(list.next, struct net_device, todo_list); +			= list_first_entry(&list, struct net_device, todo_list);  		list_del(&dev->todo_list);  		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { @@ -5367,6 +5470,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,  	netdev_init_queues(dev); +	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); +	dev->ethtool_ntuple_list.count = 0;  	INIT_LIST_HEAD(&dev->napi_list);  	INIT_LIST_HEAD(&dev->unreg_list);  	INIT_LIST_HEAD(&dev->link_watch_list); @@ -5403,6 +5508,9 @@ void free_netdev(struct net_device *dev)  	/* Flush device addresses */  	dev_addr_flush(dev); +	/* Clear ethtool n-tuple list */ +	ethtool_ntuple_flush(dev); +  	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)  		netif_napi_del(p);  |