Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  749
1 file changed, 41 insertions(+), 708 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..1baab07820f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,16 +153,10 @@
 
 #include "net-sysfs.h"
 
-#define MAX_GRO_SKBS 8
-
-/* This should be increased if a protocol with a bigger head is added. */
-#define GRO_MAX_HEAD (MAX_HEADER + 128)
 
 static DEFINE_SPINLOCK(ptype_lock);
-static DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
-static struct list_head offload_base __read_mostly;
 
 static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
@@ -371,12 +365,12 @@ static void list_netdevice(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	write_lock_bh(&dev_base_lock);
+	write_lock(&dev_base_lock);
 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
 	netdev_name_node_add(net, dev->name_node);
 	hlist_add_head_rcu(&dev->index_hlist,
 			   dev_index_hash(net, dev->ifindex));
-	write_unlock_bh(&dev_base_lock);
+	write_unlock(&dev_base_lock);
 
 	dev_base_seq_inc(net);
 }
@@ -389,11 +383,11 @@ static void unlist_netdevice(struct net_device *dev)
 	ASSERT_RTNL();
 
 	/* Unlink dev from the device chain */
-	write_lock_bh(&dev_base_lock);
+	write_lock(&dev_base_lock);
 	list_del_rcu(&dev->dev_list);
 	netdev_name_node_del(dev->name_node);
 	hlist_del_rcu(&dev->index_hlist);
-	write_unlock_bh(&dev_base_lock);
+	write_unlock(&dev_base_lock);
 
 	dev_base_seq_inc(dev_net(dev));
 }
@@ -604,84 +598,6 @@ void dev_remove_pack(struct packet_type *pt)
 
 EXPORT_SYMBOL(dev_remove_pack);
 
-/**
- *	dev_add_offload - register offload handlers
- *	@po: protocol offload declaration
- *
- *	Add protocol offload handlers to the networking stack. The passed
- *	&proto_offload is linked into kernel lists and may not be freed until
- *	it has been removed from the kernel lists.
- *
- *	This call does not sleep therefore it can not
- *	guarantee all CPU's that are in middle of receiving packets
- *	will see the new offload handlers (until the next received packet).
- */
-void dev_add_offload(struct packet_offload *po)
-{
-	struct packet_offload *elem;
-
-	spin_lock(&offload_lock);
-	list_for_each_entry(elem, &offload_base, list) {
-		if (po->priority < elem->priority)
-			break;
-	}
-	list_add_rcu(&po->list, elem->list.prev);
-	spin_unlock(&offload_lock);
-}
-EXPORT_SYMBOL(dev_add_offload);
-
-/**
- *	__dev_remove_offload	 - remove offload handler
- *	@po: packet offload declaration
- *
- *	Remove a protocol offload handler that was previously added to the
- *	kernel offload handlers by dev_add_offload(). The passed &offload_type
- *	is removed from the kernel lists and can be freed or reused once this
- *	function returns.
- *
- *      The packet type might still be in use by receivers
- *	and must not be freed until after all the CPU's have gone
- *	through a quiescent state.
- */
-static void __dev_remove_offload(struct packet_offload *po)
-{
-	struct list_head *head = &offload_base;
-	struct packet_offload *po1;
-
-	spin_lock(&offload_lock);
-
-	list_for_each_entry(po1, head, list) {
-		if (po == po1) {
-			list_del_rcu(&po->list);
-			goto out;
-		}
-	}
-
-	pr_warn("dev_remove_offload: %p not found\n", po);
-out:
-	spin_unlock(&offload_lock);
-}
-
-/**
- *	dev_remove_offload	 - remove packet offload handler
- *	@po: packet offload declaration
- *
- *	Remove a packet offload handler that was previously added to the kernel
- *	offload handlers by dev_add_offload(). The passed &offload_type is
- *	removed from the kernel lists and can be freed or reused once this
- *	function returns.
- *
- *	This call sleeps to guarantee that no CPU is looking at the packet
- *	type after return.
- */
-void dev_remove_offload(struct packet_offload *po)
-{
-	__dev_remove_offload(po);
-
-	synchronize_net();
-}
-EXPORT_SYMBOL(dev_remove_offload);
-
 /*******************************************************************************
  *
  *			    Device Interface Subroutines
@@ -1272,15 +1188,15 @@ rollback:
 
 	netdev_adjacent_rename_links(dev, oldname);
 
-	write_lock_bh(&dev_base_lock);
+	write_lock(&dev_base_lock);
 	netdev_name_node_del(dev->name_node);
-	write_unlock_bh(&dev_base_lock);
+	write_unlock(&dev_base_lock);
 
 	synchronize_rcu();
 
-	write_lock_bh(&dev_base_lock);
+	write_lock(&dev_base_lock);
 	netdev_name_node_add(net, dev->name_node);
-	write_unlock_bh(&dev_base_lock);
+	write_unlock(&dev_base_lock);
 
 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
 	ret = notifier_to_errno(ret);
@@ -1461,6 +1377,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
 	int ret;
 
 	ASSERT_RTNL();
+	dev_addr_check(dev);
 
 	if (!netif_device_present(dev)) {
 		/* may be detached because parent is runtime-suspended */
@@ -3315,40 +3232,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 	return __vlan_get_protocol(skb, type, depth);
 }
 
-/**
- *	skb_mac_gso_segment - mac layer segmentation handler.
- *	@skb: buffer to segment
- *	@features: features for the output path (see dev->features)
- */
-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
-				    netdev_features_t features)
-{
-	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-	struct packet_offload *ptype;
-	int vlan_depth = skb->mac_len;
-	__be16 type = skb_network_protocol(skb, &vlan_depth);
-
-	if (unlikely(!type))
-		return ERR_PTR(-EINVAL);
-
-	__skb_pull(skb, vlan_depth);
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &offload_base, list) {
-		if (ptype->type == type && ptype->callbacks.gso_segment) {
-			segs = ptype->callbacks.gso_segment(skb, features);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	__skb_push(skb, skb->data - skb_mac_header(skb));
-
-	return segs;
-}
-EXPORT_SYMBOL(skb_mac_gso_segment);
-
-
 /* openvswitch calls this on rx path, so we need a different check.
  */
 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
@@ -3513,7 +3396,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
 {
 	u16 gso_segs = skb_shinfo(skb)->gso_segs;
 
-	if (gso_segs > dev->gso_max_segs)
+	if (gso_segs > READ_ONCE(dev->gso_max_segs))
 		return features & ~NETIF_F_GSO_MASK;
 
 	if (!skb_shinfo(skb)->gso_type) {
@@ -3836,8 +3719,12 @@ no_lock_out:
 	 * separate lock before trying to get qdisc main lock.
 	 * This permits qdisc->running owner to get the lock more
 	 * often and dequeue packets faster.
+	 * On PREEMPT_RT it is possible to preempt the qdisc owner during xmit
+	 * and then other tasks will only enqueue packets. The packets will be
+	 * sent after the qdisc owner is scheduled again. To prevent this
+	 * scenario the task always serialize on the lock.
 	 */
-	contended = qdisc_is_running(q);
+	contended = qdisc_is_running(q) || IS_ENABLED(CONFIG_PREEMPT_RT);
 	if (unlikely(contended))
 		spin_lock(&q->busylock);
 
@@ -3941,8 +3828,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 		return skb;
 
 	/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-	qdisc_skb_cb(skb)->mru = 0;
-	qdisc_skb_cb(skb)->post_ct = false;
+	tc_skb_cb(skb)->mru = 0;
+	tc_skb_cb(skb)->post_ct = false;
 	mini_qdisc_bstats_cpu_update(miniq, skb);
 
 	switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
@@ -4323,8 +4210,6 @@ int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
 int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
 int dev_rx_weight __read_mostly = 64;
 int dev_tx_weight __read_mostly = 64;
-/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
-int gro_normal_batch __read_mostly = 8;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4827,7 +4712,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	case XDP_PASS:
 		break;
 	default:
-		bpf_warn_invalid_xdp_action(act);
+		bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_ABORTED:
 		trace_xdp_exception(skb->dev, xdp_prog, act);
@@ -5014,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 			if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
 				trace_consume_skb(skb);
 			else
-				trace_kfree_skb(skb, net_tx_action);
+				trace_kfree_skb(skb, net_tx_action,
+						SKB_DROP_REASON_NOT_SPECIFIED);
 
 			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
 				__kfree_skb(skb);
@@ -5103,8 +4989,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 	}
 
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	qdisc_skb_cb(skb)->mru = 0;
-	qdisc_skb_cb(skb)->post_ct = false;
+	tc_skb_cb(skb)->mru = 0;
+	tc_skb_cb(skb)->post_ct = false;
 	skb->tc_at_ingress = 1;
 	mini_qdisc_bstats_cpu_update(miniq, skb);
 
@@ -5667,7 +5553,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	return ret;
 }
 
-static void netif_receive_skb_list_internal(struct list_head *head)
+void netif_receive_skb_list_internal(struct list_head *head)
 {
 	struct sk_buff *skb, *next;
 	struct list_head sublist;
@@ -5845,550 +5731,6 @@ static void flush_all_backlogs(void)
 	cpus_read_unlock();
 }
 
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
-	if (!napi->rx_count)
-		return;
-	netif_receive_skb_list_internal(&napi->rx_list);
-	INIT_LIST_HEAD(&napi->rx_list);
-	napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
-{
-	list_add_tail(&skb->list, &napi->rx_list);
-	napi->rx_count += segs;
-	if (napi->rx_count >= gro_normal_batch)
-		gro_normal_list(napi);
-}
-
-static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
-{
-	struct packet_offload *ptype;
-	__be16 type = skb->protocol;
-	struct list_head *head = &offload_base;
-	int err = -ENOENT;
-
-	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
-
-	if (NAPI_GRO_CB(skb)->count == 1) {
-		skb_shinfo(skb)->gso_size = 0;
-		goto out;
-	}
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_complete)
-			continue;
-
-		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
-					 ipv6_gro_complete, inet_gro_complete,
-					 skb, 0);
-		break;
-	}
-	rcu_read_unlock();
-
-	if (err) {
-		WARN_ON(&ptype->list == head);
-		kfree_skb(skb);
-		return;
-	}
-
-out:
-	gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
-}
-
-static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
-				   bool flush_old)
-{
-	struct list_head *head = &napi->gro_hash[index].list;
-	struct sk_buff *skb, *p;
-
-	list_for_each_entry_safe_reverse(skb, p, head, list) {
-		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
-			return;
-		skb_list_del_init(skb);
-		napi_gro_complete(napi, skb);
-		napi->gro_hash[index].count--;
-	}
-
-	if (!napi->gro_hash[index].count)
-		__clear_bit(index, &napi->gro_bitmask);
-}
-
-/* napi->gro_hash[].list contains packets ordered by age.
- * youngest packets at the head of it.
- * Complete skbs in reverse order to reduce latencies.
- */
-void napi_gro_flush(struct napi_struct *napi, bool flush_old)
-{
-	unsigned long bitmask = napi->gro_bitmask;
-	unsigned int i, base = ~0U;
-
-	while ((i = ffs(bitmask)) != 0) {
-		bitmask >>= i;
-		base += i;
-		__napi_gro_flush_chain(napi, base, flush_old);
-	}
-}
-EXPORT_SYMBOL(napi_gro_flush);
-
-static void gro_list_prepare(const struct list_head *head,
-			     const struct sk_buff *skb)
-{
-	unsigned int maclen = skb->dev->hard_header_len;
-	u32 hash = skb_get_hash_raw(skb);
-	struct sk_buff *p;
-
-	list_for_each_entry(p, head, list) {
-		unsigned long diffs;
-
-		NAPI_GRO_CB(p)->flush = 0;
-
-		if (hash != skb_get_hash_raw(p)) {
-			NAPI_GRO_CB(p)->same_flow = 0;
-			continue;
-		}
-
-		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
-		diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
-		if (skb_vlan_tag_present(p))
-			diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
-		diffs |= skb_metadata_differs(p, skb);
-		if (maclen == ETH_HLEN)
-			diffs |= compare_ether_header(skb_mac_header(p),
-						      skb_mac_header(skb));
-		else if (!diffs)
-			diffs = memcmp(skb_mac_header(p),
-				       skb_mac_header(skb),
-				       maclen);
-
-		/* in most common scenarions 'slow_gro' is 0
-		 * otherwise we are already on some slower paths
-		 * either skip all the infrequent tests altogether or
-		 * avoid trying too hard to skip each of them individually
-		 */
-		if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
-#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-			struct tc_skb_ext *skb_ext;
-			struct tc_skb_ext *p_ext;
-#endif
-
-			diffs |= p->sk != skb->sk;
-			diffs |= skb_metadata_dst_cmp(p, skb);
-			diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
-
-#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-			skb_ext = skb_ext_find(skb, TC_SKB_EXT);
-			p_ext = skb_ext_find(p, TC_SKB_EXT);
-
-			diffs |= (!!p_ext) ^ (!!skb_ext);
-			if (!diffs && unlikely(skb_ext))
-				diffs |= p_ext->chain ^ skb_ext->chain;
-#endif
-		}
-
-		NAPI_GRO_CB(p)->same_flow = !diffs;
-	}
-}
-
-static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
-{
-	const struct skb_shared_info *pinfo = skb_shinfo(skb);
-	const skb_frag_t *frag0 = &pinfo->frags[0];
-
-	NAPI_GRO_CB(skb)->data_offset = 0;
-	NAPI_GRO_CB(skb)->frag0 = NULL;
-	NAPI_GRO_CB(skb)->frag0_len = 0;
-
-	if (!skb_headlen(skb) && pinfo->nr_frags &&
-	    !PageHighMem(skb_frag_page(frag0)) &&
-	    (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
-		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
-		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
-						    skb_frag_size(frag0),
-						    skb->end - skb->tail);
-	}
-}
-
-static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
-{
-	struct skb_shared_info *pinfo = skb_shinfo(skb);
-
-	BUG_ON(skb->end - skb->tail < grow);
-
-	memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
-	skb->data_len -= grow;
-	skb->tail += grow;
-
-	skb_frag_off_add(&pinfo->frags[0], grow);
-	skb_frag_size_sub(&pinfo->frags[0], grow);
-
-	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
-		skb_frag_unref(skb, 0);
-		memmove(pinfo->frags, pinfo->frags + 1,
-			--pinfo->nr_frags * sizeof(pinfo->frags[0]));
-	}
-}
-
-static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
-{
-	struct sk_buff *oldest;
-
-	oldest = list_last_entry(head, struct sk_buff, list);
-
-	/* We are called with head length >= MAX_GRO_SKBS, so this is
-	 * impossible.
-	 */
-	if (WARN_ON_ONCE(!oldest))
-		return;
-
-	/* Do not adjust napi->gro_hash[].count, caller is adding a new
-	 * SKB to the chain.
-	 */
-	skb_list_del_init(oldest);
-	napi_gro_complete(napi, oldest);
-}
-
-static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
-	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
-	struct gro_list *gro_list = &napi->gro_hash[bucket];
-	struct list_head *head = &offload_base;
-	struct packet_offload *ptype;
-	__be16 type = skb->protocol;
-	struct sk_buff *pp = NULL;
-	enum gro_result ret;
-	int same_flow;
-	int grow;
-
-	if (netif_elide_gro(skb->dev))
-		goto normal;
-
-	gro_list_prepare(&gro_list->list, skb);
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_receive)
-			continue;
-
-		skb_set_network_header(skb, skb_gro_offset(skb));
-		skb_reset_mac_len(skb);
-		NAPI_GRO_CB(skb)->same_flow = 0;
-		NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
-		NAPI_GRO_CB(skb)->free = 0;
-		NAPI_GRO_CB(skb)->encap_mark = 0;
-		NAPI_GRO_CB(skb)->recursion_counter = 0;
-		NAPI_GRO_CB(skb)->is_fou = 0;
-		NAPI_GRO_CB(skb)->is_atomic = 1;
-		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
-
-		/* Setup for GRO checksum validation */
-		switch (skb->ip_summed) {
-		case CHECKSUM_COMPLETE:
-			NAPI_GRO_CB(skb)->csum = skb->csum;
-			NAPI_GRO_CB(skb)->csum_valid = 1;
-			NAPI_GRO_CB(skb)->csum_cnt = 0;
-			break;
-		case CHECKSUM_UNNECESSARY:
-			NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
-			NAPI_GRO_CB(skb)->csum_valid = 0;
-			break;
-		default:
-			NAPI_GRO_CB(skb)->csum_cnt = 0;
-			NAPI_GRO_CB(skb)->csum_valid = 0;
-		}
-
-		pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
-					ipv6_gro_receive, inet_gro_receive,
-					&gro_list->list, skb);
-		break;
-	}
-	rcu_read_unlock();
-
-	if (&ptype->list == head)
-		goto normal;
-
-	if (PTR_ERR(pp) == -EINPROGRESS) {
-		ret = GRO_CONSUMED;
-		goto ok;
-	}
-
-	same_flow = NAPI_GRO_CB(skb)->same_flow;
-	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
-
-	if (pp) {
-		skb_list_del_init(pp);
-		napi_gro_complete(napi, pp);
-		gro_list->count--;
-	}
-
-	if (same_flow)
-		goto ok;
-
-	if (NAPI_GRO_CB(skb)->flush)
-		goto normal;
-
-	if (unlikely(gro_list->count >= MAX_GRO_SKBS))
-		gro_flush_oldest(napi, &gro_list->list);
-	else
-		gro_list->count++;
-
-	NAPI_GRO_CB(skb)->count = 1;
-	NAPI_GRO_CB(skb)->age = jiffies;
-	NAPI_GRO_CB(skb)->last = skb;
-	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	list_add(&skb->list, &gro_list->list);
-	ret = GRO_HELD;
-
-pull:
-	grow = skb_gro_offset(skb) - skb_headlen(skb);
-	if (grow > 0)
-		gro_pull_from_frag0(skb, grow);
-ok:
-	if (gro_list->count) {
-		if (!test_bit(bucket, &napi->gro_bitmask))
-			__set_bit(bucket, &napi->gro_bitmask);
-	} else if (test_bit(bucket, &napi->gro_bitmask)) {
-		__clear_bit(bucket, &napi->gro_bitmask);
-	}
-
-	return ret;
-
-normal:
-	ret = GRO_NORMAL;
-	goto pull;
-}
-
-struct packet_offload *gro_find_receive_by_type(__be16 type)
-{
-	struct list_head *offload_head = &offload_base;
-	struct packet_offload *ptype;
-
-	list_for_each_entry_rcu(ptype, offload_head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_receive)
-			continue;
-		return ptype;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(gro_find_receive_by_type);
-
-struct packet_offload *gro_find_complete_by_type(__be16 type)
-{
-	struct list_head *offload_head = &offload_base;
-	struct packet_offload *ptype;
-
-	list_for_each_entry_rcu(ptype, offload_head, list) {
-		if (ptype->type != type || !ptype->callbacks.gro_complete)
-			continue;
-		return ptype;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(gro_find_complete_by_type);
-
-static gro_result_t napi_skb_finish(struct napi_struct *napi,
-				    struct sk_buff *skb,
-				    gro_result_t ret)
-{
-	switch (ret) {
-	case GRO_NORMAL:
-		gro_normal_one(napi, skb, 1);
-		break;
-
-	case GRO_MERGED_FREE:
-		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
-			napi_skb_free_stolen_head(skb);
-		else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
-			__kfree_skb(skb);
-		else
-			__kfree_skb_defer(skb);
-		break;
-
-	case GRO_HELD:
-	case GRO_MERGED:
-	case GRO_CONSUMED:
-		break;
-	}
-
-	return ret;
-}
-
-gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
-	gro_result_t ret;
-
-	skb_mark_napi_id(skb, napi);
-	trace_napi_gro_receive_entry(skb);
-
-	skb_gro_reset_offset(skb, 0);
-
-	ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
-	trace_napi_gro_receive_exit(ret);
-
-	return ret;
-}
-EXPORT_SYMBOL(napi_gro_receive);
-
-static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
-{
-	if (unlikely(skb->pfmemalloc)) {
-		consume_skb(skb);
-		return;
-	}
-	__skb_pull(skb, skb_headlen(skb));
-	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
-	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
-	__vlan_hwaccel_clear_tag(skb);
-	skb->dev = napi->dev;
-	skb->skb_iif = 0;
-
-	/* eth_type_trans() assumes pkt_type is PACKET_HOST */
-	skb->pkt_type = PACKET_HOST;
-
-	skb->encapsulation = 0;
-	skb_shinfo(skb)->gso_type = 0;
-	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-	if (unlikely(skb->slow_gro)) {
-		skb_orphan(skb);
-		skb_ext_reset(skb);
-		nf_reset_ct(skb);
-		skb->slow_gro = 0;
-	}
-
-	napi->skb = skb;
-}
-
-struct sk_buff *napi_get_frags(struct napi_struct *napi)
-{
-	struct sk_buff *skb = napi->skb;
-
-	if (!skb) {
-		skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
-		if (skb) {
-			napi->skb = skb;
-			skb_mark_napi_id(skb, napi);
-		}
-	}
-	return skb;
-}
-EXPORT_SYMBOL(napi_get_frags);
-
-static gro_result_t napi_frags_finish(struct napi_struct *napi,
-				      struct sk_buff *skb,
-				      gro_result_t ret)
-{
-	switch (ret) {
-	case GRO_NORMAL:
-	case GRO_HELD:
-		__skb_push(skb, ETH_HLEN);
-		skb->protocol = eth_type_trans(skb, skb->dev);
-		if (ret == GRO_NORMAL)
-			gro_normal_one(napi, skb, 1);
-		break;
-
-	case GRO_MERGED_FREE:
-		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
-			napi_skb_free_stolen_head(skb);
-		else
-			napi_reuse_skb(napi, skb);
-		break;
-
-	case GRO_MERGED:
-	case GRO_CONSUMED:
-		break;
-	}
-
-	return ret;
-}
-
-/* Upper GRO stack assumes network header starts at gro_offset=0
- * Drivers could call both napi_gro_frags() and napi_gro_receive()
- * We copy ethernet header into skb->data to have a common layout.
- */
-static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
-{
-	struct sk_buff *skb = napi->skb;
-	const struct ethhdr *eth;
-	unsigned int hlen = sizeof(*eth);
-
-	napi->skb = NULL;
-
-	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb, hlen);
-
-	if (unlikely(skb_gro_header_hard(skb, hlen))) {
-		eth = skb_gro_header_slow(skb, hlen, 0);
-		if (unlikely(!eth)) {
-			net_warn_ratelimited("%s: dropping impossible skb from %s\n",
-					     __func__, napi->dev->name);
-			napi_reuse_skb(napi, skb);
-			return NULL;
-		}
-	} else {
-		eth = (const struct ethhdr *)skb->data;
-		gro_pull_from_frag0(skb, hlen);
-		NAPI_GRO_CB(skb)->frag0 += hlen;
-		NAPI_GRO_CB(skb)->frag0_len -= hlen;
-	}
-	__skb_pull(skb, hlen);
-
-	/*
-	 * This works because the only protocols we care about don't require
-	 * special handling.
-	 * We'll fix it up properly in napi_frags_finish()
-	 */
-	skb->protocol = eth->h_proto;
-
-	return skb;
-}
-
-gro_result_t napi_gro_frags(struct napi_struct *napi)
-{
-	gro_result_t ret;
-	struct sk_buff *skb = napi_frags_skb(napi);
-
-	trace_napi_gro_frags_entry(skb);
-
-	ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
-	trace_napi_gro_frags_exit(ret);
-
-	return ret;
-}
-EXPORT_SYMBOL(napi_gro_frags);
-
-/* Compute the checksum from gro_offset and return the folded value
- * after adding in any pseudo checksum.
- */
-__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
-{
-	__wsum wsum;
-	__sum16 sum;
-
-	wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
-
-	/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
-	sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
-	/* See comments in __skb_checksum_complete(). */
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
-		    !skb->csum_complete_sw)
-			netdev_rx_csum_fault(skb->dev, skb);
-	}
-
-	NAPI_GRO_CB(skb)->csum = wsum;
-	NAPI_GRO_CB(skb)->csum_valid = 1;
-
-	return sum;
-}
-EXPORT_SYMBOL(__skb_gro_checksum_complete);
-
 static void net_rps_send_ipi(struct softnet_data *remsd)
 {
 #ifdef CONFIG_RPS
@@ -7200,6 +6542,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 
 struct netdev_adjacent {
 	struct net_device *dev;
+	netdevice_tracker dev_tracker;
 
 	/* upper master flag, there can only be one master device per list */
 	bool master;
@@ -7964,7 +7307,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	adj->ref_nr = 1;
 	adj->private = private;
 	adj->ignore = false;
-	dev_hold(adj_dev);
+	dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL);
 
 	pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
 		 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
@@ -7993,8 +7336,8 @@ remove_symlinks:
 	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
 		netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
 free_adj:
+	dev_put_track(adj_dev, &adj->dev_tracker);
 	kfree(adj);
-	dev_put(adj_dev);
 
 	return ret;
 }
@@ -8035,7 +7378,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
 	list_del_rcu(&adj->list);
 	pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
 		 adj_dev->name, dev->name, adj_dev->name);
-	dev_put(adj_dev);
+	dev_put_track(adj_dev, &adj->dev_tracker);
 	kfree_rcu(adj, rcu);
 }
 
@@ -9224,35 +8567,17 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
 EXPORT_SYMBOL(netdev_port_same_parent_id);
 
 /**
- *	dev_change_proto_down - update protocol port state information
+ *	dev_change_proto_down - set carrier according to proto_down.
+ *
  *	@dev: device
 *	@proto_down: new value
- *
- *	This info can be used by switch drivers to set the phys state of the
- *	port.
 */
 int dev_change_proto_down(struct net_device *dev, bool proto_down)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (!ops->ndo_change_proto_down)
+	if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
 		return -EOPNOTSUPP;
 	if (!netif_device_present(dev))
 		return -ENODEV;
-	return ops->ndo_change_proto_down(dev, proto_down);
-}
-EXPORT_SYMBOL(dev_change_proto_down);
-
-/**
- *	dev_change_proto_down_generic - generic implementation for
- * 	ndo_change_proto_down that sets carrier according to
- * 	proto_down.
- *
- *	@dev: device
- *	@proto_down: new value
- */
-int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
-{
 	if (proto_down)
 		netif_carrier_off(dev);
 	else
@@ -9260,7 +8585,7 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
 	dev->proto_down = proto_down;
 	return 0;
 }
-EXPORT_SYMBOL(dev_change_proto_down_generic);
+EXPORT_SYMBOL(dev_change_proto_down);
 
 /**
  *	dev_change_proto_down_reason - proto down reason
@@ -9656,6 +8981,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 		goto out_unlock;
 	}
 	old_prog = link->prog;
+	if (old_prog->type != new_prog->type ||
+	    old_prog->expected_attach_type != new_prog->expected_attach_type) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
 	if (old_prog == new_prog) {
 		/* no-op, don't disturb drivers */
 		bpf_prog_put(new_prog);
@@ -10545,6 +9876,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 			       netdev_unregister_timeout_secs * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
+			ref_tracker_dir_print(&dev->refcnt_tracker, 10);
 			warning_time = jiffies;
 		}
 	}
@@ -10835,6 +10167,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	ref_tracker_dir_init(&dev->refcnt_tracker, 128);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
@@ -10854,6 +10187,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 	dev->gso_max_segs = GSO_MAX_SEGS;
+	dev->gro_max_size = GRO_MAX_SIZE;
 	dev->upper_level = 1;
 	dev->lower_level = 1;
 #ifdef CONFIG_LOCKDEP
@@ -10951,6 +10285,7 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	ref_tracker_dir_exit(&dev->refcnt_tracker);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
@@ -11643,8 +10978,6 @@ static int __init net_dev_init(void)
 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&ptype_base[i]);
 
-	INIT_LIST_HEAD(&offload_base);
-
 	if (register_pernet_subsys(&netdev_net_ops))
 		goto out;
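
The hunk at -604,84 drops dev_add_offload(), __dev_remove_offload() and dev_remove_offload() from dev.c along with offload_lock and offload_base, but the registration API keeps the same shape for its callers. A minimal sketch of how a protocol registers its GRO/GSO offload callbacks with that API is below; the example_* identifiers and the priority value are illustrative and not taken from this commit.

/* hypothetical callbacks; signatures follow struct offload_callbacks */
static struct sk_buff *example_gso_segment(struct sk_buff *skb,
					   netdev_features_t features);
static struct sk_buff *example_gro_receive(struct list_head *head,
					   struct sk_buff *skb);
static int example_gro_complete(struct sk_buff *skb, int nhoff);

static struct packet_offload example_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),	/* protocol this handler serves */
	.priority = 10,			/* lower value sorts earlier in the offload list */
	.callbacks = {
		.gso_segment  = example_gso_segment,
		.gro_receive  = example_gro_receive,
		.gro_complete = example_gro_complete,
	},
};

static int __init example_offload_init(void)
{
	dev_add_offload(&example_offload);	/* does not sleep, per the kerneldoc above */
	return 0;
}

static void __exit example_offload_exit(void)
{
	dev_remove_offload(&example_offload);	/* sleeps in synchronize_net() */
}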
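The large hunk at -5845,550 removes the GRO engine itself (dev_gro_receive(), napi_gro_receive(), napi_gro_frags(), napi_gro_flush(), the gro_normal_*() batching helpers and __skb_gro_checksum_complete()) from dev.c. The driver-facing entry points appear unchanged by this diff, so a NAPI poll handler that feeds received packets into GRO still looks roughly like the hypothetical sketch below; example_ring and example_ring_next_skb() are invented for illustration.

struct example_ring {
	struct napi_struct napi;
	/* ... hardware descriptor ring state ... */
};

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_ring *ring = container_of(napi, struct example_ring, napi);
	int done = 0;

	while (done < budget) {
		/* hypothetical helper that builds an skb from the next RX descriptor */
		struct sk_buff *skb = example_ring_next_skb(ring);

		if (!skb)
			break;
		skb->protocol = eth_type_trans(skb, napi->dev);
		napi_gro_receive(napi, skb);	/* exported GRO entry point */
		done++;
	}

	if (done < budget)
		napi_complete_done(napi, done);

	return done;
}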
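The netdevice_tracker added to struct netdev_adjacent, together with the dev_hold_track()/dev_put_track() and ref_tracker_dir_init()/ref_tracker_dir_exit()/ref_tracker_dir_print() hunks, wires net_device refcounts into the reference tracker so netdev_wait_allrefs() can report who still holds a device. A hedged sketch of the same pattern for any object that pins a net_device follows; the example_* names are invented.

struct example_binding {
	struct net_device *dev;
	netdevice_tracker dev_tracker;	/* pairs the hold with its release */
};

static void example_bind(struct example_binding *b, struct net_device *dev)
{
	b->dev = dev;
	dev_hold_track(dev, &b->dev_tracker, GFP_KERNEL);	/* tracked dev_hold() */
}

static void example_unbind(struct example_binding *b)
{
	dev_put_track(b->dev, &b->dev_tracker);	/* tracked dev_put() */
	b->dev = NULL;
}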
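The proto_down hunks fold the old dev_change_proto_down_generic() body into dev_change_proto_down() and gate it on a priv_flag instead of an ndo. Under that change, a driver that is satisfied with the generic carrier-off behaviour would only need to advertise the capability; a hypothetical setup function (name and device type invented) is sketched below.

static void example_setup(struct net_device *dev)
{
	ether_setup(dev);
	/* opt in to dev_change_proto_down(): the core toggles carrier and
	 * records dev->proto_down instead of calling ndo_change_proto_down
	 */
	dev->priv_flags |= IFF_CHANGE_PROTO_DOWN;
}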