Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--	drivers/net/virtio_net.c	267
1 file changed, 173 insertions, 94 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b6c9a2af3732..765920905226 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -30,7 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/average.h>
 #include <linux/filter.h>
-#include <linux/netdevice.h>
+#include <linux/kernel.h>
 #include <linux/pci.h>
 #include <net/route.h>
 #include <net/xdp.h>
@@ -53,6 +53,10 @@ module_param(napi_tx, bool, 0644);
 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
 #define VIRTIO_XDP_HEADROOM 256
 
+/* Separating two types of XDP xmit */
+#define VIRTIO_XDP_TX		BIT(0)
+#define VIRTIO_XDP_REDIR	BIT(1)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -78,25 +82,43 @@ struct virtnet_sq_stats {
 	struct u64_stats_sync syncp;
 	u64 packets;
 	u64 bytes;
+	u64 xdp_tx;
+	u64 xdp_tx_drops;
+	u64 kicks;
 };
 
 struct virtnet_rq_stats {
 	struct u64_stats_sync syncp;
 	u64 packets;
 	u64 bytes;
+	u64 drops;
+	u64 xdp_packets;
+	u64 xdp_tx;
+	u64 xdp_redirects;
+	u64 xdp_drops;
+	u64 kicks;
 };
 
 #define VIRTNET_SQ_STAT(m)	offsetof(struct virtnet_sq_stats, m)
 #define VIRTNET_RQ_STAT(m)	offsetof(struct virtnet_rq_stats, m)
 
 static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
-	{ "packets",	VIRTNET_SQ_STAT(packets) },
-	{ "bytes",	VIRTNET_SQ_STAT(bytes) },
+	{ "packets",		VIRTNET_SQ_STAT(packets) },
+	{ "bytes",		VIRTNET_SQ_STAT(bytes) },
+	{ "xdp_tx",		VIRTNET_SQ_STAT(xdp_tx) },
+	{ "xdp_tx_drops",	VIRTNET_SQ_STAT(xdp_tx_drops) },
+	{ "kicks",		VIRTNET_SQ_STAT(kicks) },
 };
 
 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
-	{ "packets",	VIRTNET_RQ_STAT(packets) },
-	{ "bytes",	VIRTNET_RQ_STAT(bytes) },
+	{ "packets",		VIRTNET_RQ_STAT(packets) },
+	{ "bytes",		VIRTNET_RQ_STAT(bytes) },
+	{ "drops",		VIRTNET_RQ_STAT(drops) },
+	{ "xdp_packets",	VIRTNET_RQ_STAT(xdp_packets) },
+	{ "xdp_tx",		VIRTNET_RQ_STAT(xdp_tx) },
+	{ "xdp_redirects",	VIRTNET_RQ_STAT(xdp_redirects) },
+	{ "xdp_drops",		VIRTNET_RQ_STAT(xdp_drops) },
+	{ "kicks",		VIRTNET_RQ_STAT(kicks) },
 };
 
 #define VIRTNET_SQ_STATS_LEN	ARRAY_SIZE(virtnet_sq_stats_desc)
@@ -443,22 +465,12 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 	return 0;
 }
 
-static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
-				   struct xdp_frame *xdpf)
+static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
 {
-	struct xdp_frame *xdpf_sent;
-	struct send_queue *sq;
-	unsigned int len;
 	unsigned int qp;
 
 	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	sq = &vi->sq[qp];
-
-	/* Free up any pending old buffers before queueing new ones. */
-	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-		xdp_return_frame(xdpf_sent);
-
-	return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+	return &vi->sq[qp];
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -470,23 +482,28 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	struct bpf_prog *xdp_prog;
 	struct send_queue *sq;
 	unsigned int len;
-	unsigned int qp;
 	int drops = 0;
-	int err;
+	int kicks = 0;
+	int ret, err;
 	int i;
 
-	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
-		return -EINVAL;
+	sq = virtnet_xdp_sq(vi);
 
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	sq = &vi->sq[qp];
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
+		ret = -EINVAL;
+		drops = n;
+		goto out;
+	}
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
 	 * indicate XDP resources have been successfully allocated.
 	 */
 	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (!xdp_prog)
-		return -ENXIO;
+	if (!xdp_prog) {
+		ret = -ENXIO;
+		drops = n;
+		goto out;
+	}
 
 	/* Free up any pending old buffers before queueing new ones. */
 	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
@@ -501,11 +518,20 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 			drops++;
 		}
 	}
+	ret = n - drops;
 
-	if (flags & XDP_XMIT_FLUSH)
-		virtqueue_kick(sq->vq);
+	if (flags & XDP_XMIT_FLUSH) {
+		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
+			kicks = 1;
+	}
+out:
+	u64_stats_update_begin(&sq->stats.syncp);
+	sq->stats.xdp_tx += n;
+	sq->stats.xdp_tx_drops += drops;
+	sq->stats.kicks += kicks;
+	u64_stats_update_end(&sq->stats.syncp);
 
-	return n - drops;
+	return ret;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -582,7 +608,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
 				     struct receive_queue *rq,
 				     void *buf, void *ctx,
 				     unsigned int len,
-				     bool *xdp_xmit)
+				     unsigned int *xdp_xmit,
+				     struct virtnet_rq_stats *stats)
 {
 	struct sk_buff *skb;
 	struct bpf_prog *xdp_prog;
@@ -597,6 +624,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	int err;
 
 	len -= vi->hdr_len;
+	stats->bytes += len;
 
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -638,6 +666,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		xdp.rxq = &rq->xdp_rxq;
 		orig_data = xdp.data;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+		stats->xdp_packets++;
 
 		switch (act) {
 		case XDP_PASS:
@@ -646,26 +675,29 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			len = xdp.data_end - xdp.data;
 			break;
 		case XDP_TX:
+			stats->xdp_tx++;
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
-			err = __virtnet_xdp_tx_xmit(vi, xdpf);
-			if (unlikely(err)) {
+			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
+			if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
 			}
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_TX;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
+			stats->xdp_redirects++;
 			err = xdp_do_redirect(dev, &xdp, xdp_prog);
 			if (err)
 				goto err_xdp;
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_REDIR;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
+			/* fall through */
 		case XDP_ABORTED:
 			trace_xdp_exception(vi->dev, xdp_prog, act);
 		case XDP_DROP:
@@ -691,7 +723,8 @@ err:
 
 err_xdp:
 	rcu_read_unlock();
-	dev->stats.rx_dropped++;
+	stats->xdp_drops++;
+	stats->drops++;
 	put_page(page);
 xdp_xmit:
 	return NULL;
@@ -701,18 +734,20 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   void *buf,
-				   unsigned int len)
+				   unsigned int len,
+				   struct virtnet_rq_stats *stats)
 {
 	struct page *page = buf;
 	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 
+	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
 		goto err;
 
 	return skb;
 
 err:
-	dev->stats.rx_dropped++;
+	stats->drops++;
 	give_pages(rq, page);
 	return NULL;
 }
@@ -723,7 +758,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 void *buf,
 					 void *ctx,
 					 unsigned int len,
-					 bool *xdp_xmit)
+					 unsigned int *xdp_xmit,
+					 struct virtnet_rq_stats *stats)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
@@ -736,6 +772,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	int err;
 
 	head_skb = NULL;
+	stats->bytes += len - vi->hdr_len;
 
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -784,6 +821,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+		stats->xdp_packets++;
 
 		switch (act) {
 		case XDP_PASS:
@@ -808,37 +846,41 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			}
 			break;
 		case XDP_TX:
+			stats->xdp_tx++;
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
-			err = __virtnet_xdp_tx_xmit(vi, xdpf);
-			if (unlikely(err)) {
+			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
+			if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
 					put_page(xdp_page);
 				goto err_xdp;
 			}
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_TX;
 			if (unlikely(xdp_page != page))
 				put_page(page);
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
+			stats->xdp_redirects++;
 			err = xdp_do_redirect(dev, &xdp, xdp_prog);
 			if (err) {
 				if (unlikely(xdp_page != page))
 					put_page(xdp_page);
 				goto err_xdp;
 			}
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_REDIR;
 			if (unlikely(xdp_page != page))
 				put_page(page);
 			rcu_read_unlock();
 			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
+			/* fall through */
 		case XDP_ABORTED:
 			trace_xdp_exception(vi->dev, xdp_prog, act);
+			/* fall through */
 		case XDP_DROP:
 			if (unlikely(xdp_page != page))
 				__free_pages(xdp_page, 0);
@@ -873,6 +915,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			goto err_buf;
 		}
 
+		stats->bytes += len;
 		page = virt_to_head_page(buf);
 
 		truesize = mergeable_ctx_to_truesize(ctx);
@@ -918,6 +961,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 err_xdp:
 	rcu_read_unlock();
+	stats->xdp_drops++;
 err_skb:
 	put_page(page);
 	while (num_buf-- > 1) {
@@ -928,23 +972,25 @@ err_skb:
 			dev->stats.rx_length_errors++;
 			break;
 		}
+		stats->bytes += len;
 		page = virt_to_head_page(buf);
 		put_page(page);
 	}
 err_buf:
-	dev->stats.rx_dropped++;
+	stats->drops++;
 	dev_kfree_skb(head_skb);
 xdp_xmit:
 	return NULL;
 }
 
-static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
-		       void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
+static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
+			void *buf, unsigned int len, void **ctx,
+			unsigned int *xdp_xmit,
+			struct virtnet_rq_stats *stats)
 {
 	struct net_device *dev = vi->dev;
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
-	int ret;
 
 	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
 		pr_debug("%s: short packet %i\n", dev->name, len);
@@ -956,23 +1002,22 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 		} else {
 			put_page(virt_to_head_page(buf));
 		}
-		return 0;
+		return;
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
+		skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
+					stats);
 	else if (vi->big_packets)
-		skb = receive_big(dev, vi, rq, buf, len);
+		skb = receive_big(dev, vi, rq, buf, len, stats);
 	else
-		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit);
+		skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
 
 	if (unlikely(!skb))
-		return 0;
+		return;
 
 	hdr = skb_vnet_hdr(skb);
 
-	ret = skb->len;
-
 	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
@@ -989,12 +1034,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
 
 	napi_gro_receive(&rq->napi, skb);
-	return ret;
+	return;
 
 frame_err:
 	dev->stats.rx_frame_errors++;
 	dev_kfree_skb(skb);
-	return 0;
 }
 
 /* Unlike mergeable buffers, all buffers are allocated to the
@@ -1161,7 +1205,12 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
 		if (err)
 			break;
 	} while (rq->vq->num_free);
-	virtqueue_kick(rq->vq);
+	if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) {
+		u64_stats_update_begin(&rq->stats.syncp);
+		rq->stats.kicks++;
+		u64_stats_update_end(&rq->stats.syncp);
+	}
+
 	return !oom;
 }
 
@@ -1232,25 +1281,28 @@ static void refill_work(struct work_struct *work)
 	}
 }
 
-static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
+static int virtnet_receive(struct receive_queue *rq, int budget,
+			   unsigned int *xdp_xmit)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;
-	unsigned int len, received = 0, bytes = 0;
+	struct virtnet_rq_stats stats = {};
+	unsigned int len;
 	void *buf;
+	int i;
 
 	if (!vi->big_packets || vi->mergeable_rx_bufs) {
 		void *ctx;
 
-		while (received < budget &&
+		while (stats.packets < budget &&
 		       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
-			bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
-			received++;
+			receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats);
+			stats.packets++;
 		}
 	} else {
-		while (received < budget &&
+		while (stats.packets < budget &&
 		       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
-			bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
-			received++;
+			receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats);
+			stats.packets++;
 		}
 	}
 
@@ -1260,11 +1312,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
 	}
 
 	u64_stats_update_begin(&rq->stats.syncp);
-	rq->stats.bytes += bytes;
-	rq->stats.packets += received;
+	for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) {
+		size_t offset = virtnet_rq_stats_desc[i].offset;
+		u64 *item;
+
+		item = (u64 *)((u8 *)&rq->stats + offset);
+		*item += *(u64 *)((u8 *)&stats + offset);
+	}
 	u64_stats_update_end(&rq->stats.syncp);
 
-	return received;
+	return stats.packets;
 }
 
 static void free_old_xmit_skbs(struct send_queue *sq)
@@ -1320,8 +1377,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 		container_of(napi, struct receive_queue, napi);
 	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct send_queue *sq;
-	unsigned int received, qp;
-	bool xdp_xmit = false;
+	unsigned int received;
+	unsigned int xdp_xmit = 0;
 
 	virtnet_poll_cleantx(rq);
 
@@ -1331,12 +1388,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 	if (received < budget)
 		virtqueue_napi_complete(napi, rq->vq, received);
 
-	if (xdp_xmit) {
-		qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
-		     smp_processor_id();
-		sq = &vi->sq[qp];
-		virtqueue_kick(sq->vq);
+	if (xdp_xmit & VIRTIO_XDP_REDIR)
 		xdp_do_flush_map();
+
+	if (xdp_xmit & VIRTIO_XDP_TX) {
+		sq = virtnet_xdp_sq(vi);
+		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+			u64_stats_update_begin(&sq->stats.syncp);
+			sq->stats.kicks++;
+			u64_stats_update_end(&sq->stats.syncp);
+		}
 	}
 
 	return received;
@@ -1498,8 +1559,13 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	if (kick || netif_xmit_stopped(txq))
-		virtqueue_kick(sq->vq);
+	if (kick || netif_xmit_stopped(txq)) {
+		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
+			u64_stats_update_begin(&sq->stats.syncp);
+			sq->stats.kicks++;
+			u64_stats_update_end(&sq->stats.syncp);
+		}
+	}
 
 	return NETDEV_TX_OK;
 }
@@ -1603,7 +1669,7 @@ static void virtnet_stats(struct net_device *dev,
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		u64 tpackets, tbytes, rpackets, rbytes;
+		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
 		struct receive_queue *rq = &vi->rq[i];
 		struct send_queue *sq = &vi->sq[i];
 
@@ -1617,17 +1683,18 @@ static void virtnet_stats(struct net_device *dev,
 			start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
 			rpackets = rq->stats.packets;
 			rbytes   = rq->stats.bytes;
+			rdrops   = rq->stats.drops;
 		} while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
 
 		tot->rx_packets += rpackets;
 		tot->tx_packets += tpackets;
 		tot->rx_bytes   += rbytes;
 		tot->tx_bytes   += tbytes;
+		tot->rx_dropped += rdrops;
 	}
 
 	tot->tx_dropped = dev->stats.tx_dropped;
 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-	tot->rx_dropped = dev->stats.rx_dropped;
 	tot->rx_length_errors = dev->stats.rx_length_errors;
 	tot->rx_frame_errors = dev->stats.rx_frame_errors;
 }
@@ -1811,8 +1878,8 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
-			virtqueue_set_affinity(vi->rq[i].vq, -1);
-			virtqueue_set_affinity(vi->sq[i].vq, -1);
+			virtqueue_set_affinity(vi->rq[i].vq, NULL);
+			virtqueue_set_affinity(vi->sq[i].vq, NULL);
 		}
 
 		vi->affinity_hint_set = false;
@@ -1821,28 +1888,41 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
 static void virtnet_set_affinity(struct virtnet_info *vi)
 {
-	int i;
-	int cpu;
+	cpumask_var_t mask;
+	int stragglers;
+	int group_size;
+	int i, j, cpu;
+	int num_cpu;
+	int stride;
 
-	/* In multiqueue mode, when the number of cpu is equal to the number of
-	 * queue pairs, we let the queue pairs to be private to one cpu by
-	 * setting the affinity hint to eliminate the contention.
-	 */
-	if (vi->curr_queue_pairs == 1 ||
-	    vi->max_queue_pairs != num_online_cpus()) {
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
 		virtnet_clean_affinity(vi, -1);
 		return;
 	}
 
-	i = 0;
-	for_each_online_cpu(cpu) {
-		virtqueue_set_affinity(vi->rq[i].vq, cpu);
-		virtqueue_set_affinity(vi->sq[i].vq, cpu);
-		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
-		i++;
+	num_cpu = num_online_cpus();
+	stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1);
+	stragglers = num_cpu >= vi->curr_queue_pairs ?
+			num_cpu % vi->curr_queue_pairs :
+			0;
+	cpu = cpumask_next(-1, cpu_online_mask);
+
+	for (i = 0; i < vi->curr_queue_pairs; i++) {
+		group_size = stride + (i < stragglers ? 1 : 0);
+
+		for (j = 0; j < group_size; j++) {
+			cpumask_set_cpu(cpu, mask);
+			cpu = cpumask_next_wrap(cpu, cpu_online_mask,
+						nr_cpu_ids, false);
+		}
+		virtqueue_set_affinity(vi->rq[i].vq, mask);
+		virtqueue_set_affinity(vi->sq[i].vq, mask);
+		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
+		cpumask_clear(mask);
 	}
 
 	vi->affinity_hint_set = true;
+	free_cpumask_var(mask);
 }
 
 static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
@@ -2335,7 +2415,6 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = virtnet_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;