Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 575
1 file changed, 376 insertions, 199 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 61e33e4dd0cd..2396c28c0122 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -134,7 +134,7 @@ struct send_queue { struct scatterlist sg[MAX_SKB_FRAGS + 2]; /* Name of the send queue: output.$index */ - char name[40]; + char name[16]; struct virtnet_sq_stats stats; @@ -171,7 +171,7 @@ struct receive_queue { unsigned int min_buf_len; /* Name of this receive queue: input.$index */ - char name[40]; + char name[16]; struct xdp_rxq_info xdp_rxq; }; @@ -447,7 +447,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, - bool hdr_valid, unsigned int metasize, unsigned int headroom) { struct sk_buff *skb; @@ -466,21 +465,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - /* If headroom is not 0, there is an offset between the beginning of the - * data and the allocated space, otherwise the data and the allocated - * space are aligned. - * - * Buffers with headroom use PAGE_SIZE as alloc size, see - * add_recvbuf_mergeable() + get_mergeable_buf_len() - */ - truesize = headroom ? PAGE_SIZE : truesize; - tailroom = truesize - headroom; buf = p - headroom; - len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; - tailroom -= hdr_padded_len + len; + tailroom = truesize - headroom - hdr_padded_len - len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -510,7 +499,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, if (len <= skb_tailroom(skb)) copy = len; else - copy = ETH_HLEN + metasize; + copy = ETH_HLEN; skb_put_data(skb, p, copy); len -= copy; @@ -549,20 +538,93 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, give_pages(rq, page); ok: - /* hdr_valid means no XDP, so we can copy the vnet header */ - if (hdr_valid) { - hdr = skb_vnet_hdr(skb); - memcpy(hdr, hdr_p, hdr_len); - } + hdr = skb_vnet_hdr(skb); + memcpy(hdr, hdr_p, hdr_len); if (page_to_free) put_page(page_to_free); - if (metasize) { - __skb_pull(skb, metasize); - skb_metadata_set(skb, metasize); + return skb; +} + +static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) +{ + unsigned int len; + unsigned int packets = 0; + unsigned int bytes = 0; + void *ptr; + + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr; + + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += xdp_get_frame_len(frame); + xdp_return_frame(frame); + } + packets++; } - return skb; + /* Avoid overhead when no packets have been processed + * happens when called speculatively from start_xmit. 
+ */ + if (!packets) + return; + + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; + u64_stats_update_end(&sq->stats.syncp); +} + +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + +static void check_sq_full_and_disable(struct virtnet_info *vi, + struct net_device *dev, + struct send_queue *sq) +{ + bool use_napi = sq->napi.weight; + int qnum; + + qnum = sq - vi->sq; + + /* If running out of space, stop queue to avoid getting packets that we + * are then unable to transmit. + * An alternative would be to force queuing layer to requeue the skb by + * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be + * returned in a normal path of operation: it means that driver is not + * maintaining the TX queue stop/start state properly, and causes + * the stack to do a non-trivial amount of useless work. + * Since most packets only take 1 or 2 ring slots, stopping the queue + * early means 16 slots are typically wasted. + */ + if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + netif_stop_subqueue(dev, qnum); + if (use_napi) { + if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) + virtqueue_napi_schedule(&sq->napi, sq->vq); + } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { + /* More just got used, free them then recheck. */ + free_old_xmit_skbs(sq, false); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + netif_start_subqueue(dev, qnum); + virtqueue_disable_cb(sq->vq); + } + } + } } static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, @@ -570,22 +632,43 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; - int err; + struct skb_shared_info *shinfo; + u8 nr_frags = 0; + int err, i; if (unlikely(xdpf->headroom < vi->hdr_len)) return -EOVERFLOW; - /* Make room for virtqueue hdr (also change xdpf->headroom?) */ + if (unlikely(xdp_frame_has_frags(xdpf))) { + shinfo = xdp_get_shared_info_from_frame(xdpf); + nr_frags = shinfo->nr_frags; + } + + /* In wrapping function virtnet_xdp_xmit(), we need to free + * up the pending old buffers, where we need to calculate the + * position of skb_shared_info in xdp_get_frame_len() and + * xdp_return_frame(), which will involve to xdpf->data and + * xdpf->headroom. Therefore, we need to update the value of + * headroom synchronously here. 
+ */ + xdpf->headroom -= vi->hdr_len; xdpf->data -= vi->hdr_len; /* Zero header and leave csum up to XDP layers */ hdr = xdpf->data; memset(hdr, 0, vi->hdr_len); xdpf->len += vi->hdr_len; - sg_init_one(sq->sg, xdpf->data, xdpf->len); + sg_init_table(sq->sg, nr_frags + 1); + sg_set_buf(sq->sg, xdpf->data, xdpf->len); + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = &shinfo->frags[i]; + + sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), + skb_frag_size(frag), skb_frag_off(frag)); + } - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), - GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, + xdp_to_ptr(xdpf), GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ @@ -665,7 +748,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, if (likely(is_xdp_frame(ptr))) { struct xdp_frame *frame = ptr_to_xdp(ptr); - bytes += frame->len; + bytes += xdp_get_frame_len(frame); xdp_return_frame(frame); } else { struct sk_buff *skb = ptr; @@ -685,6 +768,9 @@ static int virtnet_xdp_xmit(struct net_device *dev, } ret = nxmit; + if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) + check_sq_full_and_disable(vi, dev, sq); + if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) kicks = 1; @@ -722,7 +808,7 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) * have enough headroom. */ static struct page *xdp_linearize_page(struct receive_queue *rq, - u16 *num_buf, + int *num_buf, struct page *p, int offset, int page_off, @@ -822,7 +908,7 @@ static struct sk_buff *receive_small(struct net_device *dev, if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { int offset = buf - page_address(page) + header_offset; unsigned int tlen = len + vi->hdr_len; - u16 num_buf = 1; + int num_buf = 1; xdp_headroom = virtnet_get_headroom(vi); header_offset = VIRTNET_RX_PAD + xdp_headroom; @@ -924,7 +1010,7 @@ static struct sk_buff *receive_big(struct net_device *dev, { struct page *page = buf; struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0); + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -938,6 +1024,143 @@ err: return NULL; } +/* Why not use xdp_build_skb_from_frame() ? + * XDP core assumes that xdp frags are PAGE_SIZE in length, while in + * virtio-net there are 2 points that do not match its requirements: + * 1. The size of the prefilled buffer is not fixed before xdp is set. + * 2. xdp_build_skb_from_frame() does more checks that we don't need, + * like eth_type_trans() (which virtio-net does in receive_buf()). + */ +static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, + struct virtnet_info *vi, + struct xdp_buff *xdp, + unsigned int xdp_frags_truesz) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + unsigned int headroom, data_len; + struct sk_buff *skb; + int metasize; + u8 nr_frags; + + if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { + pr_debug("Error building skb as missing reserved tailroom for xdp"); + return NULL; + } + + if (unlikely(xdp_buff_has_frags(xdp))) + nr_frags = sinfo->nr_frags; + + skb = build_skb(xdp->data_hard_start, xdp->frame_sz); + if (unlikely(!skb)) + return NULL; + + headroom = xdp->data - xdp->data_hard_start; + data_len = xdp->data_end - xdp->data; + skb_reserve(skb, headroom); + __skb_put(skb, data_len); + + metasize = xdp->data - xdp->data_meta; + metasize = metasize > 0 ? 
metasize : 0; + if (metasize) + skb_metadata_set(skb, metasize); + + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, + xdp_frags_truesz, + xdp_buff_is_frag_pfmemalloc(xdp)); + + return skb; +} + +/* TODO: build xdp in big mode */ +static int virtnet_build_xdp_buff_mrg(struct net_device *dev, + struct virtnet_info *vi, + struct receive_queue *rq, + struct xdp_buff *xdp, + void *buf, + unsigned int len, + unsigned int frame_sz, + int *num_buf, + unsigned int *xdp_frags_truesize, + struct virtnet_rq_stats *stats) +{ + struct virtio_net_hdr_mrg_rxbuf *hdr = buf; + unsigned int headroom, tailroom, room; + unsigned int truesize, cur_frag_size; + struct skb_shared_info *shinfo; + unsigned int xdp_frags_truesz = 0; + struct page *page; + skb_frag_t *frag; + int offset; + void *ctx; + + xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); + xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, + VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); + + if (!*num_buf) + return 0; + + if (*num_buf > 1) { + /* If we want to build multi-buffer xdp, we need + * to specify that the flags of xdp_buff have the + * XDP_FLAGS_HAS_FRAG bit. + */ + if (!xdp_buff_has_frags(xdp)) + xdp_buff_set_frags_flag(xdp); + + shinfo = xdp_get_shared_info_from_buff(xdp); + shinfo->nr_frags = 0; + shinfo->xdp_frags_size = 0; + } + + if (*num_buf > MAX_SKB_FRAGS + 1) + return -EINVAL; + + while (--*num_buf > 0) { + buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); + if (unlikely(!buf)) { + pr_debug("%s: rx error: %d buffers out of %d missing\n", + dev->name, *num_buf, + virtio16_to_cpu(vi->vdev, hdr->num_buffers)); + dev->stats.rx_length_errors++; + return -EINVAL; + } + + stats->bytes += len; + page = virt_to_head_page(buf); + offset = buf - page_address(page); + + truesize = mergeable_ctx_to_truesize(ctx); + headroom = mergeable_ctx_to_headroom(ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + + cur_frag_size = truesize; + xdp_frags_truesz += cur_frag_size; + if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { + put_page(page); + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)(truesize - room)); + dev->stats.rx_length_errors++; + return -EINVAL; + } + + frag = &shinfo->frags[shinfo->nr_frags++]; + __skb_frag_set_page(frag, page); + skb_frag_off_set(frag, offset); + skb_frag_size_set(frag, len); + if (page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); + + shinfo->xdp_frags_size += len; + } + + *xdp_frags_truesize = xdp_frags_truesz; + return 0; +} + static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, @@ -948,23 +1171,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; - u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); + int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); struct page *page = virt_to_head_page(buf); int offset = buf - page_address(page); struct sk_buff *head_skb, *curr_skb; struct bpf_prog *xdp_prog; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); - unsigned int metasize = 0; - unsigned int frame_sz; + unsigned int tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); + unsigned int frame_sz, xdp_room; int err; head_skb = NULL; stats->bytes += len - vi->hdr_len; - if (unlikely(len > truesize)) { + if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)ctx); + dev->name, len, (unsigned long)(truesize - room)); dev->stats.rx_length_errors++; goto err_skb; } @@ -977,11 +1201,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { + unsigned int xdp_frags_truesz = 0; + struct skb_shared_info *shinfo; struct xdp_frame *xdpf; struct page *xdp_page; struct xdp_buff xdp; void *data; u32 act; + int i; /* Transient failure which in theory could occur if * in-flight packets from before XDP was enabled reach @@ -990,19 +1217,23 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; - /* Buffers with headroom use PAGE_SIZE as alloc size, - * see add_recvbuf_mergeable() + get_mergeable_buf_len() + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers + * with headroom may add hole in truesize, which + * make their length exceed PAGE_SIZE. So we disabled the + * hole mechanism for xdp. See add_recvbuf_mergeable(). */ - frame_sz = headroom ? PAGE_SIZE : truesize; - - /* This happens when rx buffer size is underestimated - * or headroom is not enough because of the buffer - * was refilled before XDP is set. This should only - * happen for the first several packets, so we don't - * care much about its performance. + frame_sz = truesize; + + /* This happens when headroom is not enough because + * of the buffer was prefilled before XDP is set. + * This should only happen for the first several packets. + * In fact, vq reset can be used here to help us clean up + * the prefilled buffers, but many existing devices do not + * support it, and we don't want to bother users who are + * using xdp normally. */ - if (unlikely(num_buf > 1 || - headroom < virtnet_get_headroom(vi))) { + if (!xdp_prog->aux->xdp_has_frags && + (num_buf > 1 || headroom < virtnet_get_headroom(vi))) { /* linearize data for XDP */ xdp_page = xdp_linearize_page(rq, &num_buf, page, offset, @@ -1013,82 +1244,56 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (!xdp_page) goto err_xdp; offset = VIRTIO_XDP_HEADROOM; + } else if (unlikely(headroom < virtnet_get_headroom(vi))) { + xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + + sizeof(struct skb_shared_info)); + if (len + xdp_room > PAGE_SIZE) + goto err_xdp; + + xdp_page = alloc_page(GFP_ATOMIC); + if (!xdp_page) + goto err_xdp; + + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, + page_address(page) + offset, len); + frame_sz = PAGE_SIZE; + offset = VIRTIO_XDP_HEADROOM; } else { xdp_page = page; } - /* Allow consuming headroom but reserve enough space to push - * the descriptor on if we get an XDP_TX return code. 
- */ data = page_address(xdp_page) + offset; - xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq); - xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len, - VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true); + err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, + &num_buf, &xdp_frags_truesz, stats); + if (unlikely(err)) + goto err_xdp_frags; act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; switch (act) { case XDP_PASS: - metasize = xdp.data - xdp.data_meta; - - /* recalculate offset to account for any header - * adjustments and minus the metasize to copy the - * metadata in page_to_skb(). Note other cases do not - * build an skb and avoid using offset - */ - offset = xdp.data - page_address(xdp_page) - - vi->hdr_len - metasize; + head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); + if (unlikely(!head_skb)) + goto err_xdp_frags; - /* recalculate len if xdp.data, xdp.data_end or - * xdp.data_meta were adjusted - */ - len = xdp.data_end - xdp.data + vi->hdr_len + metasize; - - /* recalculate headroom if xdp.data or xdp_data_meta - * were adjusted, note that offset should always point - * to the start of the reserved bytes for virtio_net - * header which are followed by xdp.data, that means - * that offset is equal to the headroom (when buf is - * starting at the beginning of the page, otherwise - * there is a base offset inside the page) but it's used - * with a different starting point (buf start) than - * xdp.data (buf start + vnet hdr size). If xdp.data or - * data_meta were adjusted by the xdp prog then the - * headroom size has changed and so has the offset, we - * can use data_hard_start, which points at buf start + - * vnet hdr size, to calculate the new headroom and use - * it later to compute buf start in page_to_skb() - */ - headroom = xdp.data - xdp.data_hard_start - metasize; - - /* We can only create skb based on xdp_page. 
*/ - if (unlikely(xdp_page != page)) { - rcu_read_unlock(); + if (unlikely(xdp_page != page)) put_page(page); - head_skb = page_to_skb(vi, rq, xdp_page, offset, - len, PAGE_SIZE, false, - metasize, - headroom); - return head_skb; - } - break; + rcu_read_unlock(); + return head_skb; case XDP_TX: stats->xdp_tx++; xdpf = xdp_convert_buff_to_frame(&xdp); if (unlikely(!xdpf)) { - if (unlikely(xdp_page != page)) - put_page(xdp_page); - goto err_xdp; + netdev_dbg(dev, "convert buff to frame failed for xdp\n"); + goto err_xdp_frags; } err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); if (unlikely(!err)) { xdp_return_frame_rx_napi(xdpf); } else if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); - if (unlikely(xdp_page != page)) - put_page(xdp_page); - goto err_xdp; + goto err_xdp_frags; } *xdp_xmit |= VIRTIO_XDP_TX; if (unlikely(xdp_page != page)) @@ -1098,11 +1303,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, case XDP_REDIRECT: stats->xdp_redirects++; err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (err) { - if (unlikely(xdp_page != page)) - put_page(xdp_page); - goto err_xdp; - } + if (err) + goto err_xdp_frags; *xdp_xmit |= VIRTIO_XDP_REDIR; if (unlikely(xdp_page != page)) put_page(page); @@ -1115,16 +1317,26 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, trace_xdp_exception(vi->dev, xdp_prog, act); fallthrough; case XDP_DROP: - if (unlikely(xdp_page != page)) - __free_pages(xdp_page, 0); - goto err_xdp; + goto err_xdp_frags; + } +err_xdp_frags: + if (unlikely(xdp_page != page)) + __free_pages(xdp_page, 0); + + if (xdp_buff_has_frags(&xdp)) { + shinfo = xdp_get_shared_info_from_buff(&xdp); + for (i = 0; i < shinfo->nr_frags; i++) { + xdp_page = skb_frag_page(&shinfo->frags[i]); + put_page(xdp_page); + } } + + goto err_xdp; } rcu_read_unlock(); skip_xdp: - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, - metasize, headroom); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -1146,9 +1358,12 @@ skip_xdp: page = virt_to_head_page(buf); truesize = mergeable_ctx_to_truesize(ctx); - if (unlikely(len > truesize)) { + headroom = mergeable_ctx_to_headroom(ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)ctx); + dev->name, len, (unsigned long)(truesize - room)); dev->stats.rx_length_errors++; goto err_skb; } @@ -1251,13 +1466,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; - if (vi->mergeable_rx_bufs) { - put_page(virt_to_head_page(buf)); - } else if (vi->big_packets) { - give_pages(rq, buf); - } else { - put_page(virt_to_head_page(buf)); - } + virtnet_rq_free_unused_buf(rq->vq, buf); return; } @@ -1426,13 +1635,16 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. + * XDP core assumes that frame_size of xdp_buff and the length + * of the frag are PAGE_SIZE, so we disable the hole mechanism. 
*/ - len += hole; + if (!headroom) + len += hole; alloc_frag->offset += hole; } sg_init_one(rq->sg, buf, len); - ctx = mergeable_len_to_ctx(len, headroom); + ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); @@ -1590,52 +1802,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } -static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) -{ - unsigned int len; - unsigned int packets = 0; - unsigned int bytes = 0; - void *ptr; - - while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { - if (likely(!is_xdp_frame(ptr))) { - struct sk_buff *skb = ptr; - - pr_debug("Sent skb %p\n", skb); - - bytes += skb->len; - napi_consume_skb(skb, in_napi); - } else { - struct xdp_frame *frame = ptr_to_xdp(ptr); - - bytes += frame->len; - xdp_return_frame(frame); - } - packets++; - } - - /* Avoid overhead when no packets have been processed - * happens when called speculatively from start_xmit. - */ - if (!packets) - return; - - u64_stats_update_begin(&sq->stats.syncp); - sq->stats.bytes += bytes; - sq->stats.packets += packets; - u64_stats_update_end(&sq->stats.syncp); -} - -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@ -1865,30 +2031,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset_ct(skb); } - /* If running out of space, stop queue to avoid getting packets that we - * are then unable to transmit. - * An alternative would be to force queuing layer to requeue the skb by - * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be - * returned in a normal path of operation: it means that driver is not - * maintaining the TX queue stop/start state properly, and causes - * the stack to do a non-trivial amount of useless work. - * Since most packets only take 1 or 2 ring slots, stopping the queue - * early means 16 slots are typically wasted. - */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { - netif_stop_subqueue(dev, qnum); - if (use_napi) { - if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) - virtqueue_napi_schedule(&sq->napi, sq->vq); - } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { - /* More just got used, free them then recheck. 
*/ - free_old_xmit_skbs(sq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { - netif_start_subqueue(dev, qnum); - virtqueue_disable_cb(sq->vq); - } - } - } + check_sq_full_and_disable(vi, dev, sq); if (kick || netif_xmit_stopped(txq)) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { @@ -2158,9 +2301,9 @@ static int virtnet_close(struct net_device *dev) cancel_delayed_work_sync(&vi->refill); for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_tx_disable(&vi->sq[i].napi); napi_disable(&vi->rq[i].napi); xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); - virtnet_napi_tx_disable(&vi->sq[i].napi); } return 0; @@ -3080,7 +3223,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi) static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { - unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr); + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + + sizeof(struct skb_shared_info)); + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; struct virtnet_info *vi = netdev_priv(dev); struct bpf_prog *old_prog; u16 xdp_qp = 0, curr_qp; @@ -3103,9 +3248,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return -EINVAL; } - if (dev->mtu > max_sz) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz); + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); + netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); return -EINVAL; } @@ -3157,7 +3302,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + if (!old_prog) + xdp_features_set_redirect_target(dev, true); } else { + xdp_features_clear_redirect_target(dev); vi->xdp_enabled = false; } @@ -3690,6 +3838,12 @@ static int virtnet_validate(struct virtio_device *vdev) __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU); } + if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY) && + !virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { + dev_warn(&vdev->dev, "device advertises feature VIRTIO_NET_F_STANDBY but not VIRTIO_NET_F_MAC, disabling standby"); + __virtio_clear_bit(vdev, VIRTIO_NET_F_STANDBY); + } + return 0; } @@ -3787,6 +3941,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 65535 */ dev->min_mtu = MIN_MTU; @@ -3802,6 +3957,8 @@ static int virtnet_probe(struct virtio_device *vdev) eth_hw_addr_set(dev, addr); } else { eth_hw_addr_random(dev); + dev_info(&vdev->dev, "Assigned random MAC address %pM\n", + dev->dev_addr); } /* Set up our device-specific information */ @@ -3813,8 +3970,10 @@ static int virtnet_probe(struct virtio_device *vdev) INIT_WORK(&vi->config_work, virtnet_config_changed_work); spin_lock_init(&vi->refill_lock); - if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { vi->mergeable_rx_bufs = true; + dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; + } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { vi->rx_usecs = 0; @@ -3929,6 +4088,24 @@ static int virtnet_probe(struct virtio_device *vdev) virtio_device_ready(vdev); + /* a random MAC address has been assigned, notify the device. 
+	 * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
+	 * because many devices work fine without getting MAC explicitly
+	 */
+	if (!virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+		struct scatterlist sg;
+
+		sg_init_one(&sg, dev->dev_addr, dev->addr_len);
+		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
+					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
+			pr_debug("virtio_net: setting MAC address failed\n");
+			rtnl_unlock();
+			err = -EINVAL;
+			goto free_unregister_netdev;
+		}
+	}
+
 	rtnl_unlock();
 
 	err = virtnet_cpu_notif_add(vi);