virtio,vhost,vdpa: bugfixes

A bunch of fixes all over the place
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQFDBAABCAAtFiEEXQn9CHHI+FuUyooNKB8NuNKNVGkFAmSDUGkPHG1zdEByZWRo
 YXQuY29tAAoJECgfDbjSjVRpw6gH+wbopniPKDrxNwTpDFx3jix3QDTzVMY4Bq4k
 QdwPfjAZ1aDZXYHV1CdFXeKTA+ZkWHIREZSr+E/2/jeI55Exc2AeFptZrUesSg29
 jMN1MPs00CCy8Qi9BiCZIQkFkIKHNA2PY8wIA0oIXhIaG7pBtYQ14CnAFqn41ev5
 II20h389KMthe0lwm4ni/qHVZzG/2qP/JXLKf35proDEnU5WWM1rQZ1666EFMaIR
 6QExqwbPubxfv44Kl3mMkanGj6MmtLtFa2XlMLbEfLrU5/Xz+CywqSFHTUerrh3I
 eTNyqz4Oyj6UpRq264rqQBJmpSn8LWFBZXQlJ6Y+ef/h8Mhdewk=
 =G8CT
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio bug fixes from Michael Tsirkin:
 "A bunch of fixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  tools/virtio: use canonical ftrace path
  vhost_vdpa: support PACKED when setting-getting vring_base
  vhost: support PACKED when setting-getting vring_base
  vhost: Fix worker hangs due to missed wake up calls
  vhost: Fix crash during early vhost_transport_send_pkt calls
  vhost_net: revert upend_idx only on retriable error
  vhost_vdpa: tell vqs about the negotiated
  vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister
  tools/virtio: Add .gitignore for ringtest
  tools/virtio: Fix arm64 ringtest compilation error
  vduse: avoid empty string for dev name
  vhost: use kzalloc() instead of kmalloc() followed by memset()
Linus Torvalds, 2023-06-09 11:04:08 -07:00
commit dbfa18c5d7
11 files changed, 120 insertions(+), 65 deletions(-)

--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c

@@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
 	ndev->debugfs = NULL;
 	unregister_link_notifier(ndev);
+	_vdpa_unregister_device(dev);
 	wq = mvdev->wq;
 	mvdev->wq = NULL;
 	destroy_workqueue(wq);
-	_vdpa_unregister_device(dev);
 	mgtdev->ndev = NULL;
 }
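Note: the fix reorders teardown so that _vdpa_unregister_device() runs while the control-VQ workqueue is still alive; once the device is unregistered, no new cvq commands can be queued, making it safe to destroy the workqueue afterwards.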

--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c

@@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
 	if (config->vq_num > 0xffff)
 		return false;
 
+	if (!config->name[0])
+		return false;
+
 	if (!device_is_allowed(config->device_id))
 		return false;

--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c

@@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 		err = sock->ops->sendmsg(sock, &msg, len);
 		if (unlikely(err < 0)) {
+			bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
+
 			if (zcopy_used) {
 				if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
 					vhost_net_ubuf_put(ubufs);
-				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-					% UIO_MAXIOV;
+				if (retry)
+					nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+						% UIO_MAXIOV;
+				else
+					vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
 			}
-			if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
+			if (retry) {
 				vhost_discard_vq_desc(vq, 1);
 				vhost_net_enable_vq(net, vq);
 				break;
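Note: as the subject line says, upend_idx is now rewound only when the send will actually be retried; on a non-retriable error the zerocopy slot is completed via VHOST_DMA_DONE_LEN instead, keeping the upend/done bookkeeping consistent.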

--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c

@@ -407,7 +407,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 {
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
+	struct vhost_dev *d = &v->vdev;
+	u64 actual_features;
 	u64 features;
+	int i;
 
 	/*
 	 * It's not allowed to change the features after they have
@@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 	if (vdpa_set_features(vdpa, features))
 		return -EINVAL;
 
+	/* let the vqs know what has been configured */
+	actual_features = ops->get_driver_features(vdpa);
+	for (i = 0; i < d->nvqs; ++i) {
+		struct vhost_virtqueue *vq = d->vqs[i];
+
+		mutex_lock(&vq->mutex);
+		vq->acked_features = actual_features;
+		mutex_unlock(&vq->mutex);
+	}
+
 	return 0;
 }
 
@@ -594,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		if (r)
 			return r;
 
-		vq->last_avail_idx = vq_state.split.avail_index;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq->last_avail_idx = vq_state.packed.last_avail_idx |
+					     (vq_state.packed.last_avail_counter << 15);
+			vq->last_used_idx = vq_state.packed.last_used_idx |
+					     (vq_state.packed.last_used_counter << 15);
+		} else {
+			vq->last_avail_idx = vq_state.split.avail_index;
+		}
 		break;
 	}
 
@@ -612,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		break;
 
 	case VHOST_SET_VRING_BASE:
-		vq_state.split.avail_index = vq->last_avail_idx;
-		if (ops->set_vq_state(vdpa, idx, &vq_state))
-			r = -EINVAL;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
+			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
+			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
+			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
+		} else {
+			vq_state.split.avail_index = vq->last_avail_idx;
+		}
+		r = ops->set_vq_state(vdpa, idx, &vq_state);
 		break;
 
 	case VHOST_SET_VRING_CALL:
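The packed-ring conversion above hinges on one convention: vhost keeps a 15-bit ring index with the wrap counter in bit 15 of its u16. A minimal sketch of that round trip, with hypothetical helper names that are not part of the tree:

	#include <linux/types.h>

	/* Sketch only: bits 0-14 carry the packed-ring index and bit 15
	 * the wrap counter, matching the masks used in the diff above. */
	static inline u16 vhost_pack_idx(u16 idx, bool wrap_counter)
	{
		return (idx & 0x7fff) | ((u16)wrap_counter << 15);
	}

	static inline u16 vhost_unpack_idx(u16 packed, bool *wrap_counter)
	{
		*wrap_counter = !!(packed & 0x8000);
		return packed & 0x7fff;
	}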

--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c

@@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev)
 {
 	struct vhost_flush_struct flush;
 
-	if (dev->worker) {
+	if (dev->worker.vtsk) {
 		init_completion(&flush.wait_event);
 		vhost_work_init(&flush.work, vhost_flush_work);
 
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-	if (!dev->worker)
+	if (!dev->worker.vtsk)
 		return;
 
 	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 		 * sure it was not in the list.
 		 * test_and_set_bit() implies a memory barrier.
 		 */
-		llist_add(&work->node, &dev->worker->work_list);
-		vhost_task_wake(dev->worker->vtsk);
+		llist_add(&work->node, &dev->worker.work_list);
+		vhost_task_wake(dev->worker.vtsk);
 	}
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return dev->worker && !llist_empty(&dev->worker->work_list);
+	return !llist_empty(&dev->worker.work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
@@ -341,6 +341,8 @@ static bool vhost_worker(void *data)
 
 	node = llist_del_all(&worker->work_list);
 	if (node) {
+		__set_current_state(TASK_RUNNING);
+
 		node = llist_reverse_order(node);
 		/* make sure flag is seen after deletion */
 		smp_wmb();
@@ -456,7 +458,8 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->umem = NULL;
 	dev->iotlb = NULL;
 	dev->mm = NULL;
-	dev->worker = NULL;
+	memset(&dev->worker, 0, sizeof(dev->worker));
+	init_llist_head(&dev->worker.work_list);
 	dev->iov_limit = iov_limit;
 	dev->weight = weight;
 	dev->byte_weight = byte_weight;
@@ -530,47 +533,30 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 
 static void vhost_worker_free(struct vhost_dev *dev)
 {
-	struct vhost_worker *worker = dev->worker;
-
-	if (!worker)
+	if (!dev->worker.vtsk)
 		return;
 
-	dev->worker = NULL;
-	WARN_ON(!llist_empty(&worker->work_list));
-	vhost_task_stop(worker->vtsk);
-	kfree(worker);
+	WARN_ON(!llist_empty(&dev->worker.work_list));
+	vhost_task_stop(dev->worker.vtsk);
+	dev->worker.kcov_handle = 0;
+	dev->worker.vtsk = NULL;
 }
 
 static int vhost_worker_create(struct vhost_dev *dev)
 {
-	struct vhost_worker *worker;
 	struct vhost_task *vtsk;
 	char name[TASK_COMM_LEN];
-	int ret;
-
-	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
-	if (!worker)
-		return -ENOMEM;
-
-	dev->worker = worker;
-	worker->kcov_handle = kcov_common_handle();
-	init_llist_head(&worker->work_list);
 
 	snprintf(name, sizeof(name), "vhost-%d", current->pid);
 
-	vtsk = vhost_task_create(vhost_worker, worker, name);
-	if (!vtsk) {
-		ret = -ENOMEM;
-		goto free_worker;
-	}
+	vtsk = vhost_task_create(vhost_worker, &dev->worker, name);
+	if (!vtsk)
+		return -ENOMEM;
 
-	worker->vtsk = vtsk;
+	dev->worker.kcov_handle = kcov_common_handle();
+	dev->worker.vtsk = vtsk;
+
 	vhost_task_start(vtsk);
 	return 0;
-
-free_worker:
-	kfree(worker);
-	dev->worker = NULL;
-	return ret;
 }
 
 /* Caller should have device mutex */
@@ -1614,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 			r = -EFAULT;
 			break;
 		}
-		if (s.num > 0xffff) {
-			r = -EINVAL;
-			break;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq->last_avail_idx = s.num & 0xffff;
+			vq->last_used_idx = (s.num >> 16) & 0xffff;
+		} else {
+			if (s.num > 0xffff) {
+				r = -EINVAL;
+				break;
+			}
+			vq->last_avail_idx = s.num;
 		}
-		vq->last_avail_idx = s.num;
 		/* Forget the cached index value. */
 		vq->avail_idx = vq->last_avail_idx;
 		break;
 	case VHOST_GET_VRING_BASE:
 		s.index = idx;
-		s.num = vq->last_avail_idx;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+			s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
+		else
+			s.num = vq->last_avail_idx;
 		if (copy_to_user(argp, &s, sizeof s))
 			r = -EFAULT;
 		break;
@@ -2563,12 +2557,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
 
 /* Create a new message. */
 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
 {
-	struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
+	/* Make sure all padding within the structure is initialized. */
+	struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return NULL;
 
-	/* Make sure all padding within the structure is initialized. */
-	memset(&node->msg, 0, sizeof node->msg);
 	node->vq = vq;
 	node->msg.type = type;
 	return node;
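For the plain vhost character device, both packed-ring indices now travel in the single 32-bit s.num of struct vhost_vring_state. A hedged userspace sketch of the encoding VHOST_SET_VRING_BASE expects when VIRTIO_F_RING_PACKED was negotiated (the helper name is made up for illustration):

	#include <stdint.h>

	/* Bits 0-14: avail index, bit 15: avail wrap counter,
	 * bits 16-30: used index, bit 31: used wrap counter. */
	static inline uint32_t packed_vring_base(uint16_t avail_idx, int avail_wrap,
						 uint16_t used_idx, int used_wrap)
	{
		uint16_t avail = (avail_idx & 0x7fff) | ((uint16_t)!!avail_wrap << 15);
		uint16_t used  = (used_idx  & 0x7fff) | ((uint16_t)!!used_wrap  << 15);

		return (uint32_t)avail | ((uint32_t)used << 16);
	}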

--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h

@@ -92,13 +92,17 @@ struct vhost_virtqueue {
 	/* The routine to call when the Guest pings us, or timeout. */
 	vhost_work_fn_t handle_kick;
 
-	/* Last available index we saw. */
+	/* Last available index we saw.
+	 * Values are limited to 0x7fff, and the high bit is used as
+	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_avail_idx;
 
 	/* Caches available index value from user. */
 	u16 avail_idx;
 
-	/* Last index we used. */
+	/* Last index we used.
+	 * Values are limited to 0x7fff, and the high bit is used as
+	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_used_idx;
 
 	/* Used flags */
@@ -154,7 +158,7 @@ struct vhost_dev {
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct eventfd_ctx *log_ctx;
-	struct vhost_worker *worker;
+	struct vhost_worker worker;
 	struct vhost_iotlb *umem;
 	struct vhost_iotlb *iotlb;
 	spinlock_t iotlb_lock;

--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c

@@ -28,10 +28,6 @@ static int vhost_task_fn(void *data)
 	for (;;) {
 		bool did_work;
 
-		/* mb paired w/ vhost_task_stop */
-		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags))
-			break;
-
 		if (!dead && signal_pending(current)) {
 			struct ksignal ksig;
 			/*
@@ -48,11 +44,17 @@ static int vhost_task_fn(void *data)
 			clear_thread_flag(TIF_SIGPENDING);
 		}
 
-		did_work = vtsk->fn(vtsk->data);
-		if (!did_work) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			schedule();
+		/* mb paired w/ vhost_task_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
+			__set_current_state(TASK_RUNNING);
+			break;
 		}
+
+		did_work = vtsk->fn(vtsk->data);
+		if (!did_work)
+			schedule();
 	}
 
 	complete(&vtsk->exited);
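The hang fixed here is a classic lost wake-up: the old loop tested the stop flag and ran the work callback before marking itself TASK_INTERRUPTIBLE, so a wake-up arriving in that window was discarded and the worker could sleep forever. The rewritten loop follows the canonical kernel sleep pattern, sketched generically below (not tree code; the condition and work helpers are hypothetical):

	/* Declare the intent to sleep first, then test the condition,
	 * then schedule(). A wake-up sent after set_current_state()
	 * flips the task back to TASK_RUNNING, so schedule() returns
	 * immediately instead of sleeping through the notification. */
	set_current_state(TASK_INTERRUPTIBLE);
	if (stop_requested()) {
		__set_current_state(TASK_RUNNING);
		return;
	}
	if (!do_pending_work())
		schedule();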

--- /dev/null
+++ b/tools/virtio/ringtest/.gitignore

@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/noring
+/ptr_ring
+/ring
+/virtio_ring_0_9
+/virtio_ring_inorder
+/virtio_ring_poll

--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h

@@ -8,6 +8,7 @@
 #ifndef MAIN_H
 #define MAIN_H
 
+#include <assert.h>
 #include <stdbool.h>
 
 extern int param;
@@ -95,6 +96,8 @@ extern unsigned ring_size;
 #define cpu_relax() asm ("rep; nop" ::: "memory")
 #elif defined(__s390x__)
 #define cpu_relax() barrier()
+#elif defined(__aarch64__)
+#define cpu_relax() asm ("yield" ::: "memory")
 #else
 #define cpu_relax() assert(0)
 #endif
@@ -112,6 +115,8 @@ static inline void busy_wait(void)
 
 #if defined(__x86_64__) || defined(__i386__)
 #define smp_mb()     asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#elif defined(__aarch64__)
+#define smp_mb()     asm volatile("dmb ish" ::: "memory")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -136,10 +141,16 @@ static inline void busy_wait(void)
 
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
 #define smp_wmb() barrier()
+#elif defined(__aarch64__)
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
 #else
 #define smp_wmb() smp_release()
 #endif
 
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
 static __always_inline
 void __read_once_size(const volatile void *p, void *res, int size)
 {
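Note: the aarch64 fallbacks use the standard mappings: yield is the spin-loop hint backing cpu_relax(), dmb ish is a full inner-shareable barrier for smp_mb(), and dmb ishst orders only stores, which is all smp_wmb() requires. __always_inline is defined only when the toolchain headers have not already provided it.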

--- a/tools/virtio/virtio-trace/README
+++ b/tools/virtio/virtio-trace/README

@@ -95,7 +95,7 @@ Run
 1) Enable ftrace in the guest
  <Example>
-	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+	# echo 1 > /sys/kernel/tracing/events/sched/enable
 
 2) Run trace agent in the guest
    This agent must be operated as root.
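Note: tracefs has been mountable on its own at /sys/kernel/tracing since Linux 4.1; /sys/kernel/debug/tracing survives only as a debugfs-era compatibility location, which is why the trace-agent change below tries the canonical path first and falls back.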

--- a/tools/virtio/virtio-trace/trace-agent.c
+++ b/tools/virtio/virtio-trace/trace-agent.c

@@ -18,8 +18,9 @@
 #define PIPE_DEF_BUFS	16
 #define PIPE_MIN_SIZE	(PAGE_SIZE*PIPE_DEF_BUFS)
 #define PIPE_MAX_SIZE	(1024*1024)
-#define READ_PATH_FMT	\
-	"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define TRACEFS		"/sys/kernel/tracing"
+#define DEBUGFS		"/sys/kernel/debug/tracing"
+#define READ_PATH_FMT	"%s/per_cpu/cpu%d/trace_pipe_raw"
 #define WRITE_PATH_FMT	"/dev/virtio-ports/trace-path-cpu%d"
 #define CTL_PATH	"/dev/virtio-ports/agent-ctl-path"
 
@@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path)
 	if (this_is_write_path)
 		/* write(output) path */
 		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
-	else
+	else {
 		/* read(input) path */
-		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num);
+		if (ret > 0 && access(buf, F_OK) != 0)
+			ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num);
+	}
 
 	if (ret <= 0) {
 		pr_err("Failed to generate %s path(CPU#%d):%d\n",