diff options
Diffstat (limited to 'drivers/dma/idxd')
-rw-r--r-- | drivers/dma/idxd/Makefile | 2 | ||||
-rw-r--r-- | drivers/dma/idxd/cdev.c | 334 | ||||
-rw-r--r-- | drivers/dma/idxd/compat.c | 4 | ||||
-rw-r--r-- | drivers/dma/idxd/debugfs.c | 138 | ||||
-rw-r--r-- | drivers/dma/idxd/device.c | 121 | ||||
-rw-r--r-- | drivers/dma/idxd/idxd.h | 69 | ||||
-rw-r--r-- | drivers/dma/idxd/init.c | 93 | ||||
-rw-r--r-- | drivers/dma/idxd/irq.c | 212 | ||||
-rw-r--r-- | drivers/dma/idxd/registers.h | 126 | ||||
-rw-r--r-- | drivers/dma/idxd/sysfs.c | 146 |
10 files changed, 1161 insertions, 84 deletions
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index a1e9f2b3a37c..dc096839ac63 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,7 +1,7 @@ ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD obj-$(CONFIG_INTEL_IDXD) += idxd.o -idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o +idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o debugfs.o idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 674bfefca088..ecbf67c2ad2b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -11,7 +11,9 @@ #include <linux/fs.h> #include <linux/poll.h> #include <linux/iommu.h> +#include <linux/highmem.h> #include <uapi/linux/idxd.h> +#include <linux/xarray.h> #include "registers.h" #include "idxd.h" @@ -22,6 +24,13 @@ struct idxd_cdev_context { }; /* + * Since user file names are global in DSA devices, define their ida's as + * global to avoid conflict file names. + */ +static DEFINE_IDA(file_ida); +static DEFINE_MUTEX(ida_lock); + +/* * ictx is an array based off of accelerator types. enum idxd_type * is used as index */ @@ -34,8 +43,119 @@ struct idxd_user_context { struct idxd_wq *wq; struct task_struct *task; unsigned int pasid; + struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; + struct idxd_dev idxd_dev; + u64 counters[COUNTER_MAX]; + int id; + pid_t pid; +}; + +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid); +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx); + +static inline struct idxd_user_context *dev_to_uctx(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_user_context, idxd_dev); +} + +static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]); +} +static DEVICE_ATTR_RO(cr_faults); + +static ssize_t cr_fault_failures_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]); +} +static DEVICE_ATTR_RO(cr_fault_failures); + +static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%u\n", ctx->pid); +} +static DEVICE_ATTR_RO(pid); + +static struct attribute *cdev_file_attributes[] = { + &dev_attr_cr_faults.attr, + &dev_attr_cr_fault_failures.attr, + &dev_attr_pid.attr, + NULL +}; + +static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + + if (!wq_pasid_enabled(wq)) + return 0; + + return a->mode; +} + +static const struct attribute_group cdev_file_attribute_group = { + .attrs = cdev_file_attributes, + .is_visible = cdev_file_attr_visible, +}; + +static const struct attribute_group *cdev_file_attribute_groups[] = { + &cdev_file_attribute_group, + NULL +}; + +static void idxd_file_dev_release(struct device *dev) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + int rc; + + mutex_lock(&ida_lock); + ida_free(&file_ida, ctx->id); + mutex_unlock(&ida_lock); + + /* Wait for in-flight operations to complete. */ + if (wq_shared(wq)) { + idxd_device_drain_pasid(idxd, ctx->pasid); + } else { + if (device_user_pasid_enabled(idxd)) { + /* The wq disable in the disable pasid function will drain the wq */ + rc = idxd_wq_disable_pasid(wq); + if (rc < 0) + dev_err(dev, "wq disable pasid failed.\n"); + } else { + idxd_wq_drain(wq); + } + } + + if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); + iommu_sva_unbind_device(ctx->sva); + idxd_xa_pasid_remove(ctx); + } + kfree(ctx); + mutex_lock(&wq->wq_lock); + idxd_wq_put(wq); + mutex_unlock(&wq->wq_lock); +} + +static struct device_type idxd_cdev_file_type = { + .name = "idxd_file", + .release = idxd_file_dev_release, + .groups = cdev_file_attribute_groups, }; static void idxd_cdev_dev_release(struct device *dev) @@ -68,15 +188,46 @@ static inline struct idxd_wq *inode_wq(struct inode *inode) return idxd_cdev->wq; } +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) +{ + struct idxd_wq *wq = ctx->wq; + void *ptr; + + mutex_lock(&wq->uc_lock); + ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL); + if (ptr != (void *)ctx) + dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n", + ctx->pasid); + mutex_unlock(&wq->uc_lock); +} + +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index) +{ + struct idxd_user_context *ctx; + + if (index >= COUNTER_MAX) + return; + + mutex_lock(&wq->uc_lock); + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + mutex_unlock(&wq->uc_lock); + return; + } + ctx->counters[index]++; + mutex_unlock(&wq->uc_lock); +} + static int idxd_cdev_open(struct inode *inode, struct file *filp) { struct idxd_user_context *ctx; struct idxd_device *idxd; struct idxd_wq *wq; - struct device *dev; + struct device *dev, *fdev; int rc = 0; struct iommu_sva *sva; unsigned int pasid; + struct idxd_cdev *idxd_cdev; wq = inode_wq(inode); idxd = wq->idxd; @@ -97,6 +248,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ctx->wq = wq; filp->private_data = ctx; + ctx->pid = current->pid; if (device_user_pasid_enabled(idxd)) { sva = iommu_sva_bind_device(dev, current->mm); @@ -108,65 +260,118 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) pasid = iommu_sva_get_pasid(sva); if (pasid == IOMMU_PASID_INVALID) { - iommu_sva_unbind_device(sva); rc = -EINVAL; - goto failed; + goto failed_get_pasid; } ctx->sva = sva; ctx->pasid = pasid; + ctx->mm = current->mm; + + mutex_lock(&wq->uc_lock); + rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL); + mutex_unlock(&wq->uc_lock); + if (rc < 0) + dev_warn(dev, "PASID entry already exist in xarray.\n"); if (wq_dedicated(wq)) { rc = idxd_wq_set_pasid(wq, pasid); if (rc < 0) { iommu_sva_unbind_device(sva); dev_err(dev, "wq set pasid failed: %d\n", rc); - goto failed; + goto failed_set_pasid; } } } + idxd_cdev = wq->idxd_cdev; + mutex_lock(&ida_lock); + ctx->id = ida_alloc(&file_ida, GFP_KERNEL); + mutex_unlock(&ida_lock); + if (ctx->id < 0) { + dev_warn(dev, "ida alloc failure\n"); + goto failed_ida; + } + ctx->idxd_dev.type = IDXD_DEV_CDEV_FILE; + fdev = user_ctx_dev(ctx); + device_initialize(fdev); + fdev->parent = cdev_dev(idxd_cdev); + fdev->bus = &dsa_bus_type; + fdev->type = &idxd_cdev_file_type; + + rc = dev_set_name(fdev, "file%d", ctx->id); + if (rc < 0) { + dev_warn(dev, "set name failure\n"); + goto failed_dev_name; + } + + rc = device_add(fdev); + if (rc < 0) { + dev_warn(dev, "file device add failure\n"); + goto failed_dev_add; + } + idxd_wq_get(wq); mutex_unlock(&wq->wq_lock); return 0; - failed: +failed_dev_add: +failed_dev_name: + put_device(fdev); +failed_ida: +failed_set_pasid: + if (device_user_pasid_enabled(idxd)) + idxd_xa_pasid_remove(ctx); +failed_get_pasid: + if (device_user_pasid_enabled(idxd)) + iommu_sva_unbind_device(sva); +failed: mutex_unlock(&wq->wq_lock); kfree(ctx); return rc; } +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg status; + u16 h, t, size; + int ent_size = evl_ent_size(idxd); + struct __evl_entry *entry_head; + + if (!evl) + return; + + spin_lock(&evl->lock); + status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = status.tail; + h = evl->head; + size = evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id) + set_bit(h, evl->bmap); + h = (h + 1) % size; + } + spin_unlock(&evl->lock); + + drain_workqueue(wq->wq); +} + static int idxd_cdev_release(struct inode *node, struct file *filep) { struct idxd_user_context *ctx = filep->private_data; struct idxd_wq *wq = ctx->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - int rc; dev_dbg(dev, "%s called\n", __func__); filep->private_data = NULL; - /* Wait for in-flight operations to complete. */ - if (wq_shared(wq)) { - idxd_device_drain_pasid(idxd, ctx->pasid); - } else { - if (device_user_pasid_enabled(idxd)) { - /* The wq disable in the disable pasid function will drain the wq */ - rc = idxd_wq_disable_pasid(wq); - if (rc < 0) - dev_err(dev, "wq disable pasid failed.\n"); - } else { - idxd_wq_drain(wq); - } - } + device_unregister(user_ctx_dev(ctx)); - if (ctx->sva) - iommu_sva_unbind_device(ctx->sva); - kfree(ctx); - mutex_lock(&wq->wq_lock); - idxd_wq_put(wq); - mutex_unlock(&wq->wq_lock); return 0; } @@ -297,6 +502,7 @@ void idxd_wq_del_cdev(struct idxd_wq *wq) struct idxd_cdev *idxd_cdev; idxd_cdev = wq->idxd_cdev; + ida_destroy(&file_ida); wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); put_device(cdev_dev(idxd_cdev)); @@ -330,6 +536,13 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) } mutex_lock(&wq->wq_lock); + + wq->wq = create_workqueue(dev_name(wq_confdev(wq))); + if (!wq->wq) { + rc = -ENOMEM; + goto wq_err; + } + wq->type = IDXD_WQT_USER; rc = drv_enable_wq(wq); if (rc < 0) @@ -348,7 +561,9 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) err_cdev: drv_disable_wq(wq); err: + destroy_workqueue(wq->wq); wq->type = IDXD_WQT_NONE; +wq_err: mutex_unlock(&wq->wq_lock); return rc; } @@ -361,6 +576,8 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) idxd_wq_del_cdev(wq); drv_disable_wq(wq); wq->type = IDXD_WQT_NONE; + destroy_workqueue(wq->wq); + wq->wq = NULL; mutex_unlock(&wq->wq_lock); } @@ -407,3 +624,70 @@ void idxd_cdev_remove(void) ida_destroy(&ictx[i].minor_ida); } } + +/** + * idxd_copy_cr - copy completion record to user address space found by wq and + * PASID + * @wq: work queue + * @pasid: PASID + * @addr: user fault address to write + * @cr: completion record + * @len: number of bytes to copy + * + * This is called by a work that handles completion record fault. + * + * Return: number of bytes copied. + */ +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *cr, int len) +{ + struct device *dev = &wq->idxd->pdev->dev; + int left = len, status_size = 1; + struct idxd_user_context *ctx; + struct mm_struct *mm; + + mutex_lock(&wq->uc_lock); + + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + dev_warn(dev, "No user context\n"); + goto out; + } + + mm = ctx->mm; + /* + * The completion record fault handling work is running in kernel + * thread context. It temporarily switches to the mm to copy cr + * to addr in the mm. + */ + kthread_use_mm(mm); + left = copy_to_user((void __user *)addr + status_size, cr + status_size, + len - status_size); + /* + * Copy status only after the rest of completion record is copied + * successfully so that the user gets the complete completion record + * when a non-zero status is polled. + */ + if (!left) { + u8 status; + + /* + * Ensure that the completion record's status field is written + * after the rest of the completion record has been written. + * This ensures that the user receives the correct completion + * record information once polling for a non-zero status. + */ + wmb(); + status = *(u8 *)cr; + if (put_user(status, (u8 __user *)addr)) + left += status_size; + } else { + left += status_size; + } + kthread_unuse_mm(mm); + +out: + mutex_unlock(&wq->uc_lock); + + return len - left; +} diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c index 3df21615f888..5fd38d1b9d28 100644 --- a/drivers/dma/idxd/compat.c +++ b/drivers/dma/idxd/compat.c @@ -16,7 +16,7 @@ extern void device_driver_detach(struct device *dev); static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) { - struct bus_type *bus = drv->bus; + const struct bus_type *bus = drv->bus; struct device *dev; int rc = -ENODEV; @@ -32,7 +32,7 @@ static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) { - struct bus_type *bus = drv->bus; + const struct bus_type *bus = drv->bus; struct device *dev; struct device_driver *alt_drv = NULL; int rc = -ENODEV; diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c new file mode 100644 index 000000000000..9cfbd9b14c4c --- /dev/null +++ b/drivers/dma/idxd/debugfs.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <uapi/linux/idxd.h> +#include "idxd.h" +#include "registers.h" + +static struct dentry *idxd_debugfs_dir; + +static void dump_event_entry(struct idxd_device *idxd, struct seq_file *s, + u16 index, int *count, bool processed) +{ + struct idxd_evl *evl = idxd->evl; + struct dsa_evl_entry *entry; + struct dsa_completion_record *cr; + u64 *raw; + int i; + int evl_strides = evl_ent_size(idxd) / sizeof(u64); + + entry = (struct dsa_evl_entry *)evl->log + index; + + if (!entry->e.desc_valid) + return; + + seq_printf(s, "Event Log entry %d (real index %u) processed: %u\n", + *count, index, processed); + + seq_printf(s, "desc valid %u wq idx valid %u\n" + "batch %u fault rw %u priv %u error 0x%x\n" + "wq idx %u op %#x pasid %u batch idx %u\n" + "fault addr %#llx\n", + entry->e.desc_valid, entry->e.wq_idx_valid, + entry->e.batch, entry->e.fault_rw, entry->e.priv, + entry->e.error, entry->e.wq_idx, entry->e.operation, + entry->e.pasid, entry->e.batch_idx, entry->e.fault_addr); + + cr = &entry->cr; + seq_printf(s, "status %#x result %#x fault_info %#x bytes_completed %u\n" + "fault addr %#llx inv flags %#x\n\n", + cr->status, cr->result, cr->fault_info, cr->bytes_completed, + cr->fault_addr, cr->invalid_flags); + + raw = (u64 *)entry; + + for (i = 0; i < evl_strides; i++) + seq_printf(s, "entry[%d] = %#llx\n", i, raw[i]); + + seq_puts(s, "\n"); + *count += 1; +} + +static int debugfs_evl_show(struct seq_file *s, void *d) +{ + struct idxd_device *idxd = s->private; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg evl_status; + u16 h, t, evl_size, i; + int count = 0; + bool processed = true; + + if (!evl || !evl->log) + return 0; + + spin_lock(&evl->lock); + + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + evl_size = evl->size; + + seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", + evl_status.head, evl_status.tail, evl_status.int_pending); + + i = t; + while (1) { + i = (i + 1) % evl_size; + if (i == t) + break; + + if (processed && i == h) + processed = false; + dump_event_entry(idxd, s, i, &count, processed); + } + + spin_unlock(&evl->lock); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_evl); + +int idxd_device_init_debugfs(struct idxd_device *idxd) +{ + if (IS_ERR_OR_NULL(idxd_debugfs_dir)) + return 0; + + idxd->dbgfs_dir = debugfs_create_dir(dev_name(idxd_confdev(idxd)), idxd_debugfs_dir); + if (IS_ERR(idxd->dbgfs_dir)) + return PTR_ERR(idxd->dbgfs_dir); + + if (idxd->evl) { + idxd->dbgfs_evl_file = debugfs_create_file("event_log", 0400, + idxd->dbgfs_dir, idxd, + &debugfs_evl_fops); + if (IS_ERR(idxd->dbgfs_evl_file)) { + debugfs_remove_recursive(idxd->dbgfs_dir); + idxd->dbgfs_dir = NULL; + return PTR_ERR(idxd->dbgfs_evl_file); + } + } + + return 0; +} + +void idxd_device_remove_debugfs(struct idxd_device *idxd) +{ + debugfs_remove_recursive(idxd->dbgfs_dir); +} + +int idxd_init_debugfs(void) +{ + if (!debugfs_initialized()) + return 0; + + idxd_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (IS_ERR(idxd_debugfs_dir)) + return PTR_ERR(idxd_debugfs_dir); + return 0; +} + +void idxd_remove_debugfs(void) +{ + debugfs_remove_recursive(idxd_debugfs_dir); +} diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5f321f3b4242..5abbcc61c528 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -752,6 +752,101 @@ void idxd_device_clear_state(struct idxd_device *idxd) spin_unlock(&idxd->dev_lock); } +static int idxd_device_evl_setup(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union evlcfg_reg evlcfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + void *addr; + dma_addr_t dma_addr; + int size; + struct idxd_evl *evl = idxd->evl; + unsigned long *bmap; + int rc; + + if (!evl) + return 0; + + size = evl_size(idxd); + + bmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bmap) { + rc = -ENOMEM; + goto err_bmap; + } + + /* + * Address needs to be page aligned. However, dma_alloc_coherent() provides + * at minimal page size aligned address. No manual alignment required. + */ + addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); + if (!addr) { + rc = -ENOMEM; + goto err_alloc; + } + + memset(addr, 0, size); + + spin_lock(&evl->lock); + evl->log = addr; + evl->dma = dma_addr; + evl->log_size = size; + evl->bmap = bmap; + + memset(&evlcfg, 0, sizeof(evlcfg)); + evlcfg.bits[0] = dma_addr & GENMASK(63, 12); + evlcfg.size = evl->size; + + iowrite64(evlcfg.bits[0], idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(evlcfg.bits[1], idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 1; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + gencfg.evl_en = 1; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + spin_unlock(&evl->lock); + return 0; + +err_alloc: + bitmap_free(bmap); +err_bmap: + return rc; +} + +static void idxd_device_evl_free(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + if (!gencfg.evl_en) + return; + + spin_lock(&evl->lock); + gencfg.evl_en = 0; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 0; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + bitmap_free(evl->bmap); + evl->log = NULL; + evl->size = IDXD_EVL_SIZE_MIN; + spin_unlock(&evl->lock); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -872,12 +967,16 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->priority = wq->priority; if (idxd->hw.gen_cap.block_on_fault && - test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)) + test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags) && + !test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) wq->wqcfg->bof = 1; if (idxd->hw.wq_cap.wq_ats_support) wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + if (idxd->hw.wq_cap.wq_prs_support) + wq->wqcfg->wq_prs_disable = test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size)); @@ -1194,7 +1293,7 @@ static void idxd_device_set_perm_entry(struct idxd_device *idxd, { union msix_perm mperm; - if (ie->pasid == INVALID_IOASID) + if (ie->pasid == IOMMU_PASID_INVALID) return; mperm.bits = 0; @@ -1224,7 +1323,7 @@ void idxd_wq_free_irq(struct idxd_wq *wq) idxd_device_clear_perm_entry(idxd, ie); ie->vector = -1; ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; + ie->pasid = IOMMU_PASID_INVALID; } int idxd_wq_request_irq(struct idxd_wq *wq) @@ -1240,7 +1339,7 @@ int idxd_wq_request_irq(struct idxd_wq *wq) ie = &wq->ie; ie->vector = pci_irq_vector(pdev, ie->id); - ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID; + ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : IOMMU_PASID_INVALID; idxd_device_set_perm_entry(idxd, ie); rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie); @@ -1265,7 +1364,7 @@ err_int_handle: free_irq(ie->vector, ie); err_irq: idxd_device_clear_perm_entry(idxd, ie); - ie->pasid = INVALID_IOASID; + ie->pasid = IOMMU_PASID_INVALID; return rc; } @@ -1451,15 +1550,24 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) if (rc < 0) return -ENXIO; + rc = idxd_device_evl_setup(idxd); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR; + return rc; + } + /* Start device */ rc = idxd_device_enable(idxd); - if (rc < 0) + if (rc < 0) { + idxd_device_evl_free(idxd); return rc; + } /* Setup DMA device without channels */ rc = idxd_register_dma_device(idxd); if (rc < 0) { idxd_device_disable(idxd); + idxd_device_evl_free(idxd); idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; return rc; } @@ -1488,6 +1596,7 @@ void idxd_device_drv_remove(struct idxd_dev *idxd_dev) idxd_device_disable(idxd); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) idxd_device_reset(idxd); + idxd_device_evl_free(idxd); } static enum idxd_dev_type dev_types[] = { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ced8d283d98..5428a2e1b1ec 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -10,9 +10,9 @@ #include <linux/cdev.h> #include <linux/idr.h> #include <linux/pci.h> -#include <linux/ioasid.h> #include <linux/bitmap.h> #include <linux/perf_event.h> +#include <linux/iommu.h> #include <uapi/linux/idxd.h> #include "registers.h" @@ -32,6 +32,7 @@ enum idxd_dev_type { IDXD_DEV_GROUP, IDXD_DEV_ENGINE, IDXD_DEV_CDEV, + IDXD_DEV_CDEV_FILE, IDXD_DEV_MAX_TYPE, }; @@ -127,6 +128,12 @@ struct idxd_pmu { #define IDXD_MAX_PRIORITY 0xf +enum { + COUNTER_FAULTS = 0, + COUNTER_FAULT_FAILS, + COUNTER_MAX +}; + enum idxd_wq_state { IDXD_WQ_DISABLED = 0, IDXD_WQ_ENABLED, @@ -136,6 +143,7 @@ enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, WQ_FLAG_BLOCK_ON_FAULT, WQ_FLAG_ATS_DISABLE, + WQ_FLAG_PRS_DISABLE, }; enum idxd_wq_type { @@ -185,6 +193,7 @@ struct idxd_wq { struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; + struct workqueue_struct *wq; struct idxd_device *idxd; int id; struct idxd_irq_entry ie; @@ -214,6 +223,10 @@ struct idxd_wq { char name[WQ_NAME_SIZE + 1]; u64 max_xfer_bytes; u32 max_batch_size; + + /* Lock to protect upasid_xa access. */ + struct mutex uc_lock; + struct xarray upasid_xa; }; struct idxd_engine { @@ -232,6 +245,7 @@ struct idxd_hw { union engine_cap_reg engine_cap; struct opcap opcap; u32 cmd_cap; + union iaa_cap_reg iaa_cap; }; enum idxd_device_state { @@ -258,6 +272,32 @@ struct idxd_driver_data { struct device_type *dev_type; int compl_size; int align; + int evl_cr_off; + int cr_status_off; + int cr_result_off; +}; + +struct idxd_evl { + /* Lock to protect event log access. */ + spinlock_t lock; + void *log; + dma_addr_t dma; + /* Total size of event log = number of entries * entry size. */ + unsigned int log_size; + /* The number of entries in the event log. */ + u16 size; + u16 head; + unsigned long *bmap; + bool batch_fail[IDXD_MAX_BATCH_IDENT]; +}; + +struct idxd_evl_fault { + struct work_struct work; + struct idxd_wq *wq; + u8 status; + + /* make this last member always */ + struct __evl_entry entry[]; }; struct idxd_device { @@ -316,8 +356,24 @@ struct idxd_device { struct idxd_pmu *idxd_pmu; unsigned long *opcap_bmap; + struct idxd_evl *evl; + struct kmem_cache *evl_cache; + + struct dentry *dbgfs_dir; + struct dentry *dbgfs_evl_file; }; +static inline unsigned int evl_ent_size(struct idxd_device *idxd) +{ + return idxd->hw.gen_cap.evl_support ? + (32 * (1 << idxd->hw.gen_cap.evl_support)) : 0; +} + +static inline unsigned int evl_size(struct idxd_device *idxd) +{ + return idxd->evl->size * evl_ent_size(idxd); +} + /* IDXD software descriptor */ struct idxd_desc { union { @@ -351,6 +407,7 @@ enum idxd_completion_status { #define engine_confdev(engine) &engine->idxd_dev.conf_dev #define group_confdev(group) &group->idxd_dev.conf_dev #define cdev_dev(cdev) &cdev->idxd_dev.conf_dev +#define user_ctx_dev(ctx) (&(ctx)->idxd_dev.conf_dev) #define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) #define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev) @@ -598,6 +655,7 @@ int idxd_register_driver(void); void idxd_unregister_driver(void); void idxd_wqs_quiesce(struct idxd_device *idxd); bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); +void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count); /* device interrupt control */ irqreturn_t idxd_misc_thread(int vec, void *data); @@ -662,6 +720,9 @@ void idxd_cdev_remove(void); int idxd_cdev_get_major(struct idxd_device *idxd); int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *buf, int len); +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index); /* perfmon */ #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) @@ -678,4 +739,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +/* debugfs */ +int idxd_device_init_debugfs(struct idxd_device *idxd); +void idxd_device_remove_debugfs(struct idxd_device *idxd); +int idxd_init_debugfs(void); +void idxd_remove_debugfs(void); + #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 640d3048368e..1aa823974cda 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -9,7 +9,6 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/workqueue.h> -#include <linux/aer.h> #include <linux/fs.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/device.h> @@ -47,6 +46,9 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct dsa_completion_record), .align = 32, .dev_type = &dsa_device_type, + .evl_cr_off = offsetof(struct dsa_evl_entry, cr), + .cr_status_off = offsetof(struct dsa_completion_record, status), + .cr_result_off = offsetof(struct dsa_completion_record, result), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -54,6 +56,9 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct iax_completion_record), .align = 64, .dev_type = &iax_device_type, + .evl_cr_off = offsetof(struct iax_evl_entry, cr), + .cr_status_off = offsetof(struct iax_completion_record, status), + .cr_result_off = offsetof(struct iax_completion_record, error_code), }, }; @@ -105,7 +110,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) ie = idxd_get_ie(idxd, msix_idx); ie->id = msix_idx; ie->int_handle = INVALID_INT_HANDLE; - ie->pasid = INVALID_IOASID; + ie->pasid = IOMMU_PASID_INVALID; spin_lock_init(&ie->list_lock); init_llist_head(&ie->pending_llist); @@ -200,6 +205,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } + mutex_init(&wq->uc_lock); + xa_init(&wq->upasid_xa); idxd->wqs[i] = wq; } @@ -332,6 +339,33 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) destroy_workqueue(idxd->wq); } +static int idxd_init_evl(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl; + + if (idxd->hw.gen_cap.evl_support == 0) + return 0; + + evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev)); + if (!evl) + return -ENOMEM; + + spin_lock_init(&evl->lock); + evl->size = IDXD_EVL_SIZE_MIN; + + idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), + sizeof(struct idxd_evl_fault) + evl_ent_size(idxd), + 0, 0, NULL); + if (!idxd->evl_cache) { + kfree(evl); + return -ENOMEM; + } + + idxd->evl = evl; + return 0; +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -357,8 +391,14 @@ static int idxd_setup_internals(struct idxd_device *idxd) goto err_wkq_create; } + rc = idxd_init_evl(idxd); + if (rc < 0) + goto err_evl; + return 0; + err_evl: + destroy_workqueue(idxd->wq); err_wkq_create: for (i = 0; i < idxd->max_groups; i++) put_device(group_confdev(idxd->groups[i])); @@ -389,7 +429,7 @@ static void idxd_read_table_offsets(struct idxd_device *idxd) dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset); } -static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) +void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) { int i, j, nr; @@ -461,6 +501,10 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]); } multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4); + + /* read iaa cap */ + if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2) + idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); } static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) @@ -516,6 +560,27 @@ static void idxd_disable_system_pasid(struct idxd_device *idxd) idxd->sva = NULL; } +static int idxd_enable_sva(struct pci_dev *pdev) +{ + int ret; + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); + if (ret) + return ret; + + ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + if (ret) + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); + + return ret; +} + +static void idxd_disable_sva(struct pci_dev *pdev) +{ + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_IOPF); +} + static int idxd_probe(struct idxd_device *idxd) { struct pci_dev *pdev = idxd->pdev; @@ -530,7 +595,7 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) { + if (idxd_enable_sva(pdev)) { dev_warn(dev, "Unable to turn on user SVA feature.\n"); } else { set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); @@ -578,21 +643,19 @@ static int idxd_probe(struct idxd_device *idxd) if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); if (device_user_pasid_enabled(idxd)) - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + idxd_disable_sva(pdev); return rc; } static void idxd_cleanup(struct idxd_device *idxd) { - struct device *dev = &idxd->pdev->dev; - perfmon_pmu_remove(idxd); idxd_cleanup_interrupts(idxd); idxd_cleanup_internals(idxd); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); if (device_user_pasid_enabled(idxd)) - iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + idxd_disable_sva(idxd->pdev); } static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -642,6 +705,10 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_dev_register; } + rc = idxd_device_init_debugfs(idxd); + if (rc) + dev_warn(dev, "IDXD debugfs failed to setup\n"); + dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); @@ -704,13 +771,14 @@ static void idxd_remove(struct pci_dev *pdev) idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); + idxd_device_remove_debugfs(idxd); irq_entry = idxd_get_ie(idxd, 0); free_irq(irq_entry->vector, irq_entry); pci_free_irq_vectors(pdev); pci_iounmap(pdev, idxd->reg_base); if (device_user_pasid_enabled(idxd)) - iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + idxd_disable_sva(pdev); pci_disable_device(pdev); destroy_workqueue(idxd->wq); perfmon_pmu_remove(idxd); @@ -761,6 +829,10 @@ static int __init idxd_init_module(void) if (err) goto err_cdev_register; + err = idxd_init_debugfs(); + if (err) + goto err_debugfs; + err = pci_register_driver(&idxd_pci_driver); if (err) goto err_pci_register; @@ -768,6 +840,8 @@ static int __init idxd_init_module(void) return 0; err_pci_register: + idxd_remove_debugfs(); +err_debugfs: idxd_cdev_remove(); err_cdev_register: idxd_driver_unregister(&idxd_user_drv); @@ -788,5 +862,6 @@ static void __exit idxd_exit_module(void) pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); perfmon_exit(); + idxd_remove_debugfs(); } module_exit(idxd_exit_module); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index aa314ebec587..b501320a9c7a 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -7,6 +7,8 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include <linux/dmaengine.h> #include <linux/delay.h> +#include <linux/iommu.h> +#include <linux/sched/mm.h> #include <uapi/linux/idxd.h> #include "../dmaengine.h" #include "idxd.h" @@ -80,7 +82,7 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie) desc.opcode = DSA_OPCODE_DRAIN; desc.priv = 1; - if (ie->pasid != INVALID_IOASID) + if (ie->pasid != IOMMU_PASID_INVALID) desc.pasid = ie->pasid; desc.int_handle = ie->int_handle; portal = idxd_wq_portal_addr(wq); @@ -217,13 +219,187 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } -static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) +static void idxd_evl_fault_work(struct work_struct *work) { + struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work); + struct idxd_wq *wq = fault->wq; + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head = fault->entry; + void *cr = (void *)entry_head + idxd->data->evl_cr_off; + int cr_size = idxd->data->compl_size; + u8 *status = (u8 *)cr + idxd->data->cr_status_off; + u8 *result = (u8 *)cr + idxd->data->cr_result_off; + int copied, copy_size; + bool *bf; + + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (entry_head->batch && entry_head->first_err_in_batch) + evl->batch_fail[entry_head->batch_id] = false; + + copy_size = cr_size; + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); + break; + case DSA_COMP_BATCH_EVL_ERR: + bf = &evl->batch_fail[entry_head->batch_id]; + + copy_size = entry_head->rcr || *bf ? cr_size : 0; + if (*bf) { + if (*status == DSA_COMP_SUCCESS) + *status = DSA_COMP_BATCH_FAIL; + *result = 1; + *bf = false; + } + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); + break; + case DSA_COMP_DRAIN_EVL: + copy_size = cr_size; + break; + default: + copy_size = 0; + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status); + break; + } + + if (copy_size == 0) + return; + + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. + */ + copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr, + cr, copy_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); + dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", + copy_size, copied); + if (entry_head->batch) + evl->batch_fail[entry_head->batch_id] = true; + } + break; + case DSA_COMP_BATCH_EVL_ERR: + if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); + dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", + copy_size, copied); + } + break; + case DSA_COMP_DRAIN_EVL: + if (copied != copy_size) + dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n", + copy_size, copied); + break; + } + + kmem_cache_free(idxd->evl_cache, fault); +} + +static void process_evl_entry(struct idxd_device *idxd, + struct __evl_entry *entry_head, unsigned int index) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + u8 status; + + if (test_bit(index, evl->bmap)) { + clear_bit(index, evl->bmap); + } else { + status = DSA_COMP_STATUS(entry_head->error); + + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL || + status == DSA_COMP_BATCH_EVL_ERR) { + struct idxd_evl_fault *fault; + int ent_size = evl_ent_size(idxd); + + if (entry_head->rci) + dev_dbg(dev, "Completion Int Req set, ignoring!\n"); + + if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL) + return; + + fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC); + if (fault) { + struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx]; + + fault->wq = wq; + fault->status = status; + memcpy(&fault->entry, entry_head, ent_size); + INIT_WORK(&fault->work, idxd_evl_fault_work); + queue_work(wq->wq, &fault->work); + } else { + dev_warn(dev, "Failed to service fault work.\n"); + } + } else { + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, + entry_head->fault_addr); + } + } +} + +static void process_evl_entries(struct idxd_device *idxd) +{ + union evl_status_reg evl_status; + unsigned int h, t; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head; + unsigned int ent_size = evl_ent_size(idxd); + u32 size; + + evl_status.bits = 0; + evl_status.int_pending = 1; + + spin_lock(&evl->lock); + /* Clear interrupt pending bit */ + iowrite32(evl_status.bits_upper32, + idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + size = idxd->evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + process_evl_entry(idxd, entry_head, h); + h = (h + 1) % size; + } + + evl->head = h; + evl_status.head = h; + iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + spin_unlock(&evl->lock); +} + +irqreturn_t idxd_misc_thread(int vec, void *data) +{ + struct idxd_irq_entry *irq_entry = data; + struct idxd_device *idxd = ie_to_idxd(irq_entry); struct device *dev = &idxd->pdev->dev; union gensts_reg gensts; u32 val = 0; int i; bool err = false; + u32 cause; + + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (!cause) + return IRQ_NONE; + + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); if (cause & IDXD_INTC_HALT_STATE) goto halt; @@ -295,13 +471,18 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) perfmon_counter_overflow(idxd); } + if (cause & IDXD_INTC_EVL) { + val |= IDXD_INTC_EVL; + process_evl_entries(idxd); + } + val ^= cause; if (val) dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", val); if (!err) - return 0; + goto out; halt: gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); @@ -324,33 +505,10 @@ halt: "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); - return -ENXIO; } } - return 0; -} - -irqreturn_t idxd_misc_thread(int vec, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = ie_to_idxd(irq_entry); - int rc; - u32 cause; - - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause) - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - - while (cause) { - rc = process_misc_interrupts(idxd, cause); - if (rc < 0) - break; - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause) - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - } - +out: return IRQ_HANDLED; } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index fe3b8d04f9db..7b54a3939ea1 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -3,6 +3,8 @@ #ifndef _IDXD_REGISTERS_H_ #define _IDXD_REGISTERS_H_ +#include <uapi/linux/idxd.h> + /* PCI Config */ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe @@ -31,7 +33,9 @@ union gen_cap_reg { u64 rsvd:3; u64 dest_readback:1; u64 drain_readback:1; - u64 rsvd2:6; + u64 rsvd2:3; + u64 evl_support:2; + u64 batch_continuation:1; u64 max_xfer_shift:5; u64 max_batch_shift:4; u64 max_ims_mult:6; @@ -55,7 +59,8 @@ union wq_cap_reg { u64 occupancy:1; u64 occupancy_int:1; u64 op_config:1; - u64 rsvd3:9; + u64 wq_prs_support:1; + u64 rsvd4:8; }; u64 bits; } __packed; @@ -117,7 +122,8 @@ union gencfg_reg { u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; - u32 rsvd2:19; + u32 evl_en:1; + u32 rsvd2:18; }; u32 bits; } __packed; @@ -127,7 +133,8 @@ union genctrl_reg { struct { u32 softerr_int_en:1; u32 halt_int_en:1; - u32 rsvd:30; + u32 evl_int_en:1; + u32 rsvd:29; }; u32 bits; } __packed; @@ -162,6 +169,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_EVL 0x20 #define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 @@ -276,6 +284,45 @@ union sw_err_reg { u64 bits[4]; } __packed; +union iaa_cap_reg { + struct { + u64 dec_aecs_format_ver:1; + u64 drop_init_bits:1; + u64 chaining:1; + u64 force_array_output_mod:1; + u64 load_part_aecs:1; + u64 comp_early_abort:1; + u64 nested_comp:1; + u64 diction_comp:1; + u64 header_gen:1; + u64 crypto_gcm:1; + u64 crypto_cfb:1; + u64 crypto_xts:1; + u64 rsvd:52; + }; + u64 bits; +} __packed; + +#define IDXD_IAACAP_OFFSET 0x180 + +#define IDXD_EVLCFG_OFFSET 0xe0 +union evlcfg_reg { + struct { + u64 pasid_en:1; + u64 priv:1; + u64 rsvd:10; + u64 base_addr:52; + + u64 size:16; + u64 pasid:20; + u64 rsvd2:28; + }; + u64 bits[2]; +} __packed; + +#define IDXD_EVL_SIZE_MIN 0x0040 +#define IDXD_EVL_SIZE_MAX 0xffff + union msix_perm { struct { u32 rsvd:2; @@ -325,7 +372,7 @@ union wqcfg { u32 mode:1; /* shared or dedicated */ u32 bof:1; /* block on fault */ u32 wq_ats_disable:1; - u32 rsvd2:1; + u32 wq_prs_disable:1; u32 priority:4; u32 pasid:20; u32 pasid_en:1; @@ -513,4 +560,73 @@ union filter_cfg { u64 val; } __packed; +#define IDXD_EVLSTATUS_OFFSET 0xf0 + +union evl_status_reg { + struct { + u32 head:16; + u32 rsvd:16; + u32 tail:16; + u32 rsvd2:14; + u32 int_pending:1; + u32 rsvd3:1; + }; + struct { + u32 bits_lower32; + u32 bits_upper32; + }; + u64 bits; +} __packed; + +#define IDXD_MAX_BATCH_IDENT 256 + +struct __evl_entry { + u64 rsvd:2; + u64 desc_valid:1; + u64 wq_idx_valid:1; + u64 batch:1; + u64 fault_rw:1; + u64 priv:1; + u64 err_info_valid:1; + u64 error:8; + u64 wq_idx:8; + u64 batch_id:8; + u64 operation:8; + u64 pasid:20; + u64 rsvd2:4; + + u16 batch_idx; + u16 rsvd3; + union { + /* Invalid Flags 0x11 */ + u32 invalid_flags; + /* Invalid Int Handle 0x19 */ + /* Page fault 0x1a */ + /* Page fault 0x06, 0x1f, only operand_id */ + /* Page fault before drain or in batch, 0x26, 0x27 */ + struct { + u16 int_handle; + u16 rci:1; + u16 ims:1; + u16 rcr:1; + u16 first_err_in_batch:1; + u16 rsvd4_2:9; + u16 operand_id:3; + }; + }; + u64 fault_addr; + u64 rsvd5; +} __packed; + +struct dsa_evl_entry { + struct __evl_entry e; + struct dsa_completion_record cr; +} __packed; + +struct iax_evl_entry { + struct __evl_entry e; + u64 rsvd[4]; + struct iax_completion_record cr; +} __packed; + #endif diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 18cd8151dee0..293739ac5596 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -822,10 +822,14 @@ static ssize_t wq_block_on_fault_store(struct device *dev, if (rc < 0) return rc; - if (bof) + if (bof) { + if (test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) + return -EOPNOTSUPP; + set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); - else + } else { clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } return count; } @@ -1109,6 +1113,44 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute static struct device_attribute dev_attr_wq_ats_disable = __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); +static ssize_t wq_prs_disable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)); +} + +static ssize_t wq_prs_disable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + bool prs_dis; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (!idxd->hw.wq_cap.wq_prs_support) + return -EOPNOTSUPP; + + rc = kstrtobool(buf, &prs_dis); + if (rc < 0) + return rc; + + if (prs_dis) { + set_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* when PRS is disabled, BOF needs to be off as well */ + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } else { + clear_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + } + return count; +} + +static struct device_attribute dev_attr_wq_prs_disable = + __ATTR(prs_disable, 0644, wq_prs_disable_show, wq_prs_disable_store); + static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_wq *wq = confdev_to_wq(dev); @@ -1239,6 +1281,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_transfer_size.attr, &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, + &dev_attr_wq_prs_disable.attr, &dev_attr_wq_occupancy.attr, &dev_attr_wq_enqcmds_retries.attr, &dev_attr_wq_op_config.attr, @@ -1260,6 +1303,13 @@ static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_wq_attr_wq_prs_disable_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_wq_prs_disable.attr && + !idxd->hw.wq_cap.wq_prs_support; +} + static umode_t idxd_wq_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1273,6 +1323,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj, if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) return 0; + if (idxd_wq_attr_wq_prs_disable_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1292,6 +1345,7 @@ static void idxd_conf_wq_release(struct device *dev) bitmap_free(wq->opcap_bmap); kfree(wq->wqcfg); + xa_destroy(&wq->upasid_xa); kfree(wq); } @@ -1452,15 +1506,13 @@ static ssize_t errors_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); - int i, out = 0; + DECLARE_BITMAP(swerr_bmap, 256); + bitmap_zero(swerr_bmap, 256); spin_lock(&idxd->dev_lock); - for (i = 0; i < 4; i++) - out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]); + multi_u64_to_bmap(swerr_bmap, &idxd->sw_err.bits[0], 4); spin_unlock(&idxd->dev_lock); - out--; - out += sysfs_emit_at(buf, out, "\n"); - return out; + return sysfs_emit(buf, "%*pb\n", 256, swerr_bmap); } static DEVICE_ATTR_RO(errors); @@ -1563,6 +1615,59 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_RW(cmd_status); +static ssize_t iaa_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd->hw.version < DEVICE_VERSION_2) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%#llx\n", idxd->hw.iaa_cap.bits); +} +static DEVICE_ATTR_RO(iaa_cap); + +static ssize_t event_log_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (!idxd->evl) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", idxd->evl->size); +} + +static ssize_t event_log_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + unsigned long val; + int rc; + + if (!idxd->evl) + return -EOPNOTSUPP; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX || + (val * evl_ent_size(idxd) > ULONG_MAX - idxd->evl->dma)) + return -EINVAL; + + idxd->evl->size = val; + return count; +} +static DEVICE_ATTR_RW(event_log_size); + static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, struct idxd_device *idxd) { @@ -1585,6 +1690,21 @@ static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_device_attr_iaa_cap_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_iaa_cap.attr && + (idxd->data->type != IDXD_TYPE_IAX || + idxd->hw.version < DEVICE_VERSION_2); +} + +static bool idxd_device_attr_event_log_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return (attr == &dev_attr_event_log_size.attr && + !idxd->hw.gen_cap.evl_support); +} + static umode_t idxd_device_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1597,6 +1717,12 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj, if (idxd_device_attr_read_buffers_invisible(attr, idxd)) return 0; + if (idxd_device_attr_iaa_cap_invisible(attr, idxd)) + return 0; + + if (idxd_device_attr_event_log_size_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1622,6 +1748,8 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_read_buffer_limit.attr, &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, + &dev_attr_iaa_cap.attr, + &dev_attr_event_log_size.attr, NULL, }; @@ -1643,6 +1771,8 @@ static void idxd_conf_device_release(struct device *dev) bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs); kfree(idxd->engines); + kfree(idxd->evl); + kmem_cache_destroy(idxd->evl_cache); ida_free(&idxd_ida, idxd->id); bitmap_free(idxd->opcap_bmap); kfree(idxd); |