aboutsummaryrefslogtreecommitdiff
path: root/drivers/iommu/intel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--drivers/iommu/intel/Kconfig6
-rw-r--r--drivers/iommu/intel/Makefile1
-rw-r--r--drivers/iommu/intel/debugfs.c111
-rw-r--r--drivers/iommu/intel/dmar.c58
-rw-r--r--drivers/iommu/intel/iommu.c215
-rw-r--r--drivers/iommu/intel/pasid.c15
-rw-r--r--drivers/iommu/intel/pasid.h6
-rw-r--r--drivers/iommu/intel/perf.c166
-rw-r--r--drivers/iommu/intel/perf.h73
-rw-r--r--drivers/iommu/intel/svm.c644
10 files changed, 875 insertions, 420 deletions
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 28a3d1596c76..43ebd8af11c5 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -3,6 +3,9 @@
config DMAR_TABLE
bool
+config DMAR_PERF
+ bool
+
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64)
@@ -14,6 +17,7 @@ config INTEL_IOMMU
select SWIOTLB
select IOASID
select IOMMU_DMA
+ select PCI_ATS
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
@@ -24,6 +28,7 @@ config INTEL_IOMMU
config INTEL_IOMMU_DEBUGFS
bool "Export Intel IOMMU internals in Debugfs"
depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ select DMAR_PERF
help
!!!WARNING!!!
@@ -41,6 +46,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
+ select IOMMU_SVA_LIB
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index ae236ec7d219..fa0dae16441c 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
+obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index efea7f02abd9..62e23ff3c987 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -16,6 +16,7 @@
#include <asm/irq_remapping.h>
#include "pasid.h"
+#include "perf.h"
struct tbl_walk {
u16 bus;
@@ -31,6 +32,9 @@ struct iommu_regset {
const char *regs;
};
+#define DEBUG_BUFFER_SIZE 1024
+static char debug_buf[DEBUG_BUFFER_SIZE];
+
#define IOMMU_REGSET_ENTRY(_reg_) \
{ DMAR_##_reg_##_REG, __stringify(_reg_) }
@@ -538,6 +542,111 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
#endif
+static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu,
+ struct dmar_drhd_unit *drhd)
+{
+ int ret;
+
+ seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+ iommu->name, drhd->reg_base_addr);
+
+ ret = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE);
+ if (ret < 0)
+ seq_puts(m, "Failed to get latency snapshot");
+ else
+ seq_puts(m, debug_buf);
+ seq_puts(m, "\n");
+}
+
+static int latency_show(struct seq_file *m, void *v)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ latency_show_one(m, iommu, drhd);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static int dmar_perf_latency_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, latency_show, NULL);
+}
+
+static ssize_t dmar_perf_latency_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int counting;
+ char buf[64];
+
+ if (cnt > 63)
+ cnt = 63;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ if (kstrtoint(buf, 0, &counting))
+ return -EINVAL;
+
+ switch (counting) {
+ case 0:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB);
+ dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB);
+ dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC);
+ dmar_latency_disable(iommu, DMAR_LATENCY_PRQ);
+ }
+ rcu_read_unlock();
+ break;
+ case 1:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_INV_IOTLB);
+ rcu_read_unlock();
+ break;
+ case 2:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_INV_DEVTLB);
+ rcu_read_unlock();
+ break;
+ case 3:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC);
+ rcu_read_unlock();
+ break;
+ case 4:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_PRQ);
+ rcu_read_unlock();
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static const struct file_operations dmar_perf_latency_fops = {
+ .open = dmar_perf_latency_open,
+ .write = dmar_perf_latency_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void __init intel_iommu_debugfs_init(void)
{
struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
@@ -556,4 +665,6 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
#endif
+ debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
+ NULL, &dmar_perf_latency_fops);
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 1757ac1e1623..d66f79acd14d 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -34,6 +34,7 @@
#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
+#include "perf.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -1142,7 +1143,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
if (err)
- goto err_unmap;
+ goto err_sysfs;
}
drhd->iommu = iommu;
@@ -1150,6 +1151,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
return 0;
+err_sysfs:
+ iommu_device_sysfs_remove(&iommu->iommu);
err_unmap:
unmap_iommu(iommu);
error_free_seq_id:
@@ -1340,15 +1343,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options)
{
struct q_inval *qi = iommu->qi;
+ s64 devtlb_start_ktime = 0;
+ s64 iotlb_start_ktime = 0;
+ s64 iec_start_ktime = 0;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
int offset, shift;
int rc, i;
+ u64 type;
if (!qi)
return 0;
+ type = desc->qw0 & GENMASK_ULL(3, 0);
+
+ if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
+ iotlb_start_ktime = ktime_to_ns(ktime_get());
+
+ if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
+ devtlb_start_ktime = ktime_to_ns(ktime_get());
+
+ if (type == QI_IEC_TYPE &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
+ iec_start_ktime = ktime_to_ns(ktime_get());
+
restart:
rc = 0;
@@ -1423,6 +1444,18 @@ restart:
if (rc == -EAGAIN)
goto restart;
+ if (iotlb_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
+ ktime_to_ns(ktime_get()) - iotlb_start_ktime);
+
+ if (devtlb_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
+ ktime_to_ns(ktime_get()) - devtlb_start_ktime);
+
+ if (iec_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
+ ktime_to_ns(ktime_get()) - iec_start_ktime);
+
return rc;
}
@@ -1911,16 +1944,23 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);
if (fault_type == INTR_REMAP)
- pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
- source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), addr >> 48,
- fault_reason, reason);
- else
- pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
+ pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
+ source_id >> 8, PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr >> 48,
+ fault_reason, reason);
+ else if (pasid == INVALID_IOASID)
+ pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), pasid, addr,
+ PCI_FUNC(source_id & 0xFF), addr,
fault_reason, reason);
+ else
+ pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
+ type ? "DMA Read" : "DMA Write", pasid,
+ source_id >> 8, PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr,
+ fault_reason, reason);
+
return 0;
}
@@ -1987,7 +2027,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
if (!ratelimited)
/* Using pasid -1 if pasid is not present */
dmar_fault_do_one(iommu, type, fault_reason,
- pasid_present ? pasid : -1,
+ pasid_present ? pasid : INVALID_IOASID,
source_id, guest_addr);
fault_index++;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 708f430af1c4..dd22fc7d5176 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -46,6 +46,7 @@
#include <asm/iommu.h>
#include "../irq_remapping.h"
+#include "../iommu-sva-lib.h"
#include "pasid.h"
#include "cap_audit.h"
@@ -564,7 +565,7 @@ static inline int domain_pfn_supported(struct dmar_domain *domain,
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
unsigned long sagaw;
- int agaw = -1;
+ int agaw;
sagaw = cap_sagaw(iommu->cap);
for (agaw = width_to_agaw(max_gaw);
@@ -625,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
bool found = false;
int i;
- domain->iommu_coherency = 1;
+ domain->iommu_coherency = true;
for_each_domain_iommu(i, domain) {
found = true;
if (!iommu_paging_structure_coherency(g_iommus[i])) {
- domain->iommu_coherency = 0;
+ domain->iommu_coherency = false;
break;
}
}
@@ -641,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (!iommu_paging_structure_coherency(iommu)) {
- domain->iommu_coherency = 0;
+ domain->iommu_coherency = false;
break;
}
}
rcu_read_unlock();
}
-static int domain_update_iommu_snooping(struct intel_iommu *skip)
+static bool domain_update_iommu_snooping(struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int ret = 1;
+ bool ret = true;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
@@ -665,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
*/
if (!sm_supported(iommu) &&
!ecap_sc_support(iommu->ecap)) {
- ret = 0;
+ ret = false;
break;
}
}
@@ -682,9 +683,8 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
struct intel_iommu *iommu;
int mask = 0x3;
- if (!intel_iommu_superpage) {
+ if (!intel_iommu_superpage)
return 0;
- }
/* set iommu_superpage to the smallest common denominator */
rcu_read_lock();
@@ -1919,7 +1919,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
assert_spin_locked(&iommu->lock);
domain->iommu_refcnt[iommu->seq_id] += 1;
- domain->iommu_count += 1;
if (domain->iommu_refcnt[iommu->seq_id] == 1) {
ndomains = cap_ndoms(iommu->cap);
num = find_first_zero_bit(iommu->domain_ids, ndomains);
@@ -1927,7 +1926,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
if (num >= ndomains) {
pr_err("%s: No free domain ids\n", iommu->name);
domain->iommu_refcnt[iommu->seq_id] -= 1;
- domain->iommu_count -= 1;
return -ENOSPC;
}
@@ -1943,16 +1941,15 @@ static int domain_attach_iommu(struct dmar_domain *domain,
return 0;
}
-static int domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+static void domain_detach_iommu(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
{
- int num, count;
+ int num;
assert_spin_locked(&device_domain_lock);
assert_spin_locked(&iommu->lock);
domain->iommu_refcnt[iommu->seq_id] -= 1;
- count = --domain->iommu_count;
if (domain->iommu_refcnt[iommu->seq_id] == 0) {
num = domain->iommu_did[iommu->seq_id];
clear_bit(num, iommu->domain_ids);
@@ -1961,8 +1958,6 @@ static int domain_detach_iommu(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
domain->iommu_did[iommu->seq_id] = 0;
}
-
- return count;
}
static inline int guestwidth_to_adjustwidth(int gaw)
@@ -2434,10 +2429,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return 0;
}
-static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
+static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
{
- unsigned long flags;
+ struct intel_iommu *iommu = info->iommu;
struct context_entry *context;
+ unsigned long flags;
u16 did_old;
if (!iommu)
@@ -2449,7 +2445,16 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn
spin_unlock_irqrestore(&iommu->lock, flags);
return;
}
- did_old = context_domain_id(context);
+
+ if (sm_supported(iommu)) {
+ if (hw_pass_through && domain_type_is_si(info->domain))
+ did_old = FLPT_DEFAULT_DID;
+ else
+ did_old = info->domain->iommu_did[iommu->seq_id];
+ } else {
+ did_old = context_domain_id(context);
+ }
+
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -2467,6 +2472,8 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn
0,
0,
DMA_TLB_DSI_FLUSH);
+
+ __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
}
static inline void unlink_domain_info(struct device_domain_info *info)
@@ -2525,9 +2532,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
struct device *dev,
u32 pasid)
{
- int flags = PASID_FLAG_SUPERVISOR_MODE;
struct dma_pte *pgd = domain->pgd;
int agaw, level;
+ int flags = 0;
/*
* Skip top levels of page tables for iommu which has
@@ -2543,7 +2550,10 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (level != 4 && level != 5)
return -EINVAL;
- flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+ if (pasid != PASID_RID2PASID)
+ flags |= PASID_FLAG_SUPERVISOR_MODE;
+ if (level == 5)
+ flags |= PASID_FLAG_FL5LP;
if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
flags |= PASID_FLAG_PAGE_SNOOP;
@@ -4135,62 +4145,56 @@ static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
return container_of(iommu_dev, struct intel_iommu, iommu);
}
-static ssize_t intel_iommu_show_version(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
u32 ver = readl(iommu->reg + DMAR_VER_REG);
return sprintf(buf, "%d:%d\n",
DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
-static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
+static DEVICE_ATTR_RO(version);
-static ssize_t intel_iommu_show_address(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t address_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->reg_phys);
}
-static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
+static DEVICE_ATTR_RO(address);
-static ssize_t intel_iommu_show_cap(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t cap_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->cap);
}
-static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
+static DEVICE_ATTR_RO(cap);
-static ssize_t intel_iommu_show_ecap(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t ecap_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->ecap);
}
-static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
+static DEVICE_ATTR_RO(ecap);
-static ssize_t intel_iommu_show_ndoms(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t domains_supported_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
}
-static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
+static DEVICE_ATTR_RO(domains_supported);
-static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t domains_used_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
cap_ndoms(iommu->cap)));
}
-static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
+static DEVICE_ATTR_RO(domains_used);
static struct attribute *intel_iommu_attrs[] = {
&dev_attr_version.attr,
@@ -4433,9 +4437,9 @@ out_free_dmar:
static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
- struct intel_iommu *iommu = opaque;
+ struct device_domain_info *info = opaque;
- domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
+ domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
return 0;
}
@@ -4445,12 +4449,13 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
* devices, unbinding the driver from any one of them will possibly leave
* the others unable to operate.
*/
-static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
+static void domain_context_clear(struct device_domain_info *info)
{
- if (!iommu || !dev || !dev_is_pci(dev))
+ if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
return;
- pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
+ pci_for_each_dma_alias(to_pci_dev(info->dev),
+ &domain_context_clear_one_cb, info);
}
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
@@ -4467,14 +4472,13 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
iommu = info->iommu;
domain = info->domain;
- if (info->dev) {
+ if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
PASID_RID2PASID, false);
iommu_disable_dev_iotlb(info);
- if (!dev_is_real_dma_subdevice(info->dev))
- domain_context_clear(iommu, info->dev);
+ domain_context_clear(info);
intel_pasid_free_table(info->dev);
}
@@ -4508,13 +4512,13 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);
- domain->iommu_coherency = 0;
- domain->iommu_snooping = 0;
+ domain->iommu_coherency = false;
+ domain->iommu_snooping = false;
domain->iommu_superpage = 0;
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+ domain->pgd = alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4606,6 +4610,8 @@ static int auxiliary_link_device(struct dmar_domain *domain,
if (!sinfo) {
sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
+ if (!sinfo)
+ return -ENOMEM;
sinfo->domain = domain;
sinfo->pdev = dev;
list_add(&sinfo->link_phys, &info->subdevices);
@@ -4752,6 +4758,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (!iommu)
return -ENODEV;
+ if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
+ !ecap_nest(iommu->ecap)) {
+ dev_err(dev, "%s: iommu not support nested translation\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@@ -4773,8 +4786,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
pte = dmar_domain->pgd;
if (dma_pte_present(pte)) {
- dmar_domain->pgd = (struct dma_pte *)
- phys_to_virt(dma_pte_addr(pte));
+ dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
free_pgtable_page(pte);
}
dmar_domain->agaw--;
@@ -5124,7 +5136,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
- return domain_update_iommu_snooping(NULL) == 1;
+ return domain_update_iommu_snooping(NULL);
if (cap == IOMMU_CAP_INTR_REMAP)
return irq_remapping_enabled == 1;
@@ -5160,13 +5172,10 @@ static void intel_iommu_release_device(struct device *dev)
static void intel_iommu_probe_finalize(struct device *dev)
{
- dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
if (domain && domain->type == IOMMU_DOMAIN_DMA)
- iommu_setup_dma_ops(dev, base,
- __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base);
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
else
set_dma_ops(dev, NULL);
}
@@ -5326,6 +5335,48 @@ static int intel_iommu_disable_auxd(struct device *dev)
return 0;
}
+static int intel_iommu_enable_sva(struct device *dev)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ struct intel_iommu *iommu;
+ int ret;
+
+ if (!info || dmar_disabled)
+ return -EINVAL;
+
+ iommu = info->iommu;
+ if (!iommu)
+ return -EINVAL;
+
+ if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
+ return -ENODEV;
+
+ if (intel_iommu_enable_pasid(iommu, dev))
+ return -ENODEV;
+
+ if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+ return -EINVAL;
+
+ ret = iopf_queue_add_device(iommu->iopf_queue, dev);
+ if (!ret)
+ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+
+ return ret;
+}
+
+static int intel_iommu_disable_sva(struct device *dev)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ struct intel_iommu *iommu = info->iommu;
+ int ret;
+
+ ret = iommu_unregister_device_fault_handler(dev);
+ if (!ret)
+ ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+
+ return ret;
+}
+
/*
* A PCI express designated vendor specific extended capability is defined
* in the section 3.7 of Intel scalable I/O virtualization technical spec
@@ -5387,35 +5438,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
static int
intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
- if (feat == IOMMU_DEV_FEAT_AUX)
+ switch (feat) {
+ case IOMMU_DEV_FEAT_AUX:
return intel_iommu_enable_auxd(dev);
- if (feat == IOMMU_DEV_FEAT_IOPF)
+ case IOMMU_DEV_FEAT_IOPF:
return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
- if (feat == IOMMU_DEV_FEAT_SVA) {
- struct device_domain_info *info = get_domain_info(dev);
-
- if (!info)
- return -EINVAL;
+ case IOMMU_DEV_FEAT_SVA:
+ return intel_iommu_enable_sva(dev);
- if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
- return -EINVAL;
-
- if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
- return 0;
+ default:
+ return -ENODEV;
}
-
- return -ENODEV;
}
static int
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
- if (feat == IOMMU_DEV_FEAT_AUX)
+ switch (feat) {
+ case IOMMU_DEV_FEAT_AUX:
return intel_iommu_disable_auxd(dev);
- return -ENODEV;
+ case IOMMU_DEV_FEAT_IOPF:
+ return 0;
+
+ case IOMMU_DEV_FEAT_SVA:
+ return intel_iommu_disable_sva(dev);
+
+ default:
+ return -ENODEV;
+ }
}
static bool
@@ -5452,7 +5505,7 @@ intel_iommu_enable_nesting(struct iommu_domain *domain)
int ret = -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
- if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
+ if (list_empty(&dmar_domain->devices)) {
dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
ret = 0;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 72646bafc52f..9ec374e17469 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* intel-pasid.c - PASID idr, table and entry manipulation
*
* Copyright (C) 2018 Intel Corporation
@@ -511,7 +511,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
u32 pasid, bool fault_ignore)
{
struct pasid_entry *pte;
- u16 did;
+ u16 did, pgtt;
pte = intel_pasid_get_entry(dev, pasid);
if (WARN_ON(!pte))
@@ -521,13 +521,19 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
return;
did = pasid_get_domain_id(pte);
+ pgtt = pasid_pte_get_pgtt(pte);
+
intel_pasid_clear_entry(dev, pasid, fault_ignore);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
- qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+
+ if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
+ qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
/* Device IOTLB doesn't need to be flushed in caching mode. */
if (!cap_caching_mode(iommu->cap))
@@ -699,7 +705,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
* Since it is a second level only translation setup, we should
* set SRE bit as well (addresses are expected to be GPAs).
*/
- pasid_set_sre(pte);
+ if (pasid != PASID_RID2PASID)
+ pasid_set_sre(pte);
pasid_set_present(pte);
pasid_flush_caches(iommu, pte, pasid, did);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 5ff61c3d401f..c11bc8b833b8 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -99,6 +99,12 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte)
return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
}
+/* Get PGTT field of a PASID table entry */
+static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte)
+{
+ return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7);
+}
+
extern unsigned int intel_pasid_max_id;
int intel_pasid_alloc_table(struct device *dev);
void intel_pasid_free_table(struct device *dev);
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
new file mode 100644
index 000000000000..73b7ec705552
--- /dev/null
+++ b/drivers/iommu/intel/perf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * perf.c - performance monitor
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/intel-iommu.h>
+
+#include "perf.h"
+
+static DEFINE_SPINLOCK(latency_lock);
+
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+
+ return lstat && lstat[type].enabled;
+}
+
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat;
+ unsigned long flags;
+ int ret = -EBUSY;
+
+ if (dmar_latency_enabled(iommu, type))
+ return 0;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ if (!iommu->perf_statistic) {
+ iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM,
+ GFP_ATOMIC);
+ if (!iommu->perf_statistic) {
+ ret = -ENOMEM;
+ goto unlock_out;
+ }
+ }
+
+ lstat = iommu->perf_statistic;
+
+ if (!lstat[type].enabled) {
+ lstat[type].enabled = true;
+ lstat[type].counter[COUNTS_MIN] = UINT_MAX;
+ ret = 0;
+ }
+unlock_out:
+ spin_unlock_irqrestore(&latency_lock, flags);
+
+ return ret;
+}
+
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+
+ if (!dmar_latency_enabled(iommu, type))
+ return;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM);
+ spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+ u64 min, max;
+
+ if (!dmar_latency_enabled(iommu, type))
+ return;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ if (latency < 100)
+ lstat[type].counter[COUNTS_10e2]++;
+ else if (latency < 1000)
+ lstat[type].counter[COUNTS_10e3]++;
+ else if (latency < 10000)
+ lstat[type].counter[COUNTS_10e4]++;
+ else if (latency < 100000)
+ lstat[type].counter[COUNTS_10e5]++;
+ else if (latency < 1000000)
+ lstat[type].counter[COUNTS_10e6]++;
+ else if (latency < 10000000)
+ lstat[type].counter[COUNTS_10e7]++;
+ else
+ lstat[type].counter[COUNTS_10e8_plus]++;
+
+ min = lstat[type].counter[COUNTS_MIN];
+ max = lstat[type].counter[COUNTS_MAX];
+ lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency);
+ lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency);
+ lstat[type].counter[COUNTS_SUM] += latency;
+ lstat[type].samples++;
+ spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+static char *latency_counter_names[] = {
+ " <0.1us",
+ " 0.1us-1us", " 1us-10us", " 10us-100us",
+ " 100us-1ms", " 1ms-10ms", " >=10ms",
+ " min(us)", " max(us)", " average(us)"
+};
+
+static char *latency_type_names[] = {
+ " inv_iotlb", " inv_devtlb", " inv_iec",
+ " svm_prq"
+};
+
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+ int bytes = 0, i, j;
+
+ memset(str, 0, size);
+
+ for (i = 0; i < COUNTS_NUM; i++)
+ bytes += snprintf(str + bytes, size - bytes,
+ "%s", latency_counter_names[i]);
+
+ spin_lock_irqsave(&latency_lock, flags);
+ for (i = 0; i < DMAR_LATENCY_NUM; i++) {
+ if (!dmar_latency_enabled(iommu, i))
+ continue;
+
+ bytes += snprintf(str + bytes, size - bytes,
+ "\n%s", latency_type_names[i]);
+
+ for (j = 0; j < COUNTS_NUM; j++) {
+ u64 val = lstat[i].counter[j];
+
+ switch (j) {
+ case COUNTS_MIN:
+ if (val == UINT_MAX)
+ val = 0;
+ else
+ val = div_u64(val, 1000);
+ break;
+ case COUNTS_MAX:
+ val = div_u64(val, 1000);
+ break;
+ case COUNTS_SUM:
+ if (lstat[i].samples)
+ val = div_u64(val, (lstat[i].samples * 1000));
+ else
+ val = 0;
+ break;
+ default:
+ break;
+ }
+
+ bytes += snprintf(str + bytes, size - bytes,
+ "%12lld", val);
+ }
+ }
+ spin_unlock_irqrestore(&latency_lock, flags);
+
+ return bytes;
+}
diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h
new file mode 100644
index 000000000000..fd6db8049d1a
--- /dev/null
+++ b/drivers/iommu/intel/perf.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * perf.h - performance monitor header
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+enum latency_type {
+ DMAR_LATENCY_INV_IOTLB = 0,
+ DMAR_LATENCY_INV_DEVTLB,
+ DMAR_LATENCY_INV_IEC,
+ DMAR_LATENCY_PRQ,
+ DMAR_LATENCY_NUM
+};
+
+enum latency_count {
+ COUNTS_10e2 = 0, /* < 0.1us */
+ COUNTS_10e3, /* 0.1us ~ 1us */
+ COUNTS_10e4, /* 1us ~ 10us */
+ COUNTS_10e5, /* 10us ~ 100us */
+ COUNTS_10e6, /* 100us ~ 1ms */
+ COUNTS_10e7, /* 1ms ~ 10ms */
+ COUNTS_10e8_plus, /* 10ms and plus*/
+ COUNTS_MIN,
+ COUNTS_MAX,
+ COUNTS_SUM,
+ COUNTS_NUM
+};
+
+struct latency_statistic {
+ bool enabled;
+ u64 counter[COUNTS_NUM];
+ u64 samples;
+};
+
+#ifdef CONFIG_DMAR_PERF
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type);
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type);
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type);
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type,
+ u64 latency);
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
+#else
+static inline int
+dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+ return -EINVAL;
+}
+
+static inline void
+dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+}
+
+static inline bool
+dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+ return false;
+}
+
+static inline void
+dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+}
+
+static inline int
+dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+ return 0;
+}
+#endif /* CONFIG_DMAR_PERF */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5165cea90421..4b9b3f35ba0e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -17,19 +17,76 @@
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
+#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
+#include <trace/events/intel_iommu.h>
#include "pasid.h"
+#include "perf.h"
+#include "../iommu-sva-lib.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
#define PRQ_ORDER 0
+static DEFINE_XARRAY_ALLOC(pasid_private_array);
+static int pasid_private_add(ioasid_t pasid, void *priv)
+{
+ return xa_alloc(&pasid_private_array, &pasid, priv,
+ XA_LIMIT(pasid, pasid), GFP_ATOMIC);
+}
+
+static void pasid_private_remove(ioasid_t pasid)
+{
+ xa_erase(&pasid_private_array, pasid);
+}
+
+static void *pasid_private_find(ioasid_t pasid)
+{
+ return xa_load(&pasid_private_array, pasid);
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
+{
+ struct intel_svm_dev *sdev = NULL, *t;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == sid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return sdev;
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
+{
+ struct intel_svm_dev *sdev = NULL, *t;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->dev == dev) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return sdev;
+}
+
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
+ struct iopf_queue *iopfq;
struct page *pages;
int irq, ret;
@@ -46,13 +103,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
ret = -EINVAL;
- err:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
- return ret;
+ goto free_prq;
}
iommu->pr_irq = irq;
+ snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
+ "dmar%d-iopfq", iommu->seq_id);
+ iopfq = iopf_queue_alloc(iommu->iopfq_name);
+ if (!iopfq) {
+ pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
+ ret = -ENOMEM;
+ goto free_hwirq;
+ }
+ iommu->iopf_queue = iopfq;
+
snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
@@ -60,9 +124,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
if (ret) {
pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
iommu->name);
- dmar_free_hwirq(irq);
- iommu->pr_irq = 0;
- goto err;
+ goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
@@ -71,6 +133,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
init_completion(&iommu->prq_complete);
return 0;
+
+free_iopfq:
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+free_hwirq:
+ dmar_free_hwirq(irq);
+ iommu->pr_irq = 0;
+free_prq:
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return ret;
}
int intel_svm_finish_prq(struct intel_iommu *iommu)
@@ -85,6 +159,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->pr_irq = 0;
}
+ if (iommu->iopf_queue) {
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+ }
+
free_pages((unsigned long)iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
@@ -204,17 +283,12 @@ static const struct mmu_notifier_ops intel_mmuops = {
};
static DEFINE_MUTEX(pasid_mutex);
-static LIST_HEAD(global_svm_list);
-
-#define for_each_svm_dev(sdev, svm, d) \
- list_for_each_entry((sdev), &(svm)->devs, list) \
- if ((d) != (sdev)->dev) {} else
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
struct intel_svm **rsvm,
struct intel_svm_dev **rsdev)
{
- struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm_dev *sdev = NULL;
struct intel_svm *svm;
/* The caller should hold the pasid_mutex lock */
@@ -224,7 +298,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
return -EINVAL;
- svm = ioasid_find(NULL, pasid, NULL);
+ svm = pasid_private_find(pasid);
if (IS_ERR(svm))
return PTR_ERR(svm);
@@ -237,15 +311,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
*/
if (WARN_ON(list_empty(&svm->devs)))
return -EINVAL;
-
- rcu_read_lock();
- list_for_each_entry_rcu(d, &svm->devs, list) {
- if (d->dev == dev) {
- sdev = d;
- break;
- }
- }
- rcu_read_unlock();
+ sdev = svm_lookup_device_by_dev(svm, dev);
out:
*rsvm = svm;
@@ -334,7 +400,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
svm->gpasid = data->gpasid;
svm->flags |= SVM_FLAG_GUEST_PASID;
}
- ioasid_set_data(data->hpasid, svm);
+ pasid_private_add(data->hpasid, svm);
INIT_LIST_HEAD_RCU(&svm->devs);
mmput(svm->mm);
}
@@ -388,7 +454,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
list_add_rcu(&sdev->list, &svm->devs);
out:
if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
- ioasid_set_data(data->hpasid, NULL);
+ pasid_private_remove(data->hpasid);
kfree(svm);
}
@@ -431,7 +497,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
* the unbind, IOMMU driver will get notified
* and perform cleanup.
*/
- ioasid_set_data(pasid, NULL);
+ pasid_private_remove(pasid);
kfree(svm);
}
}
@@ -459,79 +525,81 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
mutex_unlock(&mm->context.lock);
}
-/* Caller must hold pasid_mutex, mm reference */
-static int
-intel_svm_bind_mm(struct device *dev, unsigned int flags,
- struct mm_struct *mm, struct intel_svm_dev **sd)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
+ unsigned int flags)
{
- struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
- struct intel_svm *svm = NULL, *t;
- struct device_domain_info *info;
- struct intel_svm_dev *sdev;
- unsigned long iflags;
- int pasid_max;
- int ret;
+ ioasid_t max_pasid = dev_is_pci(dev) ?
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
- if (!iommu || dmar_disabled)
- return -EINVAL;
+ return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
+}
- if (!intel_svm_capable(iommu))
- return -ENOTSUPP;
+static void intel_svm_free_pasid(struct mm_struct *mm)
+{
+ iommu_sva_free_pasid(mm);
+}
- if (dev_is_pci(dev)) {
- pasid_max = pci_max_pasids(to_pci_dev(dev));
- if (pasid_max < 0)
- return -EINVAL;
- } else
- pasid_max = 1 << 20;
+static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
+ struct device *dev,
+ struct mm_struct *mm,
+ unsigned int flags)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ unsigned long iflags, sflags;
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret = 0;
- /* Bind supervisor PASID shuld have mm = NULL */
- if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap) || mm) {
- pr_err("Supervisor PASID with user provided mm.\n");
- return -EINVAL;
- }
- }
+ svm = pasid_private_find(mm->pasid);
+ if (!svm) {
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm)
+ return ERR_PTR(-ENOMEM);
- list_for_each_entry(t, &global_svm_list, list) {
- if (t->mm != mm)
- continue;
+ svm->pasid = mm->pasid;
+ svm->mm = mm;
+ svm->flags = flags;
+ INIT_LIST_HEAD_RCU(&svm->devs);
- svm = t;
- if (svm->pasid >= pasid_max) {
- dev_warn(dev,
- "Limited PASID width. Cannot use existing PASID %d\n",
- svm->pasid);
- ret = -ENOSPC;
- goto out;
+ if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
+ svm->notifier.ops = &intel_mmuops;
+ ret = mmu_notifier_register(&svm->notifier, mm);
+ if (ret) {
+ kfree(svm);
+ return ERR_PTR(ret);
+ }
}
- /* Find the matching device in svm list */
- for_each_svm_dev(sdev, svm, dev) {
- sdev->users++;
- goto success;
+ ret = pasid_private_add(svm->pasid, svm);
+ if (ret) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ kfree(svm);
+ return ERR_PTR(ret);
}
+ }
- break;
+ /* Find the matching device in svm list */
+ sdev = svm_lookup_device_by_dev(svm, dev);
+ if (sdev) {
+ sdev->users++;
+ goto success;
}
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev) {
ret = -ENOMEM;
- goto out;
+ goto free_svm;
}
+
sdev->dev = dev;
sdev->iommu = iommu;
-
- ret = intel_iommu_enable_pasid(iommu, dev);
- if (ret) {
- kfree(sdev);
- goto out;
- }
-
- info = get_domain_info(dev);
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
+ sdev->users = 1;
+ sdev->pasid = svm->pasid;
+ sdev->sva.dev = dev;
+ init_rcu_head(&sdev->rcu);
if (info->ats_enabled) {
sdev->dev_iotlb = 1;
sdev->qdep = info->ats_qdep;
@@ -539,95 +607,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
sdev->qdep = 0;
}
- /* Finish the setup now we know we're keeping it */
- sdev->users = 1;
- init_rcu_head(&sdev->rcu);
-
- if (!svm) {
- svm = kzalloc(sizeof(*svm), GFP_KERNEL);
- if (!svm) {
- ret = -ENOMEM;
- kfree(sdev);
- goto out;
- }
-
- if (pasid_max > intel_pasid_max_id)
- pasid_max = intel_pasid_max_id;
+ /* Setup the pasid table: */
+ sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
+ PASID_FLAG_SUPERVISOR_MODE : 0;
+ sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+ spin_lock_irqsave(&iommu->lock, iflags);
+ ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
+ FLPT_DEFAULT_DID, sflags);
+ spin_unlock_irqrestore(&iommu->lock, iflags);
- /* Do not use PASID 0, reserved for RID to PASID */
- svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
- if (svm->pasid == INVALID_IOASID) {
- kfree(svm);
- kfree(sdev);
- ret = -ENOSPC;
- goto out;
- }
- svm->notifier.ops = &intel_mmuops;
- svm->mm = mm;
- svm->flags = flags;
- INIT_LIST_HEAD_RCU(&svm->devs);
- INIT_LIST_HEAD(&svm->list);
- ret = -ENOMEM;
- if (mm) {
- ret = mmu_notifier_register(&svm->notifier, mm);
- if (ret) {
- ioasid_put(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
- }
+ if (ret)
+ goto free_sdev;
- spin_lock_irqsave(&iommu->lock, iflags);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
- (cpu_feature_enabled(X86_FEATURE_LA57) ?
- PASID_FLAG_FL5LP : 0));
- spin_unlock_irqrestore(&iommu->lock, iflags);
- if (ret) {
- if (mm)
- mmu_notifier_unregister(&svm->notifier, mm);
- ioasid_put(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
+ /* The newly allocated pasid is loaded to the mm. */
+ if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
+ load_pasid(mm, svm->pasid);
- list_add_tail(&svm->list, &global_svm_list);
- if (mm) {
- /* The newly allocated pasid is loaded to the mm. */
- load_pasid(mm, svm->pasid);
- }
- } else {
- /*
- * Binding a new device with existing PASID, need to setup
- * the PASID entry.
- */
- spin_lock_irqsave(&iommu->lock, iflags);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
- (cpu_feature_enabled(X86_FEATURE_LA57) ?
- PASID_FLAG_FL5LP : 0));
- spin_unlock_irqrestore(&iommu->lock, iflags);
- if (ret) {
- kfree(sdev);
- goto out;
- }
- }
list_add_rcu(&sdev->list, &svm->devs);
success:
- sdev->pasid = svm->pasid;
- sdev->sva.dev = dev;
- if (sd)
- *sd = sdev;
- ret = 0;
-out:
- return ret;
+ return &sdev->sva;
+
+free_sdev:
+ kfree(sdev);
+free_svm:
+ if (list_empty(&svm->devs)) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ pasid_private_remove(mm->pasid);
+ kfree(svm);
+ }
+
+ return ERR_PTR(ret);
}
/* Caller must hold pasid_mutex */
@@ -636,6 +646,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
+ struct mm_struct *mm;
int ret = -EINVAL;
iommu = device_to_iommu(dev, NULL, NULL);
@@ -645,6 +656,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
if (ret)
goto out;
+ mm = svm->mm;
if (sdev) {
sdev->users--;
@@ -663,13 +675,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
- ioasid_put(svm->pasid);
- if (svm->mm) {
- mmu_notifier_unregister(&svm->notifier, svm->mm);
+ if (svm->notifier.ops) {
+ mmu_notifier_unregister(&svm->notifier, mm);
/* Clear mm's pasid. */
- load_pasid(svm->mm, PASID_DISABLED);
+ load_pasid(mm, PASID_DISABLED);
}
- list_del(&svm->list);
+ pasid_private_remove(svm->pasid);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
@@ -678,6 +689,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree(svm);
}
}
+ /* Drop a PASID reference and free it if no reference. */
+ intel_svm_free_pasid(mm);
}
out:
return ret;
@@ -714,22 +727,6 @@ struct page_req_dsc {
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
-static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
-{
- unsigned long requested = 0;
-
- if (req->exe_req)
- requested |= VM_EXEC;
-
- if (req->rd_req)
- requested |= VM_READ;
-
- if (req->wr_req)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
static bool is_canonical_address(u64 addr)
{
int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
@@ -799,6 +796,8 @@ prq_retry:
goto prq_retry;
}
+ iopf_queue_flush_dev(dev);
+
/*
* Perform steps described in VT-d spec CH7.10 to drain page
* requests and responses in hardware.
@@ -841,8 +840,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
return prot;
}
-static int
-intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
+ struct page_req_dsc *desc)
{
struct iommu_fault_event event;
@@ -872,159 +871,136 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
*/
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
- memcpy(event.fault.prm.private_data, desc->priv_data,
- sizeof(desc->priv_data));
+ event.fault.prm.private_data[0] = desc->priv_data[0];
+ event.fault.prm.private_data[1] = desc->priv_data[1];
+ } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
+ /*
+ * If the private data fields are not used by hardware, use it
+ * to monitor the prq handle latency.
+ */
+ event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
}
return iommu_report_device_fault(dev, &event);
}
+static void handle_bad_prq_event(struct intel_iommu *iommu,
+ struct page_req_dsc *req, int result)
+{
+ struct qi_desc desc;
+
+ pr_err("%s: Invalid page request: %08llx %08llx\n",
+ iommu->name, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must
+ * respond with page group response if private data
+ * is present (PDP) or last page in group (LPIG) bit
+ * is set. This is an additional VT-d feature beyond
+ * PCI ATS spec.
+ */
+ if (!req->lpig && !req->priv_data_present)
+ return;
+
+ desc.qw0 = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID(req->rid) |
+ QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_PDP(req->priv_data_present) |
+ QI_PGRP_RESP_CODE(result) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_LPIG(req->lpig);
+
+ if (req->priv_data_present) {
+ desc.qw2 = req->priv_data[0];
+ desc.qw3 = req->priv_data[1];
+ } else {
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ }
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
- int head, tail, handled = 0;
- unsigned int flags = 0;
+ struct page_req_dsc *req;
+ int head, tail, handled;
+ u64 address;
- /* Clear PPR bit before reading head/tail registers, to
- * ensure that we get a new interrupt if needed. */
+ /*
+ * Clear PPR bit before reading head/tail registers, to ensure that
+ * we get a new interrupt if needed.
+ */
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ handled = (head != tail);
while (head != tail) {
- struct vm_area_struct *vma;
- struct page_req_dsc *req;
- struct qi_desc resp;
- int result;
- vm_fault_t ret;
- u64 address;
-
- handled = 1;
req = &iommu->prq[head / sizeof(*req)];
- result = QI_RESP_INVALID;
address = (u64)req->addr << VTD_PAGE_SHIFT;
- if (!req->pasid_present) {
- pr_err("%s: Page request without PASID: %08llx %08llx\n",
- iommu->name, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
+
+ if (unlikely(!req->pasid_present)) {
+ pr_err("IOMMU: %s: Page request without PASID\n",
+ iommu->name);
+bad_req:
+ svm = NULL;
+ sdev = NULL;
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ goto prq_advance;
}
- /* We shall not receive page request for supervisor SVM */
- if (req->pm_req && (req->rd_req | req->wr_req)) {
- pr_err("Unexpected page request in Privilege Mode");
- /* No need to find the matching sdev as for bad_req */
- goto no_pasid;
+
+ if (unlikely(!is_canonical_address(address))) {
+ pr_err("IOMMU: %s: Address is not canonical\n",
+ iommu->name);
+ goto bad_req;
}
- /* DMA read with exec requeset is not supported. */
- if (req->exe_req && req->rd_req) {
- pr_err("Execution request not supported\n");
- goto no_pasid;
+
+ if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
+ pr_err("IOMMU: %s: Page request in Privilege Mode\n",
+ iommu->name);
+ goto bad_req;
}
+
+ if (unlikely(req->exe_req && req->rd_req)) {
+ pr_err("IOMMU: %s: Execution request not supported\n",
+ iommu->name);
+ goto bad_req;
+ }
+
if (!svm || svm->pasid != req->pasid) {
- rcu_read_lock();
- svm = ioasid_find(NULL, req->pasid, NULL);
- /* It *can't* go away, because the driver is not permitted
+ /*
+ * It can't go away, because the driver is not permitted
* to unbind the mm while any page faults are outstanding.
- * So we only need RCU to protect the internal idr code. */
- rcu_read_unlock();
- if (IS_ERR_OR_NULL(svm)) {
- pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
- iommu->name, req->pasid, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
- }
+ */
+ svm = pasid_private_find(req->pasid);
+ if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
+ goto bad_req;
}
if (!sdev || sdev->sid != req->rid) {
- struct intel_svm_dev *t;
-
- sdev = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(t, &svm->devs, list) {
- if (t->sid == req->rid) {
- sdev = t;
- break;
- }
- }
- rcu_read_unlock();
+ sdev = svm_lookup_device_by_sid(svm, req->rid);
+ if (!sdev)
+ goto bad_req;
}
- /* Since we're using init_mm.pgd directly, we should never take
- * any faults on kernel addresses. */
- if (!svm->mm)
- goto bad_req;
-
- /* If address is not canonical, return invalid response */
- if (!is_canonical_address(address))
- goto bad_req;
+ sdev->prq_seq_number++;
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (svm->flags & SVM_FLAG_GUEST_MODE) {
- if (sdev && !intel_svm_prq_report(sdev->dev, req))
- goto prq_advance;
- else
- goto bad_req;
- }
-
- /* If the mm is already defunct, don't handle faults. */
- if (!mmget_not_zero(svm->mm))
- goto bad_req;
-
- mmap_read_lock(svm->mm);
- vma = find_extend_vma(svm->mm, address);
- if (!vma || address < vma->vm_start)
- goto invalid;
-
- if (access_error(vma, req))
- goto invalid;
+ if (intel_svm_prq_report(iommu, sdev->dev, req))
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
- flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
- if (req->wr_req)
- flags |= FAULT_FLAG_WRITE;
-
- ret = handle_mm_fault(vma, address, flags, NULL);
- if (ret & VM_FAULT_ERROR)
- goto invalid;
-
- result = QI_RESP_SUCCESS;
-invalid:
- mmap_read_unlock(svm->mm);
- mmput(svm->mm);
-bad_req:
- /* We get here in the error case where the PASID lookup failed,
- and these can be NULL. Do not use them below this point! */
- sdev = NULL;
- svm = NULL;
-no_pasid:
- if (req->lpig || req->priv_data_present) {
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must
- * respond with page group response if private data
- * is present (PDP) or last page in group (LPIG) bit
- * is set. This is an additional VT-d feature beyond
- * PCI ATS spec.
- */
- resp.qw0 = QI_PGRP_PASID(req->pasid) |
- QI_PGRP_DID(req->rid) |
- QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_PDP(req->priv_data_present) |
- QI_PGRP_RESP_CODE(result) |
- QI_PGRP_RESP_TYPE;
- resp.qw1 = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_LPIG(req->lpig);
- resp.qw2 = 0;
- resp.qw3 = 0;
-
- if (req->priv_data_present)
- memcpy(&resp.qw2, req->priv_data,
- sizeof(req->priv_data));
- qi_submit_sync(iommu, &resp, 1, 0);
- }
+ trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ sdev->prq_seq_number);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
@@ -1041,6 +1017,7 @@ prq_advance:
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
+ iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
iommu->name);
@@ -1053,31 +1030,42 @@ prq_advance:
return IRQ_RETVAL(handled);
}
-#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
-struct iommu_sva *
-intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
- struct iommu_sva *sva = ERR_PTR(-EINVAL);
- struct intel_svm_dev *sdev = NULL;
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
unsigned int flags = 0;
+ struct iommu_sva *sva;
int ret;
- /*
- * TODO: Consolidate with generic iommu-sva bind after it is merged.
- * It will require shared SVM data structures, i.e. combine io_mm
- * and intel_svm etc.
- */
if (drvdata)
flags = *(unsigned int *)drvdata;
+
+ if (flags & SVM_FLAG_SUPERVISOR_MODE) {
+ if (!ecap_srs(iommu->ecap)) {
+ dev_err(dev, "%s: Supervisor PASID not supported\n",
+ iommu->name);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (mm) {
+ dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
+ iommu->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mm = &init_mm;
+ }
+
mutex_lock(&pasid_mutex);
- ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
- if (ret)
- sva = ERR_PTR(ret);
- else if (sdev)
- sva = &sdev->sva;
- else
- WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+ ret = intel_svm_alloc_pasid(dev, mm, flags);
+ if (ret) {
+ mutex_unlock(&pasid_mutex);
+ return ERR_PTR(ret);
+ }
+ sva = intel_svm_bind_mm(iommu, dev, mm, flags);
+ if (IS_ERR_OR_NULL(sva))
+ intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
return sva;
@@ -1085,10 +1073,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
void intel_svm_unbind(struct iommu_sva *sva)
{
- struct intel_svm_dev *sdev;
+ struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
mutex_lock(&pasid_mutex);
- sdev = to_intel_svm_dev(sva);
intel_svm_unbind_mm(sdev->dev, sdev->pasid);
mutex_unlock(&pasid_mutex);
}
@@ -1194,9 +1181,14 @@ int intel_svm_page_response(struct device *dev,
desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
desc.qw2 = 0;
desc.qw3 = 0;
- if (private_present)
- memcpy(&desc.qw2, prm->private_data,
- sizeof(prm->private_data));
+
+ if (private_present) {
+ desc.qw2 = prm->private_data[0];
+ desc.qw3 = prm->private_data[1];
+ } else if (prm->private_data[0]) {
+ dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
+ ktime_to_ns(ktime_get()) - prm->private_data[0]);
+ }
qi_submit_sync(iommu, &desc, 1, 0);
}