aboutsummaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r--drivers/iommu/intel/iommu.c210
1 files changed, 135 insertions, 75 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9129663a7406..e9864e52b0e9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -48,7 +48,7 @@
#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
-#include "intel-pasid.h"
+#include "pasid.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -356,6 +356,7 @@ static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;
+static int iommu_skip_te_disable;
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
@@ -372,7 +373,7 @@ struct device_domain_info *get_domain_info(struct device *dev)
if (!dev)
return NULL;
- info = dev->archdata.iommu;
+ info = dev_iommu_priv_get(dev);
if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
info == DEFER_DEVICE_DOMAIN_INFO))
return NULL;
@@ -612,6 +613,12 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
return g_iommus[iommu_id];
}
+static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
+{
+ return sm_supported(iommu) ?
+ ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
+}
+
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
struct dmar_drhd_unit *drhd;
@@ -623,7 +630,7 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
for_each_domain_iommu(i, domain) {
found = true;
- if (!ecap_coherent(g_iommus[i]->ecap)) {
+ if (!iommu_paging_structure_coherency(g_iommus[i])) {
domain->iommu_coherency = 0;
break;
}
@@ -634,7 +641,7 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
/* No hardware attached; use lowest common denominator */
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
- if (!ecap_coherent(iommu->ecap)) {
+ if (!iommu_paging_structure_coherency(iommu)) {
domain->iommu_coherency = 0;
break;
}
@@ -737,12 +744,12 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
static int iommu_dummy(struct device *dev)
{
- return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+ return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
}
static bool attach_deferred(struct device *dev)
{
- return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+ return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
}
/**
@@ -772,16 +779,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
return false;
}
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
+ struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
- struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
- if (iommu_dummy(dev))
+ if (!dev || iommu_dummy(dev))
return NULL;
if (dev_is_pci(dev)) {
@@ -812,8 +819,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && pdev->is_virtfn)
goto got_pdev;
- *bus = drhd->devices[i].bus;
- *devfn = drhd->devices[i].devfn;
+ if (bus && devfn) {
+ *bus = drhd->devices[i].bus;
+ *devfn = drhd->devices[i].devfn;
+ }
goto out;
}
@@ -823,8 +832,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && drhd->include_all) {
got_pdev:
- *bus = pdev->bus->number;
- *devfn = pdev->devfn;
+ if (bus && devfn) {
+ *bus = pdev->bus->number;
+ *devfn = pdev->devfn;
+ }
goto out;
}
}
@@ -921,7 +932,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
if (domain_use_first_level(domain))
- pteval |= DMA_FL_PTE_XD;
+ pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -1623,6 +1634,10 @@ static void iommu_disable_translation(struct intel_iommu *iommu)
u32 sts;
unsigned long flag;
+ if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
+ (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
+ return;
+
raw_spin_lock_irqsave(&iommu->register_lock, flag);
iommu->gcmd &= ~DMA_GCMD_TE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1951,7 +1966,6 @@ static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
{
context->hi |= pasid & ((1 << 20) - 1);
- context->hi |= (1 << 20);
}
/*
@@ -2095,7 +2109,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
context_set_fault_enable(context);
context_set_present(context);
- domain_flush_cache(domain, context, sizeof(*context));
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
/*
* It's a non-present to present mapping. If hardware doesn't cache
@@ -2230,7 +2245,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long nr_pages, int prot)
{
struct dma_pte *first_pte = NULL, *pte = NULL;
- phys_addr_t uninitialized_var(pteval);
+ phys_addr_t pteval;
unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
@@ -2243,7 +2258,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
if (domain_use_first_level(domain))
- attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
if (!sg) {
sg_res = nr_pages;
@@ -2414,7 +2429,7 @@ static inline void unlink_domain_info(struct device_domain_info *info)
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->archdata.iommu = NULL;
+ dev_iommu_priv_set(info->dev, NULL);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -2447,7 +2462,7 @@ static void do_deferred_attach(struct device *dev)
{
struct iommu_domain *domain;
- dev->archdata.iommu = NULL;
+ dev_iommu_priv_set(dev, NULL);
domain = iommu_get_domain_for_dev(dev);
if (domain)
intel_iommu_attach_device(domain, dev);
@@ -2554,7 +2569,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
@@ -2593,7 +2608,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
if (dev)
- dev->archdata.iommu = info;
+ dev_iommu_priv_set(dev, info);
spin_unlock_irqrestore(&device_domain_lock, flags);
/* PASID table is mandatory for a PCI device in scalable mode. */
@@ -2695,7 +2710,9 @@ static int __init si_domain_init(int hw)
end >> agaw_to_width(si_domain->agaw)))
continue;
- ret = iommu_domain_identity_map(si_domain, start, end);
+ ret = iommu_domain_identity_map(si_domain,
+ mm_to_dma_pfn(start >> PAGE_SHIFT),
+ mm_to_dma_pfn(end >> PAGE_SHIFT));
if (ret)
return ret;
}
@@ -3996,7 +4013,7 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO);
}
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
@@ -4031,11 +4048,12 @@ static void __init init_no_remapping_devices(void)
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
+ drhd->gfx_dedicated = 1;
if (!dmar_map_gfx) {
drhd->ignored = 1;
for_each_active_dev_scope(drhd->devices,
drhd->devices_cnt, i, dev)
- dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO);
}
}
}
@@ -4730,12 +4748,12 @@ const struct attribute_group *intel_iommu_groups[] = {
NULL,
};
-static inline bool has_untrusted_dev(void)
+static inline bool has_external_pci(void)
{
struct pci_dev *pdev = NULL;
for_each_pci_dev(pdev)
- if (pdev->untrusted)
+ if (pdev->external_facing)
return true;
return false;
@@ -4743,7 +4761,7 @@ static inline bool has_untrusted_dev(void)
static int __init platform_optin_force_iommu(void)
{
- if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
+ if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
return 0;
if (no_iommu || dmar_disabled)
@@ -5138,11 +5156,10 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
struct device *dev)
{
int ret;
- u8 bus, devfn;
unsigned long flags;
struct intel_iommu *iommu;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5228,9 +5245,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5408,7 +5424,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
sid = PCI_DEVID(bus, devfn);
/* Size is only valid in address selective invalidation */
- if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
+ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
size = to_vtd_size(inv_info->addr_info.granule_size,
inv_info->addr_info.nb_granules);
@@ -5417,6 +5433,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
IOMMU_CACHE_INV_TYPE_NR) {
int granu = 0;
u64 pasid = 0;
+ u64 addr = 0;
granu = to_vtd_granularity(cache_type, inv_info->granularity);
if (granu == -EINVAL) {
@@ -5438,13 +5455,12 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
switch (BIT(cache_type)) {
case IOMMU_CACHE_INV_TYPE_IOTLB:
+ /* HW will ignore LSB bits based on address mask */
if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
size &&
(inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
- pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+ pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
inv_info->addr_info.addr, size);
- ret = -ERANGE;
- goto out_unlock;
}
/*
@@ -5456,25 +5472,35 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+ if (!info->ats_enabled)
+ break;
/*
* Always flush device IOTLB if ATS is enabled. vIOMMU
* in the guest may assume IOTLB flush is inclusive,
* which is more efficient.
*/
- if (info->ats_enabled)
- qi_flush_dev_iotlb_pasid(iommu, sid,
- info->pfsid, pasid,
- info->ats_qdep,
- inv_info->addr_info.addr,
- size, granu);
- break;
+ fallthrough;
case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+ /*
+ * PASID based device TLB invalidation does not support
+ * IOMMU_INV_GRANU_PASID granularity but only supports
+ * IOMMU_INV_GRANU_ADDR.
+ * The equivalent of that is we set the size to be the
+ * entire range of 64 bit. User only provides PASID info
+ * without address info. So we set addr to 0.
+ */
+ if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
+ size = 64 - VTD_PAGE_SHIFT;
+ addr = 0;
+ } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
+ addr = inv_info->addr_info.addr;
+ }
+
if (info->ats_enabled)
qi_flush_dev_iotlb_pasid(iommu, sid,
info->pfsid, pasid,
- info->ats_qdep,
- inv_info->addr_info.addr,
- size, granu);
+ info->ats_qdep, addr,
+ size);
else
pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
break;
@@ -5650,14 +5676,13 @@ static bool intel_iommu_capable(enum iommu_cap cap)
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return ERR_PTR(-ENODEV);
if (translation_pre_enabled(iommu))
- dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
return &iommu->iommu;
}
@@ -5665,9 +5690,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return;
@@ -5817,37 +5841,14 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
- struct intel_iommu *iommu;
- u8 bus, devfn;
-
- if (iommu_dummy(dev)) {
- dev_warn(dev,
- "No IOMMU translation for device; cannot enable SVM\n");
- return NULL;
- }
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if ((!iommu)) {
- dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
- return NULL;
- }
-
- return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
static int intel_iommu_enable_auxd(struct device *dev)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
- u8 bus, devfn;
int ret;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu || dmar_disabled)
return -EINVAL;
@@ -6021,6 +6022,23 @@ intel_iommu_domain_set_attr(struct iommu_domain *domain,
return ret;
}
+/*
+ * Check that the device does not live on an external facing PCI port that is
+ * marked as untrusted. Such devices should not be able to apply quirks and
+ * thus not be able to bypass the IOMMU restrictions.
+ */
+static bool risky_device(struct pci_dev *pdev)
+{
+ if (pdev->untrusted) {
+ pci_info(pdev,
+ "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
+ pdev->vendor, pdev->device);
+ pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
+ return true;
+ }
+ return false;
+}
+
const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
@@ -6055,11 +6073,15 @@ const struct iommu_ops intel_iommu_ops = {
.sva_bind = intel_svm_bind,
.sva_unbind = intel_svm_unbind,
.sva_get_pasid = intel_svm_get_pasid,
+ .page_response = intel_svm_page_response,
#endif
};
static void quirk_iommu_igfx(struct pci_dev *dev)
{
+ if (risky_device(dev))
+ return;
+
pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
dmar_map_gfx = 0;
}
@@ -6101,6 +6123,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
static void quirk_iommu_rwbf(struct pci_dev *dev)
{
+ if (risky_device(dev))
+ return;
+
/*
* Mobile 4 Series Chipset neglects to set RWBF capability,
* but needs it. Same seems to hold for the desktop versions.
@@ -6131,6 +6156,9 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
unsigned short ggc;
+ if (risky_device(dev))
+ return;
+
if (pci_read_config_word(dev, GGC, &ggc))
return;
@@ -6148,6 +6176,27 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_g
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
+static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
+{
+ unsigned short ver;
+
+ if (!IS_GFX_DEVICE(dev))
+ return;
+
+ ver = (dev->device >> 8) & 0xff;
+ if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
+ ver != 0x4e && ver != 0x8a && ver != 0x98 &&
+ ver != 0x9a)
+ return;
+
+ if (risky_device(dev))
+ return;
+
+ pci_info(dev, "Skip IOMMU disabling for graphics\n");
+ iommu_skip_te_disable = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
+
/* On Tylersburg chipsets, some BIOSes have been known to enable the
ISOCH DMAR unit for the Azalia sound device, but not give it any
TLB entries, which causes it to deadlock. Check for that. We do
@@ -6164,6 +6213,12 @@ static void __init check_tylersburg_isoch(void)
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
if (!pdev)
return;
+
+ if (risky_device(pdev)) {
+ pci_dev_put(pdev);
+ return;
+ }
+
pci_dev_put(pdev);
/* System Management Registers. Might be hidden, in which case
@@ -6173,6 +6228,11 @@ static void __init check_tylersburg_isoch(void)
if (!pdev)
return;
+ if (risky_device(pdev)) {
+ pci_dev_put(pdev);
+ return;
+ }
+
if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
pci_dev_put(pdev);
return;