Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--  drivers/iommu/intel/dmar.c           |  72
-rw-r--r--  drivers/iommu/intel/iommu.c          | 233
-rw-r--r--  drivers/iommu/intel/irq_remapping.c  |   5
-rw-r--r--  drivers/iommu/intel/pasid.c          |  75
-rw-r--r--  drivers/iommu/intel/pasid.h          |   6
-rw-r--r--  drivers/iommu/intel/svm.c            |  82
6 files changed, 268 insertions, 205 deletions
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index d5c51b5c20af..1757ac1e1623 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1140,9 +1140,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 		if (err)
 			goto err_unmap;
 
-		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
-
-		err = iommu_device_register(&iommu->iommu);
+		err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
 		if (err)
 			goto err_unmap;
 	}
@@ -1205,6 +1203,63 @@ static inline void reclaim_free_desc(struct q_inval *qi)
 	}
 }
 
+static const char *qi_type_string(u8 type)
+{
+	switch (type) {
+	case QI_CC_TYPE:
+		return "Context-cache Invalidation";
+	case QI_IOTLB_TYPE:
+		return "IOTLB Invalidation";
+	case QI_DIOTLB_TYPE:
+		return "Device-TLB Invalidation";
+	case QI_IEC_TYPE:
+		return "Interrupt Entry Cache Invalidation";
+	case QI_IWD_TYPE:
+		return "Invalidation Wait";
+	case QI_EIOTLB_TYPE:
+		return "PASID-based IOTLB Invalidation";
+	case QI_PC_TYPE:
+		return "PASID-cache Invalidation";
+	case QI_DEIOTLB_TYPE:
+		return "PASID-based Device-TLB Invalidation";
+	case QI_PGRP_RESP_TYPE:
+		return "Page Group Response";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
+{
+	unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
+	u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
+	struct qi_desc *desc = iommu->qi->desc + head;
+
+	if (fault & DMA_FSTS_IQE)
+		pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
+		       DMAR_IQER_REG_IQEI(iqe_err));
+	if (fault & DMA_FSTS_ITE)
+		pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
+		       DMAR_IQER_REG_ITESID(iqe_err));
+	if (fault & DMA_FSTS_ICE)
+		pr_err("VT-d detected Invalidation Completion Error: SID %llx",
+		       DMAR_IQER_REG_ICESID(iqe_err));
+
+	pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
+	       qi_type_string(desc->qw0 & 0xf),
+	       (unsigned long long)desc->qw0,
+	       (unsigned long long)desc->qw1);
+
+	head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
+	head <<= qi_shift(iommu);
+	desc = iommu->qi->desc + head;
+
+	pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
+	       qi_type_string(desc->qw0 & 0xf),
+	       (unsigned long long)desc->qw0,
+	       (unsigned long long)desc->qw1);
+}
+
 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
 {
 	u32 fault;
@@ -1216,6 +1271,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
 		return -EAGAIN;
 
 	fault = readl(iommu->reg + DMAR_FSTS_REG);
+	if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
+		qi_dump_fault(iommu, fault);
 
 	/*
 	 * If IQE happens, the head points to the descriptor associated
@@ -1232,12 +1289,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
 			 * used by software as private data. We won't print
 			 * out these two qw's for security consideration.
			 */
-			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
-			       (unsigned long long)desc->qw0,
-			       (unsigned long long)desc->qw1);
 			memcpy(desc, qi->desc + (wait_index << shift),
 			       1 << shift);
 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
+			pr_info("Invalidation Queue Error (IQE) cleared\n");
 			return -EINVAL;
 		}
 	}
@@ -1254,6 +1309,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
 		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
 
 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
+		pr_info("Invalidation Time-out Error (ITE) cleared\n");
 
 		do {
 			if (qi->desc_status[head] == QI_IN_USE)
@@ -1265,8 +1321,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
 			return -EAGAIN;
 	}
 
-	if (fault & DMA_FSTS_ICE)
+	if (fault & DMA_FSTS_ICE) {
 		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
+		pr_info("Invalidation Completion Error (ICE) cleared\n");
+	}
 
 	return 0;
 }
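The qi_dump_fault() helper added above walks the invalidation queue back by one slot: DMAR_IQH_REG holds a byte offset into the queue, so the code converts it to a descriptor index, steps back modulo QI_LENGTH, and converts back to a byte offset. A minimal standalone sketch of that ring arithmetic (illustrative constants, not kernel code):

	#include <stdio.h>

	#define QI_LENGTH 256	/* descriptors per queue; illustrative value */

	static unsigned int prior_offset(unsigned int head, unsigned int shift)
	{
		unsigned int idx = head >> shift;	/* byte offset -> descriptor index */

		idx = (idx + QI_LENGTH - 1) % QI_LENGTH;	/* step back one, wrapping */
		return idx << shift;	/* descriptor index -> byte offset */
	}

	int main(void)
	{
		/* with 32-byte descriptors (shift = 5), the slot before offset 0 wraps to the tail */
		printf("%u\n", prior_offset(0, 5));	/* prints 8160 == 255 * 32 */
		return 0;
	}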
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ee0932307d64..708f430af1c4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -360,7 +360,6 @@ int intel_iommu_enabled = 0;
 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 
 static int dmar_map_gfx = 1;
-static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
@@ -451,8 +450,8 @@ static int __init intel_iommu_setup(char *str)
 			dmar_map_gfx = 0;
 			pr_info("Disable GFX device mapping\n");
 		} else if (!strncmp(str, "forcedac", 8)) {
-			pr_info("Forcing DAC for PCI devices\n");
-			dmar_forcedac = 1;
+			pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
+			iommu_dma_forcedac = true;
 		} else if (!strncmp(str, "strict", 6)) {
 			pr_info("Disable batched IOTLB flush\n");
 			intel_iommu_strict = 1;
@@ -658,7 +657,14 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
 	rcu_read_lock();
 	for_each_active_iommu(iommu, drhd) {
 		if (iommu != skip) {
-			if (!ecap_sc_support(iommu->ecap)) {
+			/*
+			 * If the hardware is operating in the scalable mode,
+			 * the snooping control is always supported since we
+			 * always set PASID-table-entry.PGSNP bit if the domain
+			 * is managed outside (UNMANAGED).
+			 */
+			if (!sm_supported(iommu) &&
+			    !ecap_sc_support(iommu->ecap)) {
 				ret = 0;
 				break;
 			}
@@ -1340,6 +1346,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
 		      readl, (sts & DMA_GSTS_RTPS), sts);
 
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+	if (sm_supported(iommu))
+		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 }
 
 void iommu_flush_write_buffer(struct intel_iommu *iommu)
@@ -2289,6 +2300,41 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain,
 	return level;
 }
 
+/*
+ * Ensure that old small page tables are removed to make room for superpage(s).
+ * We're going to add new large pages, so make sure we don't remove their parent
+ * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
+ */
+static void switch_to_super_page(struct dmar_domain *domain,
+				 unsigned long start_pfn,
+				 unsigned long end_pfn, int level)
+{
+	unsigned long lvl_pages = lvl_to_nr_pages(level);
+	struct dma_pte *pte = NULL;
+	int i;
+
+	while (start_pfn <= end_pfn) {
+		if (!pte)
+			pte = pfn_to_dma_pte(domain, start_pfn, &level);
+
+		if (dma_pte_present(pte)) {
+			dma_pte_free_pagetable(domain, start_pfn,
+					       start_pfn + lvl_pages - 1,
+					       level + 1);
+
+			for_each_domain_iommu(i, domain)
+				iommu_flush_iotlb_psi(g_iommus[i], domain,
+						      start_pfn, lvl_pages,
+						      0, 0);
+		}
+
+		pte++;
+		start_pfn += lvl_pages;
+		if (first_pte_in_page(pte))
+			pte = NULL;
+	}
+}
+
 static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		 unsigned long phys_pfn, unsigned long nr_pages, int prot)
@@ -2305,8 +2351,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 		return -EINVAL;
 
 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+	attr |= DMA_FL_PTE_PRESENT;
 	if (domain_use_first_level(domain)) {
-		attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
+		attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
 
 		if (domain->domain.type == IOMMU_DOMAIN_DMA) {
 			attr |= DMA_FL_PTE_ACCESS;
@@ -2329,22 +2376,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			return -ENOMEM;
 		/* It is large page*/
 		if (largepage_lvl > 1) {
-			unsigned long nr_superpages, end_pfn;
+			unsigned long end_pfn;
 
 			pteval |= DMA_PTE_LARGE_PAGE;
-			lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
-			nr_superpages = nr_pages / lvl_pages;
-			end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
-
-			/*
-			 * Ensure that old small page tables are
-			 * removed to make room for superpage(s).
-			 * We're adding new large pages, so make sure
-			 * we don't remove their parent tables.
-			 */
-			dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
-					       largepage_lvl + 1);
+			end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+			switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
 		} else {
 			pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
 		}
@@ -2422,6 +2458,10 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn
 				   (((u16)bus) << 8) | devfn,
 				   DMA_CCMD_MASK_NOBIT,
 				   DMA_CCMD_DEVICE_INVL);
+
+	if (sm_supported(iommu))
+		qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
+
 	iommu->flush.flush_iotlb(iommu,
 				 did_old,
 				 0,
@@ -2505,6 +2545,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
 
 	flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
 
+	if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+		flags |= PASID_FLAG_PAGE_SNOOP;
+
 	return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
 					     domain->iommu_did[iommu->seq_id],
 					     flags);
@@ -3267,8 +3310,6 @@ static int __init init_dmars(void)
 		register_pasid_allocator(iommu);
 #endif
 		iommu_set_root_entry(iommu);
-		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 	}
 
 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
@@ -3458,12 +3499,7 @@ static int init_iommu_hw(void)
 		}
 
 		iommu_flush_write_buffer(iommu);
-
 		iommu_set_root_entry(iommu);
-
-		iommu->flush.flush_context(iommu, 0, 0, 0,
-					   DMA_CCMD_GLOBAL_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 		iommu_enable_translation(iommu);
 		iommu_disable_protect_mem_regions(iommu);
 	}
@@ -3846,8 +3882,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
 		goto disable_iommu;
 
 	iommu_set_root_entry(iommu);
-	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
 	iommu_enable_translation(iommu);
 
 	iommu_disable_protect_mem_regions(iommu);
@@ -4065,35 +4099,6 @@ static struct notifier_block intel_iommu_memory_nb = {
 	.priority = 0
 };
 
-static void free_all_cpu_cached_iovas(unsigned int cpu)
-{
-	int i;
-
-	for (i = 0; i < g_num_of_iommus; i++) {
-		struct intel_iommu *iommu = g_iommus[i];
-		struct dmar_domain *domain;
-		int did;
-
-		if (!iommu)
-			continue;
-
-		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
-			domain = get_iommu_domain(iommu, (u16)did);
-
-			if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
-				continue;
-
-			iommu_dma_free_cpu_cached_iovas(cpu, &domain->domain);
-		}
-	}
-}
-
-static int intel_iommu_cpu_dead(unsigned int cpu)
-{
-	free_all_cpu_cached_iovas(cpu);
-	return 0;
-}
-
 static void intel_disable_iommus(void)
 {
 	struct intel_iommu *iommu = NULL;
@@ -4377,19 +4382,28 @@ int __init intel_iommu_init(void)
 
 	down_read(&dmar_global_lock);
 	for_each_active_iommu(iommu, drhd) {
+		/*
+		 * The flush queue implementation does not perform
+		 * page-selective invalidations that are required for efficient
+		 * TLB flushes in virtual environments. The benefit of batching
+		 * is likely to be much lower than the overhead of synchronizing
+		 * the virtual and physical IOMMU page-tables.
+		 */
+		if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
+			pr_warn("IOMMU batching is disabled due to virtualization");
+			intel_iommu_strict = 1;
+		}
 		iommu_device_sysfs_add(&iommu->iommu, NULL,
 				       intel_iommu_groups,
 				       "%s", iommu->name);
-		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
-		iommu_device_register(&iommu->iommu);
+		iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
 	}
 	up_read(&dmar_global_lock);
 
+	iommu_set_dma_strict(intel_iommu_strict);
 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
 	if (si_domain && !hw_pass_through)
 		register_memory_notifier(&intel_iommu_memory_nb);
-	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
-			  intel_iommu_cpu_dead);
 
 	down_read(&dmar_global_lock);
 	if (probe_acpi_namespace_devices())
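Both the intel_iommu_init() hunk above and the alloc_iommu() hunk in dmar.c convert to the consolidated registration interface: the ops pointer that used to be attached with a separate iommu_device_set_ops() call is now an argument to iommu_device_register() itself, along with an optional hardware struct device (NULL here). Schematically, using only the two calls that appear in this diff:

	/* before: two steps */
	iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
	err = iommu_device_register(&iommu->iommu);

	/* after: one step; the third argument is an optional hardware device */
	err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);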
@@ -5343,6 +5357,8 @@ static int siov_find_pci_dvsec(struct pci_dev *pdev)
 static bool
 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
 {
+	struct device_domain_info *info = get_domain_info(dev);
+
 	if (feat == IOMMU_DEV_FEAT_AUX) {
 		int ret;
 
@@ -5357,13 +5373,13 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
 		return !!siov_find_pci_dvsec(to_pci_dev(dev));
 	}
 
-	if (feat == IOMMU_DEV_FEAT_SVA) {
-		struct device_domain_info *info = get_domain_info(dev);
+	if (feat == IOMMU_DEV_FEAT_IOPF)
+		return info && info->pri_supported;
 
+	if (feat == IOMMU_DEV_FEAT_SVA)
 		return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
 			info->pasid_supported && info->pri_supported &&
 			info->ats_supported;
-	}
 
 	return false;
 }
@@ -5374,12 +5390,18 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
 	if (feat == IOMMU_DEV_FEAT_AUX)
 		return intel_iommu_enable_auxd(dev);
 
+	if (feat == IOMMU_DEV_FEAT_IOPF)
+		return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
+
 	if (feat == IOMMU_DEV_FEAT_SVA) {
 		struct device_domain_info *info = get_domain_info(dev);
 
 		if (!info)
 			return -EINVAL;
 
+		if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+			return -EINVAL;
+
 		if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
 			return 0;
 	}
@@ -5423,87 +5445,23 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
 }
 
 static int
-intel_iommu_domain_set_attr(struct iommu_domain *domain,
-			    enum iommu_attr attr, void *data)
+intel_iommu_enable_nesting(struct iommu_domain *domain)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	unsigned long flags;
-	int ret = 0;
-
-	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-		return -EINVAL;
+	int ret = -ENODEV;
 
-	switch (attr) {
-	case DOMAIN_ATTR_NESTING:
-		spin_lock_irqsave(&device_domain_lock, flags);
-		if (nested_mode_support() &&
-		    list_empty(&dmar_domain->devices)) {
-			dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
-			dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
-		} else {
-			ret = -ENODEV;
-		}
-		spin_unlock_irqrestore(&device_domain_lock, flags);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
+	spin_lock_irqsave(&device_domain_lock, flags);
+	if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
+		dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
+		dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
+		ret = 0;
 	}
+	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	return ret;
 }
 
-static bool domain_use_flush_queue(void)
-{
-	struct dmar_drhd_unit *drhd;
-	struct intel_iommu *iommu;
-	bool r = true;
-
-	if (intel_iommu_strict)
-		return false;
-
-	/*
-	 * The flush queue implementation does not perform page-selective
-	 * invalidations that are required for efficient TLB flushes in virtual
-	 * environments. The benefit of batching is likely to be much lower than
-	 * the overhead of synchronizing the virtual and physical IOMMU
-	 * page-tables.
-	 */
-	rcu_read_lock();
-	for_each_active_iommu(iommu, drhd) {
-		if (!cap_caching_mode(iommu->cap))
-			continue;
-
-		pr_warn_once("IOMMU batching is disabled due to virtualization");
-		r = false;
-		break;
-	}
-	rcu_read_unlock();
-
-	return r;
-}
-
-static int
-intel_iommu_domain_get_attr(struct iommu_domain *domain,
-			    enum iommu_attr attr, void *data)
-{
-	switch (domain->type) {
-	case IOMMU_DOMAIN_UNMANAGED:
-		return -ENODEV;
-	case IOMMU_DOMAIN_DMA:
-		switch (attr) {
-		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
-			*(int *)data = domain_use_flush_queue();
-			return 0;
-		default:
-			return -ENODEV;
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-}
-
 /*
  * Check that the device does not live on an external facing PCI port that is
  * marked as untrusted. Such devices should not be able to apply quirks and
@@ -5576,8 +5534,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.capable		= intel_iommu_capable,
 	.domain_alloc		= intel_iommu_domain_alloc,
 	.domain_free		= intel_iommu_domain_free,
-	.domain_get_attr	= intel_iommu_domain_get_attr,
-	.domain_set_attr	= intel_iommu_domain_set_attr,
+	.enable_nesting		= intel_iommu_enable_nesting,
 	.attach_dev		= intel_iommu_attach_device,
 	.detach_dev		= intel_iommu_detach_device,
 	.aux_attach_dev		= intel_iommu_aux_attach_device,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 611ef5243cb6..f912fe45bea2 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -736,7 +736,7 @@ static int __init intel_prepare_irq_remapping(void)
 		return -ENODEV;
 
 	if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
-		goto error;
+		return -ENODEV;
 
 	if (!dmar_ir_support())
 		return -ENODEV;
@@ -1280,7 +1280,8 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
 		break;
 	case X86_IRQ_ALLOC_TYPE_PCI_MSI:
 	case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
-		set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
+		set_msi_sid(irte,
+			    pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
 		break;
 	default:
 		BUG_ON(1);
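The irq_remapping.c hunk above validates MSI source-ids against the device that actually issues the DMA writes (pci_real_dma_dev()) rather than the device the MSI descriptor belongs to, which matters when another device performs DMA on a device's behalf. The source-id being validated is the ordinary PCI requester ID, the same bus/devfn encoding visible in the domain_context_clear_one() hunk earlier; a one-function sketch of its layout:

	/* PCI requester ID: 8-bit bus number above the 5-bit device + 3-bit function */
	static inline unsigned short requester_id(unsigned char bus, unsigned char devfn)
	{
		return ((unsigned short)bus << 8) | devfn;
	}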
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index f26cb6195b2c..72646bafc52f 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -24,7 +24,6 @@
 /*
  * Intel IOMMU system wide PASID name space:
  */
-static DEFINE_SPINLOCK(pasid_lock);
 u32 intel_pasid_max_id = PASID_MAX;
 
 int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
@@ -231,7 +230,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev)
 	return info->pasid_table;
 }
 
-int intel_pasid_get_dev_max_id(struct device *dev)
+static int intel_pasid_get_dev_max_id(struct device *dev)
 {
 	struct device_domain_info *info;
 
@@ -242,7 +241,7 @@ int intel_pasid_get_dev_max_id(struct device *dev)
 	return info->pasid_table->max_pasid;
 }
 
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
 {
 	struct device_domain_info *info;
 	struct pasid_table *pasid_table;
@@ -259,19 +258,25 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
 	dir_index = pasid >> PASID_PDE_SHIFT;
 	index = pasid & PASID_PTE_MASK;
 
-	spin_lock(&pasid_lock);
+retry:
 	entries = get_pasid_table_from_pde(&dir[dir_index]);
 	if (!entries) {
 		entries = alloc_pgtable_page(info->iommu->node);
-		if (!entries) {
-			spin_unlock(&pasid_lock);
+		if (!entries)
 			return NULL;
-		}
 
-		WRITE_ONCE(dir[dir_index].val,
-			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
+		/*
+		 * The pasid directory table entry won't be freed after
+		 * allocation. No worry about the race with free and
+		 * clear. However, this entry might be populated by others
+		 * while we are preparing it. Use theirs with a retry.
+		 */
+		if (cmpxchg64(&dir[dir_index].val, 0ULL,
+			      (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
+			free_pgtable_page(entries);
+			goto retry;
+		}
 	}
-	spin_unlock(&pasid_lock);
 
 	return &entries[index];
 }
@@ -394,6 +399,15 @@ static inline void pasid_set_sre(struct pasid_entry *pe)
 }
 
 /*
+ * Setup the WPE(Write Protect Enable) field (Bit 132) of a
+ * scalable mode PASID entry.
+ */
+static inline void pasid_set_wpe(struct pasid_entry *pe)
+{
+	pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4);
+}
+
+/*
  * Setup the P(Present) field (Bit 0) of a scalable mode PASID
  * entry.
  */
@@ -412,6 +426,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
 }
 
 /*
+ * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode
+ * PASID entry.
+ */
+static inline void
+pasid_set_pgsnp(struct pasid_entry *pe)
+{
+	pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24);
+}
+
+/*
  * Setup the First Level Page table Pointer field (Bit 140~191)
  * of a scalable mode PASID entry.
  */
@@ -493,6 +517,9 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
 	if (WARN_ON(!pte))
 		return;
 
+	if (!(pte->val[0] & PASID_PTE_PRESENT))
+		return;
+
 	did = pasid_get_domain_id(pte);
 	intel_pasid_clear_entry(dev, pasid, fault_ignore);
 
@@ -522,6 +549,22 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
 	}
 }
 
+static inline int pasid_enable_wpe(struct pasid_entry *pte)
+{
+#ifdef CONFIG_X86
+	unsigned long cr0 = read_cr0();
+
+	/* CR0.WP is normally set but just to be sure */
+	if (unlikely(!(cr0 & X86_CR0_WP))) {
+		pr_err_ratelimited("No CPU write protect!\n");
+		return -EINVAL;
+	}
+#endif
+	pasid_set_wpe(pte);
+
+	return 0;
+};
+
 /*
  * Set up the scalable mode pasid table entry for first only
  * translation type.
 */
@@ -553,6 +596,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 			return -EINVAL;
 		}
 		pasid_set_sre(pte);
+		if (pasid_enable_wpe(pte))
+			return -EINVAL;
+
 	}
 
 	if (flags & PASID_FLAG_FL5LP) {
@@ -565,6 +611,9 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 		}
 	}
 
+	if (flags & PASID_FLAG_PAGE_SNOOP)
+		pasid_set_pgsnp(pte);
+
 	pasid_set_domain_id(pte, did);
 	pasid_set_address_width(pte, iommu->agaw);
 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
@@ -643,6 +692,9 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	pasid_set_fault_enable(pte);
 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
 
+	if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+		pasid_set_pgsnp(pte);
+
 	/*
	 * Since it is a second level only translation setup, we should
	 * set SRE bit as well (addresses are expected to be GPAs).
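The intel_pasid_get_entry() hunk above replaces pasid_lock with a lock-free publish: allocate a page, install it with a single cmpxchg64(), and if another thread raced ahead, free the local copy and retry with theirs. A self-contained userspace sketch of the same pattern using C11 atomics (the names and the 4 KiB size are illustrative, not kernel API):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdlib.h>

	static void *install_slot(_Atomic uint64_t *slot)
	{
		for (;;) {
			uint64_t cur = atomic_load(slot);

			if (cur)	/* someone already populated this slot */
				return (void *)(uintptr_t)cur;

			void *page = calloc(1, 4096);
			if (!page)
				return NULL;

			uint64_t expected = 0;
			if (atomic_compare_exchange_strong(slot, &expected,
							   (uint64_t)(uintptr_t)page))
				return page;	/* we published our page */

			free(page);	/* lost the race; retry and use theirs */
		}
	}

This works here because, as the added comment notes, a published directory entry is never freed or cleared, so a winner's pointer stays valid for every loser that rereads the slot.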
@@ -706,6 +758,9 @@ intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
 			return -EINVAL;
 		}
 		pasid_set_sre(pte);
+		/* Enable write protect WP if guest requested */
+		if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_WPE)
+			pasid_set_wpe(pte);
 	}
 
 	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 444c0bec221a..5ff61c3d401f 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -48,6 +48,7 @@
  */
 #define PASID_FLAG_SUPERVISOR_MODE	BIT(0)
 #define PASID_FLAG_NESTED		BIT(1)
+#define PASID_FLAG_PAGE_SNOOP		BIT(2)
 
 /*
  * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
@@ -99,14 +100,9 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte)
 }
 
 extern unsigned int intel_pasid_max_id;
-int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
-void intel_pasid_free_id(u32 pasid);
-void *intel_pasid_lookup_id(u32 pasid);
 int intel_pasid_alloc_table(struct device *dev);
 void intel_pasid_free_table(struct device *dev);
 struct pasid_table *intel_pasid_get_table(struct device *dev);
-int intel_pasid_get_dev_max_id(struct device *dev);
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid);
 int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 				  struct device *dev, pgd_t *pgd,
 				  u32 pasid, u16 did, int flags);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 574a7e657a9a..5165cea90421 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -462,13 +462,12 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
 
 /* Caller must hold pasid_mutex, mm reference */
 static int
 intel_svm_bind_mm(struct device *dev, unsigned int flags,
-		  struct svm_dev_ops *ops,
 		  struct mm_struct *mm, struct intel_svm_dev **sd)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+	struct intel_svm *svm = NULL, *t;
 	struct device_domain_info *info;
 	struct intel_svm_dev *sdev;
-	struct intel_svm *svm = NULL;
 	unsigned long iflags;
 	int pasid_max;
 	int ret;
@@ -494,34 +493,26 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 		}
 	}
 
-	if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
-		struct intel_svm *t;
-
-		list_for_each_entry(t, &global_svm_list, list) {
-			if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
-				continue;
-
-			svm = t;
-			if (svm->pasid >= pasid_max) {
-				dev_warn(dev,
-					 "Limited PASID width. Cannot use existing PASID %d\n",
-					 svm->pasid);
-				ret = -ENOSPC;
-				goto out;
-			}
+	list_for_each_entry(t, &global_svm_list, list) {
+		if (t->mm != mm)
+			continue;
 
-			/* Find the matching device in svm list */
-			for_each_svm_dev(sdev, svm, dev) {
-				if (sdev->ops != ops) {
-					ret = -EBUSY;
-					goto out;
-				}
-				sdev->users++;
-				goto success;
-			}
+		svm = t;
+		if (svm->pasid >= pasid_max) {
+			dev_warn(dev,
+				 "Limited PASID width. Cannot use existing PASID %d\n",
+				 svm->pasid);
+			ret = -ENOSPC;
+			goto out;
+		}
 
-			break;
+		/* Find the matching device in svm list */
+		for_each_svm_dev(sdev, svm, dev) {
+			sdev->users++;
+			goto success;
 		}
+
+		break;
 	}
 
 	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
@@ -550,7 +541,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 
 	/* Finish the setup now we know we're keeping it */
 	sdev->users = 1;
-	sdev->ops = ops;
 	init_rcu_head(&sdev->rcu);
 
 	if (!svm) {
@@ -862,7 +852,7 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
 	/* Fill in event data for device specific processing */
 	memset(&event, 0, sizeof(struct iommu_fault_event));
 	event.fault.type = IOMMU_FAULT_PAGE_REQ;
-	event.fault.prm.addr = desc->addr;
+	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
 	event.fault.prm.pasid = desc->pasid;
 	event.fault.prm.grpid = desc->prg_index;
 	event.fault.prm.perm = prq_to_iommu_prot(desc);
@@ -895,6 +885,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 	struct intel_iommu *iommu = d;
 	struct intel_svm *svm = NULL;
 	int head, tail, handled = 0;
+	unsigned int flags = 0;
 
 	/* Clear PPR bit before reading head/tail registers, to
 	 * ensure that we get a new interrupt if needed. */
@@ -920,7 +911,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 			       ((unsigned long long *)req)[1]);
 			goto no_pasid;
 		}
-
+		/* We shall not receive page request for supervisor SVM */
+		if (req->pm_req && (req->rd_req | req->wr_req)) {
+			pr_err("Unexpected page request in Privilege Mode");
+			/* No need to find the matching sdev as for bad_req */
+			goto no_pasid;
+		}
+		/* DMA read with exec request is not supported. */
+		if (req->exe_req && req->rd_req) {
+			pr_err("Execution request not supported\n");
+			goto no_pasid;
+		}
 		if (!svm || svm->pasid != req->pasid) {
 			rcu_read_lock();
 			svm = ioasid_find(NULL, req->pasid, NULL);
@@ -982,9 +983,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		if (access_error(vma, req))
 			goto invalid;
 
-		ret = handle_mm_fault(vma, address,
-				      req->wr_req ? FAULT_FLAG_WRITE : 0,
-				      NULL);
+		flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
+		if (req->wr_req)
+			flags |= FAULT_FLAG_WRITE;
+
+		ret = handle_mm_fault(vma, address, flags, NULL);
 		if (ret & VM_FAULT_ERROR)
 			goto invalid;
 
@@ -993,13 +996,6 @@ invalid:
 		mmap_read_unlock(svm->mm);
 		mmput(svm->mm);
 bad_req:
-		WARN_ON(!sdev);
-		if (sdev && sdev->ops && sdev->ops->fault_cb) {
-			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
-				(req->exe_req << 1) | (req->pm_req);
-			sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
-					    req->priv_data, rwxp, result);
-		}
 		/* We get here in the error case where the PASID lookup failed,
 		   and these can be NULL. Do not use them below this point! */
 		sdev = NULL;
@@ -1021,12 +1017,12 @@ no_pasid:
 				QI_PGRP_RESP_TYPE;
 			resp.qw1 = QI_PGRP_IDX(req->prg_index) |
 				QI_PGRP_LPIG(req->lpig);
+			resp.qw2 = 0;
+			resp.qw3 = 0;
 
 			if (req->priv_data_present)
 				memcpy(&resp.qw2, req->priv_data,
 				       sizeof(req->priv_data));
-			resp.qw2 = 0;
-			resp.qw3 = 0;
 			qi_submit_sync(iommu, &resp, 1, 0);
 		}
 prq_advance:
@@ -1074,7 +1070,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 	if (drvdata)
 		flags = *(unsigned int *)drvdata;
 	mutex_lock(&pasid_mutex);
-	ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
+	ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
 	if (ret)
 		sva = ERR_PTR(ret);
 	else if (sdev)
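The intel_svm_prq_report() fix above reflects that the VT-d page request descriptor stores only the upper bits of the faulting address (bits 63:12; the address is page aligned), so reporting it as a byte address requires shifting the field left by VTD_PAGE_SHIFT; previously the raw field value was reported. An illustration with a made-up descriptor value:

	#include <stdint.h>
	#include <stdio.h>

	#define VTD_PAGE_SHIFT 12

	int main(void)
	{
		uint64_t desc_addr = 0x12345;	/* bits 63:12 of the faulting VA, as stored */
		uint64_t va = desc_addr << VTD_PAGE_SHIFT;

		printf("0x%llx\n", (unsigned long long)va);	/* prints 0x12345000 */
		return 0;
	}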