From 6c17c7d5936e6af6a5bda9f9de98a5e2ee6e8a6f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 7 Aug 2024 15:19:20 -0300 Subject: iommu: Allow ATS to work on VFs when the PF uses IDENTITY PCI ATS has a global Smallest Translation Unit field that is located in the PF but shared by all of the VFs. The expectation is that the STU will be set to the root port's global STU capability which is driven by the IO page table configuration of the iommu HW. Today it becomes set when the iommu driver first enables ATS. Thus, to enable ATS on the VF, the PF must have already had the correct STU programmed, even if ATS is off on the PF. Unfortunately the PF only programs the STU when the PF enables ATS. The iommu drivers tend to leave ATS disabled when IDENTITY translation is being used. Thus we can get into a state where the PF is setup to use IDENTITY with the DMA API while the VF would like to use VFIO with a PAGING domain and have ATS turned on. This fails because the PF never loaded a PAGING domain and so it never setup the STU, and the VF can't do it. The simplest solution is to have the iommu driver set the ATS STU when it probes the device. This way the ATS STU is loaded immediately at boot time to all PFs and there is no issue when a VF comes to use it. Add a new call pci_prepare_ats() which should be called by iommu drivers in their probe_device() op for every PCI device if the iommu driver supports ATS. This will setup the STU based on whatever page size capability the iommu HW has. Signed-off-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-0fb4d2ab6770+7e706-ats_vf_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9ff8b83c19a3..ad81db026ab2 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4091,6 +4091,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) dev_iommu_priv_set(dev, info); if (pdev && pci_ats_supported(pdev)) { + pci_prepare_ats(pdev, VTD_PAGE_SHIFT); ret = device_rbtree_insert(iommu, info); if (ret) goto free; -- cgit From 184bee388d4661c3fea633f135a5c45ff03c7ec6 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:11 +0800 Subject: iommu/vt-d: Require DMA domain if hardware not support passthrough The iommu core defines the def_domain_type callback to query the iommu driver about hardware capability and quirks. The iommu driver should declare IOMMU_DOMAIN_DMA requirement for hardware lacking pass-through capability. Earlier VT-d hardware implementations did not support pass-through translation mode. The iommu driver relied on a paging domain with all physical system memory addresses identically mapped to the same IOVA to simulate pass-through translation before the def_domain_type was introduced and it has been kept until now. It's time to adjust it now to make the Intel iommu driver follow the def_domain_type semantics. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240809055431.36513-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9ff8b83c19a3..90ad794a1be7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2149,6 +2149,16 @@ static bool device_rmrr_is_relaxable(struct device *dev) static int device_def_domain_type(struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * Hardware does not support the passthrough translation mode. + * Always use a dynamaic mapping domain. + */ + if (!ecap_pass_through(iommu->ecap)) + return IOMMU_DOMAIN_DMA; + if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); -- cgit From 9e74e1b8198fd07fcbb4266771ca0f5195c71d9c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:12 +0800 Subject: iommu/vt-d: Remove identity mappings from si_domain As the driver has enforced DMA domains for devices managed by an IOMMU hardware that doesn't support passthrough translation mode, there is no need for static identity mappings in the si_domain. Remove the identity mapping code to avoid dead code. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 122 ++------------------------------------------ 1 file changed, 4 insertions(+), 118 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 90ad794a1be7..723ea9f3f501 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -167,14 +167,7 @@ static void device_rbtree_remove(struct device_domain_info *info) spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); } -/* - * This domain is a statically identity mapping domain. - * 1. This domain creats a static 1:1 mapping to all usable memory. - * 2. It maps to each iommu if successful. - * 3. Each iommu mapps to this domain if successful. - */ static struct dmar_domain *si_domain; -static int hw_pass_through = 1; struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ @@ -1647,7 +1640,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int agaw, ret; - if (hw_pass_through && domain_type_is_si(domain)) + if (domain_type_is_si(domain)) translation = CONTEXT_TT_PASS_THROUGH; pr_debug("Set context mapping for %02x:%02x.%d\n", @@ -1998,29 +1991,10 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long first_vpfn, - unsigned long last_vpfn) -{ - /* - * RMRR range might have overlap with physical memory range, - * clear it first - */ - dma_pte_clear_range(domain, first_vpfn, last_vpfn); - - return __domain_mapping(domain, first_vpfn, - first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); -static int __init si_domain_init(int hw) +static int __init si_domain_init(void) { - struct dmar_rmrr_unit *rmrr; - struct device *dev; - int i, nid, ret; - si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY); if (!si_domain) return -EFAULT; @@ -2031,44 +2005,6 @@ static int __init si_domain_init(int hw) return -EFAULT; } - if (hw) - return 0; - - for_each_online_node(nid) { - unsigned long start_pfn, end_pfn; - int i; - - for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { - ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn_start(start_pfn), - mm_to_dma_pfn_end(end_pfn-1)); - if (ret) - return ret; - } - } - - /* - * Identity map the RMRRs so that devices with RMRRs could also use - * the si_domain. - */ - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, dev) { - unsigned long long start = rmrr->base_address; - unsigned long long end = rmrr->end_address; - - if (WARN_ON(end < start || - end >> agaw_to_width(si_domain->agaw))) - continue; - - ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn_start(start >> PAGE_SHIFT), - mm_to_dma_pfn_end(end >> PAGE_SHIFT)); - if (ret) - return ret; - } - } - return 0; } @@ -2094,7 +2030,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!sm_supported(iommu)) ret = domain_context_mapping(domain, dev); - else if (hw_pass_through && domain_type_is_si(domain)) + else if (domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); @@ -2449,8 +2385,6 @@ static int __init init_dmars(void) } } - if (!ecap_pass_through(iommu->ecap)) - hw_pass_through = 0; intel_svm_check(iommu); } @@ -2466,7 +2400,7 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - ret = si_domain_init(hw_pass_through); + ret = si_domain_init(); if (ret) goto free_iommu; @@ -2893,12 +2827,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; - if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { - pr_warn("%s: Doesn't support hardware pass through.\n", - iommu->name); - return -ENXIO; - } - sp = domain_update_iommu_superpage(NULL, iommu) - 1; if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { pr_warn("%s: Doesn't support large page.\n", @@ -3149,43 +3077,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) return 0; } -static int intel_iommu_memory_notifier(struct notifier_block *nb, - unsigned long val, void *v) -{ - struct memory_notify *mhp = v; - unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn); - unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn + - mhp->nr_pages - 1); - - switch (val) { - case MEM_GOING_ONLINE: - if (iommu_domain_identity_map(si_domain, - start_vpfn, last_vpfn)) { - pr_warn("Failed to build identity map for [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - break; - - case MEM_OFFLINE: - case MEM_CANCEL_ONLINE: - { - LIST_HEAD(freelist); - - domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); - iommu_put_pages_list(&freelist); - } - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block intel_iommu_memory_nb = { - .notifier_call = intel_iommu_memory_notifier, - .priority = 0 -}; - static void intel_disable_iommus(void) { struct intel_iommu *iommu = NULL; @@ -3482,12 +3373,7 @@ int __init intel_iommu_init(void) iommu_pmu_register(iommu); } - up_read(&dmar_global_lock); - - if (si_domain && !hw_pass_through) - register_memory_notifier(&intel_iommu_memory_nb); - down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); -- cgit From 2c13012e09190174614fd6901857a1b8c199e17d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:13 +0800 Subject: iommu/vt-d: Always reserve a domain ID for identity setup We will use a global static identity domain. Reserve a static domain ID for it. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240809055431.36513-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 723ea9f3f501..c019fb3b3e78 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1440,10 +1440,10 @@ static int iommu_init_domains(struct intel_iommu *iommu) * entry for first-level or pass-through translation modes should * be programmed with a domain id different from those used for * second-level or nested translation. We reserve a domain id for - * this purpose. + * this purpose. This domain id is also used for identity domain + * in legacy mode. */ - if (sm_supported(iommu)) - set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); + set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); return 0; } -- cgit From 487df6836606dc67cd8e2c26616f581c8800a17a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:14 +0800 Subject: iommu/vt-d: Remove has_iotlb_device flag The has_iotlb_device flag was used to indicate if a domain had attached devices with ATS enabled. Domains without this flag didn't require device TLB invalidation during unmap operations, optimizing performance by avoiding unnecessary device iteration. With the introduction of cache tags, this flag is no longer needed. The code to iterate over attached devices was removed by commit 06792d067989 ("iommu/vt-d: Cleanup use of iommu_flush_iotlb_psi()"). Remove has_iotlb_device to avoid unnecessary code. Suggested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 34 +--------------------------------- drivers/iommu/intel/iommu.h | 2 -- drivers/iommu/intel/nested.c | 2 -- 3 files changed, 1 insertion(+), 37 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index c019fb3b3e78..aa8e10a2ad51 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -485,7 +485,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); - domain_update_iotlb(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -1263,32 +1262,6 @@ domain_lookup_dev_info(struct dmar_domain *domain, return NULL; } -void domain_update_iotlb(struct dmar_domain *domain) -{ - struct dev_pasid_info *dev_pasid; - struct device_domain_info *info; - bool has_iotlb_device = false; - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - - list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { - info = dev_iommu_priv_get(dev_pasid->dev); - if (info->ats_enabled) { - has_iotlb_device = true; - break; - } - } - domain->has_iotlb_device = has_iotlb_device; - spin_unlock_irqrestore(&domain->lock, flags); -} - /* * The extra devTLB flush quirk impacts those QAT devices with PCI device * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() @@ -1325,10 +1298,8 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) info->pasid_enabled = 1; if (info->ats_supported && pci_ats_page_aligned(pdev) && - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { + !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; - domain_update_iotlb(info->domain); - } } static void iommu_disable_pci_caps(struct device_domain_info *info) @@ -1343,7 +1314,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) if (info->ats_enabled) { pci_disable_ats(pdev); info->ats_enabled = 0; - domain_update_iotlb(info->domain); } if (info->pasid_enabled) { @@ -1517,7 +1487,6 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->nid = NUMA_NO_NODE; if (first_level_by_default(type)) domain->use_first_level = true; - domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); INIT_LIST_HEAD(&domain->dev_pasids); INIT_LIST_HEAD(&domain->cache_tags); @@ -3518,7 +3487,6 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st xa_init(&domain->iommu_array); domain->nid = dev_to_node(dev); - domain->has_iotlb_device = info->ats_enabled; domain->use_first_level = first_stage; /* calculate the address width */ diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index b67c14da1240..01002ae2a091 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -588,7 +588,6 @@ struct dmar_domain { int nid; /* node id */ struct xarray iommu_array; /* Attached IOMMU array */ - u8 has_iotlb_device: 1; u8 iommu_coherency: 1; /* indicate coherency of iommu access */ u8 force_snooping : 1; /* Create IOPTEs with snoop control */ u8 set_pte_snp:1; @@ -1104,7 +1103,6 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) -void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 16a2bcf5cfeb..36a91b1b52be 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -66,8 +66,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); - domain_update_iotlb(dmar_domain); - return 0; unassign_tag: cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID); -- cgit From c7191984e5aade540f1a3845a116537c89572655 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:15 +0800 Subject: iommu/vt-d: Factor out helpers from domain_context_mapping_one() Extract common code from domain_context_mapping_one() into new helpers, making it reusable by other functions such as the upcoming identity domain implementation. No intentional functional changes. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20240809055431.36513-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 99 ++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 41 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index aa8e10a2ad51..7950152bb4e6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1597,6 +1597,61 @@ static void domain_exit(struct dmar_domain *domain) kfree(domain); } +/* + * For kdump cases, old valid entries may be cached due to the + * in-flight DMA and copied pgtable, but there is no unmapping + * behaviour for them, thus we need an explicit cache flush for + * the newly-mapped device. For kdump, at this point, the device + * is supposed to finish reset at its driver probe stage, so no + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ +static void copied_context_tear_down(struct intel_iommu *iommu, + struct context_entry *context, + u8 bus, u8 devfn) +{ + u16 did_old; + + if (!context_copied(iommu, bus, devfn)) + return; + + assert_spin_locked(&iommu->lock); + + did_old = context_domain_id(context); + context_clear_entry(context); + + if (did_old < cap_ndoms(iommu->cap)) { + iommu->flush.flush_context(iommu, did_old, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } + + clear_context_copied(iommu, bus, devfn); +} + +/* + * It's a non-present to present mapping. If hardware doesn't cache + * non-present entry we only need to flush the write-buffer. If the + * _does_ cache non-present entries, then it does so in the special + * domain #0, which we have to flush: + */ +static void context_present_cache_flush(struct intel_iommu *iommu, u16 did, + u8 bus, u8 devfn) +{ + if (cap_caching_mode(iommu->cap)) { + iommu->flush.flush_context(iommu, 0, + (((u16)bus) << 8) | devfn, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + } else { + iommu_flush_write_buffer(iommu); + } +} + static int domain_context_mapping_one(struct dmar_domain *domain, struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -1625,31 +1680,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_present(context) && !context_copied(iommu, bus, devfn)) goto out_unlock; - /* - * For kdump cases, old valid entries may be cached due to the - * in-flight DMA and copied pgtable, but there is no unmapping - * behaviour for them, thus we need an explicit cache flush for - * the newly-mapped device. For kdump, at this point, the device - * is supposed to finish reset at its driver probe stage, so no - * in-flight DMA will exist, and we don't need to worry anymore - * hereafter. - */ - if (context_copied(iommu, bus, devfn)) { - u16 did_old = context_domain_id(context); - - if (did_old < cap_ndoms(iommu->cap)) { - iommu->flush.flush_context(iommu, did_old, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, did_old, 0, 0, - DMA_TLB_DSI_FLUSH); - } - - clear_context_copied(iommu, bus, devfn); - } - + copied_context_tear_down(iommu, context, bus, devfn); context_clear_entry(context); + context_set_domain_id(context, did); if (translation != CONTEXT_TT_PASS_THROUGH) { @@ -1685,23 +1718,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_present(context); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - - /* - * It's a non-present to present mapping. If hardware doesn't cache - * non-present entry we only need to flush the write-buffer. If the - * _does_ cache non-present entries, then it does so in the special - * domain #0, which we have to flush: - */ - if (cap_caching_mode(iommu->cap)) { - iommu->flush.flush_context(iommu, 0, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - } else { - iommu_flush_write_buffer(iommu); - } - + context_present_cache_flush(iommu, did, bus, devfn); ret = 0; out_unlock: -- cgit From 2031c469f8161abe74189cb74f50da224f340b71 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:16 +0800 Subject: iommu/vt-d: Add support for static identity domain Software determines VT-d hardware support for passthrough translation by inspecting the capability register. If passthrough translation is not supported, the device is instructed to use DMA domain for its default domain. Add a global static identity domain with guaranteed attach semantics for IOMMUs that support passthrough translation mode. The global static identity domain is a dummy domain without corresponding dmar_domain structure. Consequently, the device's info->domain will be NULL with the identity domain is attached. Refactor the code accordingly. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 114 ++++++++++++++++++++++++++++++++++++++++++-- drivers/iommu/intel/svm.c | 2 +- 2 files changed, 111 insertions(+), 5 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7950152bb4e6..14f1fcf17152 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3691,11 +3691,9 @@ int prepare_domain_attach_device(struct iommu_domain *domain, static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; - if (info->domain) - device_block_translation(dev); + device_block_translation(dev); ret = prepare_domain_attach_device(domain, dev); if (ret) @@ -4301,11 +4299,17 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, struct iommu_domain *domain) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct dev_pasid_info *curr, *dev_pasid = NULL; struct intel_iommu *iommu = info->iommu; + struct dmar_domain *dmar_domain; unsigned long flags; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + intel_pasid_tear_down_entry(iommu, dev, pasid, false); + return; + } + + dmar_domain = to_dmar_domain(domain); spin_lock_irqsave(&dmar_domain->lock, flags); list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { if (curr->dev == dev && curr->pasid == pasid) { @@ -4532,9 +4536,111 @@ static const struct iommu_dirty_ops intel_dirty_ops = { .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, }; +static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct context_entry *context; + + spin_lock(&iommu->lock); + context = iommu_context_addr(iommu, bus, devfn, 1); + if (!context) { + spin_unlock(&iommu->lock); + return -ENOMEM; + } + + if (context_present(context) && !context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return 0; + } + + copied_context_tear_down(iommu, context, bus, devfn); + context_clear_entry(context); + context_set_domain_id(context, FLPT_DEFAULT_DID); + + /* + * In pass through mode, AW must be programmed to indicate the largest + * AGAW value supported by hardware. And ASR is ignored by hardware. + */ + context_set_address_width(context, iommu->msagaw); + context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH); + context_set_fault_enable(context); + context_set_present(context); + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); + context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn); + spin_unlock(&iommu->lock); + + return 0; +} + +static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data) +{ + struct device *dev = data; + + if (dev != &pdev->dev) + return 0; + + return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); +} + +static int device_setup_pass_through(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + + if (!dev_is_pci(dev)) + return context_setup_pass_through(dev, info->bus, info->devfn); + + return pci_for_each_dma_alias(to_pci_dev(dev), + context_setup_pass_through_cb, dev); +} + +static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + int ret; + + device_block_translation(dev); + + if (dev_is_real_dma_subdevice(dev)) + return 0; + + if (sm_supported(iommu)) { + ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); + if (!ret) + iommu_enable_pci_caps(info); + } else { + ret = device_setup_pass_through(dev); + } + + return ret; +} + +static int identity_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + + return intel_pasid_setup_pass_through(iommu, dev, pasid); +} + +static struct iommu_domain identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &(const struct iommu_domain_ops) { + .attach_dev = identity_domain_attach_dev, + .set_dev_pasid = identity_domain_set_dev_pasid, + }, +}; + const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, + .identity_domain = &identity_domain, .capable = intel_iommu_capable, .hw_info = intel_iommu_hw_info, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 0e3a9b38bef2..ef12e95e400a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -311,7 +311,7 @@ void intel_drain_pasid_prq(struct device *dev, u32 pasid) domain = info->domain; pdev = to_pci_dev(dev); sid = PCI_DEVID(info->bus, info->devfn); - did = domain_id_iommu(domain, iommu); + did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID; qdep = pci_ats_queue_depth(pdev); /* -- cgit From 50a7e2c6c3b6ea2439aa2e2e392c0ca2ef567fcf Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:17 +0800 Subject: iommu/vt-d: Cleanup si_domain The static identity domain has been introduced, rendering the si_domain obsolete. Remove si_domain and cleanup the code accordingly. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240809055431.36513-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 91 ++++++++++----------------------------------- 1 file changed, 19 insertions(+), 72 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 14f1fcf17152..159da629349c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -167,8 +167,6 @@ static void device_rbtree_remove(struct device_domain_info *info) spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags); } -static struct dmar_domain *si_domain; - struct dmar_rmrr_unit { struct list_head list; /* list of rmrr units */ struct acpi_dmar_header *hdr; /* ACPI header */ @@ -286,11 +284,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -static int domain_type_is_si(struct dmar_domain *domain) -{ - return domain->domain.type == IOMMU_DOMAIN_IDENTITY; -} - static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; @@ -1664,9 +1657,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int agaw, ret; - if (domain_type_is_si(domain)) - translation = CONTEXT_TT_PASS_THROUGH; - pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1685,34 +1675,24 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_domain_id(context, did); - if (translation != CONTEXT_TT_PASS_THROUGH) { - /* - * Skip top levels of page tables for iommu which has - * less agaw than default. Unnecessary for PT mode. - */ - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - ret = -ENOMEM; - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) - goto out_unlock; - } - - if (info && info->ats_supported) - translation = CONTEXT_TT_DEV_IOTLB; - else - translation = CONTEXT_TT_MULTI_LEVEL; - - context_set_address_root(context, virt_to_phys(pgd)); - context_set_address_width(context, agaw); - } else { - /* - * In pass through mode, AW must be programmed to - * indicate the largest AGAW value supported by - * hardware. And ASR is ignored by hardware. - */ - context_set_address_width(context, iommu->msagaw); + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. + */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + ret = -ENOMEM; + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + goto out_unlock; } + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; + + context_set_address_root(context, virt_to_phys(pgd)); + context_set_address_width(context, agaw); context_set_translation_type(context, translation); context_set_fault_enable(context); context_set_present(context); @@ -1977,23 +1957,6 @@ static bool dev_is_real_dma_subdevice(struct device *dev) pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); } -static int md_domain_init(struct dmar_domain *domain, int guest_width); - -static int __init si_domain_init(void) -{ - si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY); - if (!si_domain) - return -EFAULT; - - if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { - domain_exit(si_domain); - si_domain = NULL; - return -EFAULT; - } - - return 0; -} - static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -2016,8 +1979,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!sm_supported(iommu)) ret = domain_context_mapping(domain, dev); - else if (domain_type_is_si(domain)) - ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID); else @@ -2026,8 +1987,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; - if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) - iommu_enable_pci_caps(info); + iommu_enable_pci_caps(info); ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) @@ -2386,10 +2346,6 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - ret = si_domain_init(); - if (ret) - goto free_iommu; - /* * for each drhd * enable fault log @@ -2435,10 +2391,6 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } - if (si_domain) { - domain_exit(si_domain); - si_domain = NULL; - } return ret; } @@ -3572,8 +3524,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) domain->geometry.force_aperture = true; return domain; - case IOMMU_DOMAIN_IDENTITY: - return &si_domain->domain; default: return NULL; } @@ -3640,8 +3590,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) WARN_ON(dmar_domain->nested_parent && !list_empty(&dmar_domain->s1_domains)); - if (domain != &si_domain->domain) - domain_exit(dmar_domain); + domain_exit(dmar_domain); } int prepare_domain_attach_device(struct iommu_domain *domain, @@ -4364,9 +4313,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (ret) goto out_detach_iommu; - if (domain_type_is_si(dmar_domain)) - ret = intel_pasid_setup_pass_through(iommu, dev, pasid); - else if (dmar_domain->use_first_level) + if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid); else -- cgit From ccb02b27bb50c0f5a8f6fd745aecf4ac4beda73f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:19 +0800 Subject: iommu/vt-d: Move PCI PASID enablement to probe path Currently, PCI PASID is enabled alongside PCI ATS when an iommu domain is attached to the device and disabled when the device transitions to block translation mode. This approach is inappropriate as PCI PASID is a device feature independent of the type of the attached domain. Enable PCI PASID during the IOMMU device probe and disables it during the release path. Suggested-by: Yi Liu Signed-off-by: Lu Baolu Reviewed-by: Yi Liu Tested-by: Yi Liu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240819051805.116936-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 159da629349c..10468c871fe0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1281,15 +1281,6 @@ static void iommu_enable_pci_caps(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - - /* The PCIe spec, in its wisdom, declares that the behaviour of - the device if you enable PASID support after ATS support is - undefined. So always enable PASID support on devices which - have it, even if we can't yet know if we're ever going to - use it. */ - if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) - info->pasid_enabled = 1; - if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; @@ -1308,11 +1299,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) pci_disable_ats(pdev); info->ats_enabled = 0; } - - if (info->pasid_enabled) { - pci_disable_pasid(pdev); - info->pasid_enabled = 0; - } } static void intel_flush_iotlb_all(struct iommu_domain *domain) @@ -3940,6 +3926,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + /* + * The PCIe spec, in its wisdom, declares that the behaviour of the + * device is undefined if you enable PASID support after ATS support. + * So always enable PASID support on devices which have it, even if + * we can't yet know if we're ever going to use it. + */ + if (info->pasid_supported && + !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + info->pasid_enabled = 1; + return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -3956,6 +3952,11 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + if (info->pasid_enabled) { + pci_disable_pasid(to_pci_dev(dev)); + info->pasid_enabled = 0; + } + mutex_lock(&iommu->iopf_lock); if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev))) device_rbtree_remove(info); -- cgit From 3297d047cd7f502ea7bd949fe070bf01c02aec3e Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 2 Sep 2024 10:27:22 +0800 Subject: iommu/vt-d: Refactor IOTLB and Dev-IOTLB flush for batching Extracts IOTLB and Dev-IOTLB invalidation logic from cache tag flush interfaces into dedicated helper functions. It prepares the codebase for upcoming changes to support batched cache invalidations. To enable direct use of qi_flush helpers in the new functions, iommu->flush.flush_iotlb and quirk_extra_dev_tlb_flush() are opened up. No functional changes are intended. Co-developed-by: Lu Baolu Signed-off-by: Lu Baolu Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240815065221.50328-3-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 142 ++++++++++++++++++++++++-------------------- drivers/iommu/intel/iommu.c | 5 +- drivers/iommu/intel/iommu.h | 3 + 3 files changed, 83 insertions(+), 67 deletions(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 44e92638c0cd..08f7ce2c16c3 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -255,6 +255,78 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); } +static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, + unsigned long addr, unsigned long pages, + unsigned long mask, int ih) +{ + struct intel_iommu *iommu = tag->iommu; + u64 type = DMA_TLB_PSI_FLUSH; + + if (domain->use_first_level) { + qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih); + return; + } + + /* + * Fallback to domain selective flush if no PSI support or the size + * is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || + mask > cap_max_amask_val(iommu->cap) || pages == -1) { + addr = 0; + mask = 0; + ih = 0; + type = DMA_TLB_DSI_FLUSH; + } + + if (ecap_qis(iommu->ecap)) + qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); + else + __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); +} + +static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag, + unsigned long addr, unsigned long mask) +{ + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + if (tag->pasid == IOMMU_NO_PASID) { + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, + addr, mask); + if (info->dtlb_extra_inval) + qi_flush_dev_iotlb(iommu, sid, info->pfsid, + info->ats_qdep, addr, mask); + return; + } + + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask); + if (info->dtlb_extra_inval) + qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid, + info->ats_qdep, addr, mask); +} + +static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) +{ + struct intel_iommu *iommu = tag->iommu; + struct device_domain_info *info; + u16 sid; + + info = dev_iommu_priv_get(tag->dev); + sid = PCI_DEVID(info->bus, info->devfn); + + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH); + if (info->dtlb_extra_inval) + qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, + MAX_AGAW_PFN_WIDTH); +} + /* * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) * when the memory mappings in the target domain have been modified. @@ -270,30 +342,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: - if (domain->use_first_level) { - qi_flush_piotlb(iommu, tag->domain_id, - tag->pasid, addr, pages, ih); - } else { - /* - * Fallback to domain selective flush if no - * PSI support or the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih); break; case CACHE_TAG_NESTING_DEVTLB: /* @@ -307,18 +359,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, mask = MAX_AGAW_PFN_WIDTH; fallthrough; case CACHE_TAG_DEVTLB: - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - if (tag->pasid == IOMMU_NO_PASID) - qi_flush_dev_iotlb(iommu, sid, info->pfsid, - info->ats_qdep, addr, mask); - else - qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, - tag->pasid, info->ats_qdep, - addr, mask); - - quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep); + cache_tag_flush_devtlb_psi(domain, tag, addr, mask); break; } @@ -338,29 +379,14 @@ void cache_tag_flush_all(struct dmar_domain *domain) spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - switch (tag->type) { case CACHE_TAG_IOTLB: case CACHE_TAG_NESTING_IOTLB: - if (domain->use_first_level) - qi_flush_piotlb(iommu, tag->domain_id, - tag->pasid, 0, -1, 0); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); + cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0); break; case CACHE_TAG_DEVTLB: case CACHE_TAG_NESTING_DEVTLB: - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, - 0, MAX_AGAW_PFN_WIDTH); - quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, - IOMMU_NO_PASID, info->ats_qdep); + cache_tag_flush_devtlb_all(domain, tag); break; } @@ -399,20 +425,8 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, } if (tag->type == CACHE_TAG_IOTLB || - tag->type == CACHE_TAG_NESTING_IOTLB) { - /* - * Fallback to domain selective flush if no - * PSI support or the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, tag->domain_id, - 0, 0, DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, tag->domain_id, - addr, mask, - DMA_TLB_PSI_FLUSH); - } + tag->type == CACHE_TAG_NESTING_IOTLB) + cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0); trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 10468c871fe0..dfd33adffb14 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1184,9 +1184,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu, raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -/* return value determine if we need a write buffer flush */ -static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int size_order, u64 type) +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type) { int tlb_offset = ecap_iotlb_offset(iommu->ecap); u64 val = 0, val_iva = 0; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index e297a322ba2d..74634805abd1 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1206,6 +1206,9 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options); + +void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, + unsigned int size_order, u64 type); /* * Options used in qi_submit_sync: * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8. -- cgit From 777cdd853434849cc98ef94787538b1eb9f492d9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 10:27:23 +0800 Subject: iommu/vt-d: Add qi_batch for dmar_domain Introduces a qi_batch structure to hold batched cache invalidation descriptors on a per-dmar_domain basis. A fixed-size descriptor array is used for simplicity. The qi_batch is allocated when the first cache tag is added to the domain and freed during iommu_free_domain(). Signed-off-by: Lu Baolu Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20240815065221.50328-4-tina.zhang@intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 7 +++++++ drivers/iommu/intel/iommu.c | 1 + drivers/iommu/intel/iommu.h | 14 ++++++++++++++ drivers/iommu/intel/nested.c | 1 + drivers/iommu/intel/svm.c | 5 ++++- 5 files changed, 27 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/intel/iommu.c') diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 08f7ce2c16c3..2e997d782beb 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -190,6 +190,13 @@ int cache_tag_assign_domain(struct dmar_domain *domain, u16 did = domain_get_id_for_dev(domain, dev); int ret; + /* domain->qi_bach will be freed in iommu_free_domain() path. */ + if (!domain->qi_batch) { + domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL); + if (!domain->qi_batch) + return -ENOMEM; + } + ret = __cache_tag_assign_domain(domain, did, dev, pasid); if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) return ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dfd33adffb14..038a81efaaf7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1572,6 +1572,7 @@ static void domain_exit(struct dmar_domain *domain) if (WARN_ON(!list_empty(&domain->devices))) return; + kfree(domain->qi_batch); kfree(domain); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 74634805abd1..d21eca94cb8f 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -584,6 +584,19 @@ struct iommu_domain_info { * to VT-d spec, section 9.3 */ }; +/* + * We start simply by using a fixed size for the batched descriptors. This + * size is currently sufficient for our needs. Future improvements could + * involve dynamically allocating the batch buffer based on actual demand, + * allowing us to adjust the batch size for optimal performance in different + * scenarios. + */ +#define QI_MAX_BATCHED_DESC_COUNT 16 +struct qi_batch { + struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT]; + unsigned int index; +}; + struct dmar_domain { int nid; /* node id */ struct xarray iommu_array; /* Attached IOMMU array */ @@ -608,6 +621,7 @@ struct dmar_domain { spinlock_t cache_lock; /* Protect the cache tag list */ struct list_head cache_tags; /* Cache tag list */ + struct qi_batch *qi_batch; /* Batched QI descriptors */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 36a91b1b52be..433c58944401 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -83,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain) spin_lock(&s2_domain->s1_lock); list_del(&dmar_domain->s2_link); spin_unlock(&s2_domain->s1_lock); + kfree(dmar_domain->qi_batch); kfree(dmar_domain); } diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ef12e95e400a..078d1e32a24e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static void intel_mm_free_notifier(struct mmu_notifier *mn) { - kfree(container_of(mn, struct dmar_domain, notifier)); + struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier); + + kfree(domain->qi_batch); + kfree(domain); } static const struct mmu_notifier_ops intel_mmuops = { -- cgit