aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt17
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml5
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/iommu/Kconfig11
-rw-r--r--drivers/iommu/amd/amd_iommu.h26
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h35
-rw-r--r--drivers/iommu/amd/init.c16
-rw-r--r--drivers/iommu/amd/io_pgtable.c105
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c56
-rw-r--r--drivers/iommu/amd/iommu.c210
-rw-r--r--drivers/iommu/amd/pasid.c2
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/Makefile1
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c83
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c578
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h135
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c909
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c28
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c2
-rw-r--r--drivers/iommu/fsl_pamu_domain.c4
-rw-r--r--drivers/iommu/intel/cache.c239
-rw-r--r--drivers/iommu/intel/dmar.c109
-rw-r--r--drivers/iommu/intel/iommu.c504
-rw-r--r--drivers/iommu/intel/iommu.h128
-rw-r--r--drivers/iommu/intel/nested.c3
-rw-r--r--drivers/iommu/intel/pasid.c12
-rw-r--r--drivers/iommu/intel/svm.c7
-rw-r--r--drivers/iommu/io-pgtable-arm.c31
-rw-r--r--drivers/iommu/of_iommu.c2
-rw-r--r--drivers/pci/ats.c33
-rw-r--r--include/linux/io-pgtable.h4
-rw-r--r--include/linux/pci-ats.h3
31 files changed, 2259 insertions, 1040 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bfc27d1bf4ed..8337d0fed311 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -333,12 +333,17 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
- force_enable - Force enable the IOMMU on platforms known
- to be buggy with IOMMU enabled. Use this
- option with care.
- pgtbl_v1 - Use v1 page table for DMA-API (Default).
- pgtbl_v2 - Use v2 page table for DMA-API.
- irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
+ force_enable - Force enable the IOMMU on platforms known
+ to be buggy with IOMMU enabled. Use this
+ option with care.
+ pgtbl_v1 - Use v1 page table for DMA-API (Default).
+ pgtbl_v2 - Use v2 page table for DMA-API.
+ irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
+ nohugepages - Limit page-sizes used for v1 page-tables
+ to 4 KiB.
+ v2_pgsizes_only - Limit page-sizes used for v1 page-tables
+ to 4KiB/2Mib/1GiB.
+
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 280b4e49f219..92d350b8e01a 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -36,7 +36,9 @@ properties:
items:
- enum:
- qcom,qcm2290-smmu-500
+ - qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
+ - qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sc7280-smmu-500
@@ -84,6 +86,7 @@ properties:
items:
- enum:
- qcom,qcm2290-smmu-500
+ - qcom,sa8255p-smmu-500
- qcom,sa8775p-smmu-500
- qcom,sc7280-smmu-500
- qcom,sc8180x-smmu-500
@@ -552,7 +555,9 @@ allOf:
- cavium,smmu-v2
- marvell,ap806-smmu-500
- nvidia,smmu-500
+ - qcom,qcs8300-smmu-500
- qcom,qdu1000-smmu-500
+ - qcom,sa8255p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sdm670-smmu-500
- qcom,sdm845-smmu-500
diff --git a/MAINTAINERS b/MAINTAINERS
index 12d7e989d402..8a76c256229c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22650,6 +22650,7 @@ M: Thierry Reding <thierry.reding@gmail.com>
R: Krishna Reddy <vdumpa@nvidia.com>
L: linux-tegra@vger.kernel.org
S: Supported
+F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
F: drivers/iommu/tegra*
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index a82f10054aec..22addaedf64d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -424,6 +424,17 @@ config ARM_SMMU_V3_KUNIT_TEST
Enable this option to unit-test arm-smmu-v3 driver functions.
If unsure, say N.
+
+config TEGRA241_CMDQV
+ bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
+ depends on ACPI
+ help
+ Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
+ CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
+ support, except with virtualization capabilities.
+
+ Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
+ CMDQ-V extension.
endif
config S390_IOMMU
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2d5945c982bd..6386fa4556d9 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -43,9 +43,10 @@ int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
+extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
-struct protection_domain *protection_domain_alloc(unsigned int type);
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
@@ -87,14 +88,10 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag);
void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
-void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set);
-void amd_iommu_domain_flush_complete(struct protection_domain *domain);
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size);
void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
ioasid_t pasid, u64 address, size_t size);
-void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
- ioasid_t pasid);
#ifdef CONFIG_IRQ_REMAP
int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
@@ -121,11 +118,6 @@ static inline bool check_feature2(u64 mask)
return (amd_iommu_efr2 & mask);
}
-static inline int check_feature_gpt_level(void)
-{
- return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
-}
-
static inline bool amd_iommu_gt_ppr_supported(void)
{
return (check_feature(FEATURE_GT) &&
@@ -143,19 +135,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
}
-static inline
-void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
- domain->iop.root = (u64 *)(root & PAGE_MASK);
- domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
-}
-
-static inline
-void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
- amd_iommu_domain_set_pt_root(domain, 0);
-}
-
static inline int get_pci_sbdf_id(struct pci_dev *pdev)
{
int seg = pci_domain_nr(pdev->bus);
@@ -185,7 +164,6 @@ static inline struct protection_domain *to_pdomain(struct iommu_domain *dom)
}
bool translation_pre_enabled(struct amd_iommu *iommu);
-bool amd_iommu_is_attach_deferred(struct device *dev);
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
#ifdef CONFIG_DMI
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 2b76b5dedc1d..601fb4ee6900 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -8,6 +8,7 @@
#ifndef _ASM_X86_AMD_IOMMU_TYPES_H
#define _ASM_X86_AMD_IOMMU_TYPES_H
+#include <linux/bitfield.h>
#include <linux/iommu.h>
#include <linux/types.h>
#include <linux/mmu_notifier.h>
@@ -95,26 +96,21 @@
#define FEATURE_GA BIT_ULL(7)
#define FEATURE_HE BIT_ULL(8)
#define FEATURE_PC BIT_ULL(9)
-#define FEATURE_GATS_SHIFT (12)
-#define FEATURE_GATS_MASK (3ULL)
+#define FEATURE_GATS GENMASK_ULL(13, 12)
+#define FEATURE_GLX GENMASK_ULL(15, 14)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
+#define FEATURE_PASMAX GENMASK_ULL(36, 32)
#define FEATURE_GIOSUP BIT_ULL(48)
#define FEATURE_HASUP BIT_ULL(49)
#define FEATURE_EPHSUP BIT_ULL(50)
#define FEATURE_HDSUP BIT_ULL(52)
#define FEATURE_SNP BIT_ULL(63)
-#define FEATURE_PASID_SHIFT 32
-#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
-
-#define FEATURE_GLXVAL_SHIFT 14
-#define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT)
/* Extended Feature 2 Bits */
-#define FEATURE_SNPAVICSUP_SHIFT 5
-#define FEATURE_SNPAVICSUP_MASK (0x07ULL << FEATURE_SNPAVICSUP_SHIFT)
+#define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5)
#define FEATURE_SNPAVICSUP_GAM(x) \
- ((x & FEATURE_SNPAVICSUP_MASK) >> FEATURE_SNPAVICSUP_SHIFT == 0x1)
+ (FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
/* Note:
* The current driver only support 16-bit PASID.
@@ -294,8 +290,13 @@
* that we support.
*
* 512GB Pages are not supported due to a hardware bug
+ * Page sizes >= the 52 bit max physical address of the CPU are not supported.
*/
-#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+#define AMD_IOMMU_PGSIZES (GENMASK_ULL(51, 12) ^ SZ_512G)
+
+/* Special mode where page-sizes are limited to 4 KiB */
+#define AMD_IOMMU_PGSIZES_4K (PAGE_SIZE)
+
/* 4K, 2MB, 1G page sizes are supported */
#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
@@ -419,10 +420,6 @@
#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL)
-#define DTE_GCR3_INDEX_A 0
-#define DTE_GCR3_INDEX_B 1
-#define DTE_GCR3_INDEX_C 1
-
#define DTE_GCR3_SHIFT_A 58
#define DTE_GCR3_SHIFT_B 16
#define DTE_GCR3_SHIFT_C 43
@@ -527,7 +524,7 @@ struct amd_irte_ops;
#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0)
#define io_pgtable_to_data(x) \
- container_of((x), struct amd_io_pgtable, iop)
+ container_of((x), struct amd_io_pgtable, pgtbl)
#define io_pgtable_ops_to_data(x) \
io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
@@ -537,7 +534,7 @@ struct amd_irte_ops;
struct protection_domain, iop)
#define io_pgtable_cfg_to_data(x) \
- container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+ container_of((x), struct amd_io_pgtable, pgtbl.cfg)
struct gcr3_tbl_info {
u64 *gcr3_tbl; /* Guest CR3 table */
@@ -547,8 +544,7 @@ struct gcr3_tbl_info {
};
struct amd_io_pgtable {
- struct io_pgtable_cfg pgtbl_cfg;
- struct io_pgtable iop;
+ struct io_pgtable pgtbl;
int mode;
u64 *root;
u64 *pgd; /* v2 pgtable pgd pointer */
@@ -580,7 +576,6 @@ struct protection_domain {
struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- int nid; /* Node ID */
enum protection_domain_mode pd_mode; /* Track page table type */
bool dirty_tracking; /* dirty tracking is enabled in the domain */
unsigned dev_cnt; /* devices assigned to this domain */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index c89d85b54a1a..43131c3a2172 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -192,6 +192,8 @@ bool amdr_ivrs_remap_support __read_mostly;
bool amd_iommu_force_isolation __read_mostly;
+unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
+
/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.
@@ -2042,14 +2044,12 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
int glxval;
u64 pasmax;
- pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
- pasmax >>= FEATURE_PASID_SHIFT;
+ pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
- glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK;
- glxval >>= FEATURE_GLXVAL_SHIFT;
+ glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
if (amd_iommu_max_glx_val == -1)
amd_iommu_max_glx_val = glxval;
@@ -3088,7 +3088,7 @@ static int __init early_amd_iommu_init(void)
/* 5 level guest page table */
if (cpu_feature_enabled(X86_FEATURE_LA57) &&
- check_feature_gpt_level() == GUEST_PGTABLE_5_LEVEL)
+ FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
/* Disable any previously enabled IOMMUs */
@@ -3494,6 +3494,12 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_pgtable = AMD_IOMMU_V2;
} else if (strncmp(str, "irtcachedis", 11) == 0) {
amd_iommu_irtcachedis = true;
+ } else if (strncmp(str, "nohugepages", 11) == 0) {
+ pr_info("Restricting V1 page-sizes to 4KiB");
+ amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K;
+ } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) {
+ pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB");
+ amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
} else {
pr_notice("Unknown option - '%s'\n", str);
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 1074ee25064d..804b788f3f16 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -24,27 +24,6 @@
#include "amd_iommu.h"
#include "../iommu-pages.h"
-static void v1_tlb_flush_all(void *cookie)
-{
-}
-
-static void v1_tlb_flush_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
-}
-
-static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
-{
-}
-
-static const struct iommu_flush_ops v1_flush_ops = {
- .tlb_flush_all = v1_tlb_flush_all,
- .tlb_flush_walk = v1_tlb_flush_walk,
- .tlb_add_page = v1_tlb_add_page,
-};
-
/*
* Helper function to get the first pte of a large mapping
*/
@@ -132,56 +111,40 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
}
}
-void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
- u64 *root, int mode)
-{
- u64 pt_root;
-
- /* lowest 3 bits encode pgtable mode */
- pt_root = mode & 7;
- pt_root |= (u64)root;
-
- amd_iommu_domain_set_pt_root(domain, pt_root);
-}
-
/*
* This function is used to add another level to an IO page table. Adding
* another level increases the size of the address space by 9 bits to a size up
* to 64 bits.
*/
-static bool increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct amd_io_pgtable *pgtable,
unsigned long address,
gfp_t gfp)
{
+ struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
+ struct protection_domain *domain =
+ container_of(pgtable, struct protection_domain, iop);
unsigned long flags;
bool ret = true;
u64 *pte;
- pte = iommu_alloc_page_node(domain->nid, gfp);
+ pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
if (!pte)
return false;
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(domain->iop.mode))
+ if (address <= PM_LEVEL_SIZE(pgtable->mode))
goto out;
ret = false;
- if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
+ if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
goto out;
- *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
+ *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
- domain->iop.root = pte;
- domain->iop.mode += 1;
+ pgtable->root = pte;
+ pgtable->mode += 1;
amd_iommu_update_and_flush_device_table(domain);
- amd_iommu_domain_flush_complete(domain);
-
- /*
- * Device Table needs to be updated and flushed before the new root can
- * be published.
- */
- amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
pte = NULL;
ret = true;
@@ -193,30 +156,31 @@ out:
return ret;
}
-static u64 *alloc_pte(struct protection_domain *domain,
+static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
unsigned long address,
unsigned long page_size,
u64 **pte_page,
gfp_t gfp,
bool *updated)
{
+ struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
+ while (address > PM_LEVEL_SIZE(pgtable->mode)) {
/*
* Return an error if there is no memory to update the
* page-table.
*/
- if (!increase_address_space(domain, address, gfp))
+ if (!increase_address_space(pgtable, address, gfp))
return NULL;
}
- level = domain->iop.mode - 1;
- pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -251,7 +215,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
if (!IOMMU_PTE_PRESENT(__pte) ||
pte_level == PAGE_MODE_NONE) {
- page = iommu_alloc_page_node(domain->nid, gfp);
+ page = iommu_alloc_page_node(cfg->amd.nid, gfp);
if (!page)
return NULL;
@@ -365,7 +329,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
{
- struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
LIST_HEAD(freelist);
bool updated = false;
u64 __pte, *pte;
@@ -382,7 +346,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
while (pgcount > 0) {
count = PAGE_SIZE_PTE_COUNT(pgsize);
- pte = alloc_pte(dom, iova, pgsize, NULL, gfp, &updated);
+ pte = alloc_pte(pgtable, iova, pgsize, NULL, gfp, &updated);
ret = -ENOMEM;
if (!pte)
@@ -419,6 +383,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
out:
if (updated) {
+ struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
@@ -560,27 +525,17 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
*/
static void v1_free_pgtable(struct io_pgtable *iop)
{
- struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
- struct protection_domain *dom;
+ struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
LIST_HEAD(freelist);
if (pgtable->mode == PAGE_MODE_NONE)
return;
- dom = container_of(pgtable, struct protection_domain, iop);
-
/* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
pgtable->mode > PAGE_MODE_6_LEVEL);
free_sub_pt(pgtable->root, pgtable->mode, &freelist);
-
- /* Update data structure */
- amd_iommu_domain_clr_pt_root(dom);
-
- /* Make changes visible to IOMMUs */
- amd_iommu_domain_update(dom);
-
iommu_put_pages_list(&freelist);
}
@@ -588,17 +543,21 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
- cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES;
+ pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+ if (!pgtable->root)
+ return NULL;
+ pgtable->mode = PAGE_MODE_3_LEVEL;
+
+ cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
- cfg->tlb = &v1_flush_ops;
- pgtable->iop.ops.map_pages = iommu_v1_map_pages;
- pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
- pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
- pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
+ pgtable->pgtbl.ops.map_pages = iommu_v1_map_pages;
+ pgtable->pgtbl.ops.unmap_pages = iommu_v1_unmap_pages;
+ pgtable->pgtbl.ops.iova_to_phys = iommu_v1_iova_to_phys;
+ pgtable->pgtbl.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
- return &pgtable->iop;
+ return &pgtable->pgtbl;
}
struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 664e91c88748..25b9042fa453 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -51,7 +51,7 @@ static inline u64 set_pgtable_attr(u64 *page)
u64 prot;
prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER;
- prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY;
+ prot |= IOMMU_PAGE_ACCESS;
return (iommu_virt_to_phys(page) | prot);
}
@@ -233,8 +233,8 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
{
- struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
- struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg;
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
u64 *pte;
unsigned long map_size;
unsigned long mapped_size = 0;
@@ -251,7 +251,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
while (mapped_size < size) {
map_size = get_alloc_page_size(pgsize);
- pte = v2_alloc_pte(pdom->nid, pdom->iop.pgd,
+ pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
iova, map_size, gfp, &updated);
if (!pte) {
ret = -EINVAL;
@@ -266,8 +266,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
}
out:
- if (updated)
+ if (updated) {
+ struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
+
amd_iommu_domain_flush_pages(pdom, o_iova, size);
+ }
if (mapped)
*mapped += mapped_size;
@@ -281,7 +284,7 @@ static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops,
struct iommu_iotlb_gather *gather)
{
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- struct io_pgtable_cfg *cfg = &pgtable->iop.cfg;
+ struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
unsigned long unmap_size;
unsigned long unmapped = 0;
size_t size = pgcount << __ffs(pgsize);
@@ -323,30 +326,9 @@ static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
/*
* ----------------------------------------------------
*/
-static void v2_tlb_flush_all(void *cookie)
-{
-}
-
-static void v2_tlb_flush_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
-}
-
-static void v2_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
-{
-}
-
-static const struct iommu_flush_ops v2_flush_ops = {
- .tlb_flush_all = v2_tlb_flush_all,
- .tlb_flush_walk = v2_tlb_flush_walk,
- .tlb_add_page = v2_tlb_add_page,
-};
-
static void v2_free_pgtable(struct io_pgtable *iop)
{
- struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+ struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
if (!pgtable || !pgtable->pgd)
return;
@@ -359,26 +341,24 @@ static void v2_free_pgtable(struct io_pgtable *iop)
static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
- struct protection_domain *pdom = (struct protection_domain *)cookie;
int ias = IOMMU_IN_ADDR_BIT_SIZE;
- pgtable->pgd = iommu_alloc_page_node(pdom->nid, GFP_ATOMIC);
+ pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
if (!pgtable->pgd)
return NULL;
if (get_pgtable_level() == PAGE_MODE_5_LEVEL)
ias = 57;
- pgtable->iop.ops.map_pages = iommu_v2_map_pages;
- pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages;
- pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys;
+ pgtable->pgtbl.ops.map_pages = iommu_v2_map_pages;
+ pgtable->pgtbl.ops.unmap_pages = iommu_v2_unmap_pages;
+ pgtable->pgtbl.ops.iova_to_phys = iommu_v2_iova_to_phys;
- cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2,
- cfg->ias = ias,
- cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
- cfg->tlb = &v2_flush_ops;
+ cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
+ cfg->ias = ias;
+ cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
- return &pgtable->iop;
+ return &pgtable->pgtbl;
}
struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b19e8c0f48fa..8364cd6fa47d 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -52,8 +52,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
-#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
-
static DEFINE_SPINLOCK(pd_bitmap_lock);
LIST_HEAD(ioapic_map);
@@ -825,10 +823,12 @@ static void iommu_poll_events(struct amd_iommu *iommu)
while (head != tail) {
iommu_print_event(iommu, iommu->evt_buf + head);
+
+ /* Update head pointer of hardware ring-buffer */
head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
+ writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}
- writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}
#ifdef CONFIG_IRQ_REMAP
@@ -1247,6 +1247,22 @@ out_unlock:
return ret;
}
+static void domain_flush_complete(struct protection_domain *domain)
+{
+ int i;
+
+ for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
+ if (domain && !domain->dev_iommu[i])
+ continue;
+
+ /*
+ * Devices of this domain are behind this IOMMU
+ * We need to wait for completion of all commands.
+ */
+ iommu_completion_wait(amd_iommus[i]);
+ }
+}
+
static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
{
struct iommu_cmd cmd;
@@ -1483,7 +1499,7 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain,
__domain_flush_pages(domain, address, size);
/* Wait until IOMMU TLB and all device IOTLB flushes are complete */
- amd_iommu_domain_flush_complete(domain);
+ domain_flush_complete(domain);
return;
}
@@ -1523,7 +1539,7 @@ void amd_iommu_domain_flush_pages(struct protection_domain *domain,
}
/* Wait until IOMMU TLB and all device IOTLB flushes are complete */
- amd_iommu_domain_flush_complete(domain);
+ domain_flush_complete(domain);
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
@@ -1549,27 +1565,11 @@ void amd_iommu_dev_flush_pasid_pages(struct iommu_dev_data *dev_data,
iommu_completion_wait(iommu);
}
-void amd_iommu_dev_flush_pasid_all(struct iommu_dev_data *dev_data,
- ioasid_t pasid)
+static void dev_flush_pasid_all(struct iommu_dev_data *dev_data,
+ ioasid_t pasid)
{
- amd_iommu_dev_flush_pasid_pages(dev_data, 0,
- CMD_INV_IOMMU_ALL_PAGES_ADDRESS, pasid);
-}
-
-void amd_iommu_domain_flush_complete(struct protection_domain *domain)
-{
- int i;
-
- for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (domain && !domain->dev_iommu[i])
- continue;
-
- /*
- * Devices of this domain are behind this IOMMU
- * We need to wait for completion of all commands.
- */
- iommu_completion_wait(amd_iommus[i]);
- }
+ amd_iommu_dev_flush_pasid_pages(dev_data, pasid, 0,
+ CMD_INV_IOMMU_ALL_PAGES_ADDRESS);
}
/* Flush the not present cache if it exists */
@@ -1589,15 +1589,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
/*
* This function flushes the DTEs for all devices in domain
*/
-static void domain_flush_devices(struct protection_domain *domain)
-{
- struct iommu_dev_data *dev_data;
-
- list_for_each_entry(dev_data, &domain->dev_list, list)
- device_flush_dte(dev_data);
-}
-
-static void update_device_table(struct protection_domain *domain)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
@@ -1607,12 +1599,11 @@ static void update_device_table(struct protection_domain *domain)
set_dte_entry(iommu, dev_data);
clone_aliases(iommu, dev_data->dev);
}
-}
-void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
-{
- update_device_table(domain);
- domain_flush_devices(domain);
+ list_for_each_entry(dev_data, &domain->dev_list, list)
+ device_flush_dte(dev_data);
+
+ domain_flush_complete(domain);
}
void amd_iommu_domain_update(struct protection_domain *domain)
@@ -1816,7 +1807,7 @@ static int update_gcr3(struct iommu_dev_data *dev_data,
else
*pte = 0;
- amd_iommu_dev_flush_pasid_all(dev_data, pasid);
+ dev_flush_pasid_all(dev_data, pasid);
return 0;
}
@@ -1962,7 +1953,7 @@ static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
}
/* Update and flush DTE for the given device */
-void amd_iommu_dev_update_dte(struct iommu_dev_data *dev_data, bool set)
+static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
{
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev);
@@ -2032,6 +2023,7 @@ static int do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
int ret = 0;
/* Update data structures */
@@ -2039,8 +2031,8 @@ static int do_attach(struct iommu_dev_data *dev_data,
list_add(&dev_data->list, &domain->dev_list);
/* Update NUMA Node ID */
- if (domain->nid == NUMA_NO_NODE)
- domain->nid = dev_to_node(dev_data->dev);
+ if (cfg->amd.nid == NUMA_NO_NODE)
+ cfg->amd.nid = dev_to_node(dev_data->dev);
/* Do reference counting */
domain->dev_iommu[iommu->index] += 1;
@@ -2062,7 +2054,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
/* Clear DTE and flush the entry */
- amd_iommu_dev_update_dte(dev_data, false);
+ dev_update_dte(dev_data, false);
/* Flush IOTLB and wait for the flushes to finish */
amd_iommu_domain_flush_all(domain);
@@ -2185,11 +2177,12 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
iommu_dev = ERR_PTR(ret);
iommu_ignore_device(iommu, dev);
- } else {
- amd_iommu_set_pci_msi_domain(dev, iommu);
- iommu_dev = &iommu->iommu;
+ goto out_err;
}
+ amd_iommu_set_pci_msi_domain(dev, iommu);
+ iommu_dev = &iommu->iommu;
+
/*
* If IOMMU and device supports PASID then it will contain max
* supported PASIDs, else it will be zero.
@@ -2201,8 +2194,12 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
pci_max_pasids(to_pci_dev(dev)));
}
+out_err:
iommu_completion_wait(iommu);
+ if (dev_is_pci(dev))
+ pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
+
return iommu_dev;
}
@@ -2259,53 +2256,18 @@ static void cleanup_domain(struct protection_domain *domain)
void protection_domain_free(struct protection_domain *domain)
{
- if (!domain)
- return;
-
- if (domain->iop.pgtbl_cfg.tlb)
- free_io_pgtable_ops(&domain->iop.iop.ops);
-
- if (domain->iop.root)
- iommu_free_page(domain->iop.root);
-
- if (domain->id)
- domain_id_free(domain->id);
-
+ WARN_ON(!list_empty(&domain->dev_list));
+ if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
+ free_io_pgtable_ops(&domain->iop.pgtbl.ops);
+ domain_id_free(domain->id);
kfree(domain);
}
-static int protection_domain_init_v1(struct protection_domain *domain, int mode)
-{
- u64 *pt_root = NULL;
-
- BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
-
- if (mode != PAGE_MODE_NONE) {
- pt_root = iommu_alloc_page(GFP_KERNEL);
- if (!pt_root)
- return -ENOMEM;
- }
-
- domain->pd_mode = PD_MODE_V1;
- amd_iommu_domain_set_pgtable(domain, pt_root, mode);
-
- return 0;
-}
-
-static int protection_domain_init_v2(struct protection_domain *pdom)
-{
- pdom->pd_mode = PD_MODE_V2;
- pdom->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
-
- return 0;
-}
-
-struct protection_domain *protection_domain_alloc(unsigned int type)
+struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
{
struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
int pgtable;
- int ret;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
@@ -2313,12 +2275,12 @@ struct protection_domain *protection_domain_alloc(unsigned int type)
domain->id = domain_id_alloc();
if (!domain->id)
- goto out_err;
+ goto err_free;
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
- domain->nid = NUMA_NO_NODE;
+ domain->iop.pgtbl.cfg.amd.nid = nid;
switch (type) {
/* No need to allocate io pgtable ops in passthrough mode */
@@ -2336,31 +2298,30 @@ struct protection_domain *protection_domain_alloc(unsigned int type)
pgtable = AMD_IOMMU_V1;
break;
default:
- goto out_err;
+ goto err_id;
}
switch (pgtable) {
case AMD_IOMMU_V1:
- ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL);
+ domain->pd_mode = PD_MODE_V1;
break;
case AMD_IOMMU_V2:
- ret = protection_domain_init_v2(domain);
+ domain->pd_mode = PD_MODE_V2;
break;
default:
- ret = -EINVAL;
- break;
+ goto err_id;
}
- if (ret)
- goto out_err;
-
- pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
+ pgtbl_ops =
+ alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain);
if (!pgtbl_ops)
- goto out_err;
+ goto err_id;
return domain;
-out_err:
- protection_domain_free(domain);
+err_id:
+ domain_id_free(domain->id);
+err_free:
+ kfree(domain);
return NULL;
}
@@ -2398,17 +2359,18 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
if (dirty_tracking && !amd_iommu_hd_support(iommu))
return ERR_PTR(-EOPNOTSUPP);
- domain = protection_domain_alloc(type);
+ domain = protection_domain_alloc(type,
+ dev ? dev_to_node(dev) : NUMA_NO_NODE);
if (!domain)
return ERR_PTR(-ENOMEM);
domain->domain.geometry.aperture_start = 0;
domain->domain.geometry.aperture_end = dma_max_address();
domain->domain.geometry.force_aperture = true;
+ domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
if (iommu) {
domain->domain.type = type;
- domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
domain->domain.ops = iommu->iommu.ops->default_domain_ops;
if (dirty_tracking)
@@ -2448,9 +2410,6 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
struct protection_domain *domain;
unsigned long flags;
- if (!dom)
- return;
-
domain = to_pdomain(dom);
spin_lock_irqsave(&domain->lock, flags);
@@ -2462,6 +2421,29 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
protection_domain_free(domain);
}
+static int blocked_domain_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
+ if (dev_data->domain)
+ detach_device(dev);
+
+ /* Clear DTE and flush the entry */
+ spin_lock(&dev_data->lock);
+ dev_update_dte(dev_data, false);
+ spin_unlock(&dev_data->lock);
+
+ return 0;
+}
+
+static struct iommu_domain blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = blocked_domain_attach_device,
+ }
+};
+
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
@@ -2517,7 +2499,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
/* Update device table */
- amd_iommu_dev_update_dte(dev_data, true);
+ dev_update_dte(dev_data, true);
return ret;
}
@@ -2526,7 +2508,7 @@ static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
unsigned long iova, size_t size)
{
struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+ struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
if (ops->map_pages)
domain_flush_np_cache(domain, iova, size);
@@ -2538,7 +2520,7 @@ static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
int iommu_prot, gfp_t gfp, size_t *mapped)
{
struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+ struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
int prot = 0;
int ret = -EINVAL;
@@ -2585,7 +2567,7 @@ static size_t amd_iommu_unmap_pages(struct iommu_domain *dom, unsigned long iova
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+ struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
size_t r;
if ((domain->pd_mode == PD_MODE_V1) &&
@@ -2604,7 +2586,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
dma_addr_t iova)
{
struct protection_domain *domain = to_pdomain(dom);
- struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+ struct io_pgtable_ops *ops = &domain->iop.pgtbl.ops;
return ops->iova_to_phys(ops, iova);
}
@@ -2682,7 +2664,7 @@ static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
struct iommu_dirty_bitmap *dirty)
{
struct protection_domain *pdomain = to_pdomain(domain);
- struct io_pgtable_ops *ops = &pdomain->iop.iop.ops;
+ struct io_pgtable_ops *ops = &pdomain->iop.pgtbl.ops;
unsigned long lflags;
if (!ops || !ops->read_and_clear_dirty)
@@ -2757,7 +2739,7 @@ static void amd_iommu_get_resv_regions(struct device *dev,
list_add_tail(&region->list, head);
}
-bool amd_iommu_is_attach_deferred(struct device *dev)
+static bool amd_iommu_is_attach_deferred(struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
@@ -2859,6 +2841,7 @@ static int amd_iommu_dev_disable_feature(struct device *dev,
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
+ .blocked_domain = &blocked_domain,
.domain_alloc = amd_iommu_domain_alloc,
.domain_alloc_user = amd_iommu_domain_alloc_user,
.domain_alloc_sva = amd_iommu_domain_alloc_sva,
@@ -2867,7 +2850,6 @@ const struct iommu_ops amd_iommu_ops = {
.device_group = amd_iommu_device_group,
.get_resv_regions = amd_iommu_get_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
- .pgsize_bitmap = AMD_IOMMU_PGSIZES,
.def_domain_type = amd_iommu_def_domain_type,
.dev_enable_feat = amd_iommu_dev_enable_feature,
.dev_disable_feat = amd_iommu_dev_disable_feature,
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index a68215f2b3e1..0657b9373be5 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -181,7 +181,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct protection_domain *pdom;
int ret;
- pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA);
+ pdom = protection_domain_alloc(IOMMU_DOMAIN_SVA, dev_to_node(dev));
if (!pdom)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 355173d1441d..dc98c88b48c8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -2,5 +2,6 @@
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-y := arm-smmu-v3.o
arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
+arm_smmu_v3-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
index cceb737a7001..84baa021370a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
@@ -30,6 +30,11 @@ static struct mm_struct sva_mm = {
.pgd = (void *)0xdaedbeefdeadbeefULL,
};
+enum arm_smmu_test_master_feat {
+ ARM_SMMU_MASTER_TEST_ATS = BIT(0),
+ ARM_SMMU_MASTER_TEST_STALL = BIT(1),
+};
+
static bool arm_smmu_entry_differs_in_used_bits(const __le64 *entry,
const __le64 *used_bits,
const __le64 *target,
@@ -164,16 +169,22 @@ static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0;
static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste,
unsigned int s1dss,
- const dma_addr_t dma_addr)
+ const dma_addr_t dma_addr,
+ enum arm_smmu_test_master_feat feat)
{
+ bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;
+ bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL;
+
struct arm_smmu_master master = {
+ .ats_enabled = ats_enabled,
.cd_table.cdtab_dma = dma_addr,
.cd_table.s1cdmax = 0xFF,
.cd_table.s1fmt = STRTAB_STE_0_S1FMT_64K_L2,
.smmu = &smmu,
+ .stall_enabled = stall_enabled,
};
- arm_smmu_make_cdtable_ste(ste, &master, true, s1dss);
+ arm_smmu_make_cdtable_ste(ste, &master, ats_enabled, s1dss);
}
static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test)
@@ -204,7 +215,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste,
NUM_EXPECTED_SYNCS(2));
}
@@ -214,7 +225,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste,
NUM_EXPECTED_SYNCS(2));
}
@@ -224,7 +235,7 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste,
NUM_EXPECTED_SYNCS(3));
}
@@ -234,7 +245,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test)
struct arm_smmu_ste ste;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste,
NUM_EXPECTED_SYNCS(3));
}
@@ -245,9 +256,9 @@ static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test)
struct arm_smmu_ste s1dss_bypass;
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
/*
* Flipping s1dss on a CD table STE only involves changes to the second
@@ -265,7 +276,7 @@ arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test)
struct arm_smmu_ste s1dss_bypass;
arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(
test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2));
}
@@ -276,16 +287,20 @@ arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test)
struct arm_smmu_ste s1dss_bypass;
arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(
test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2));
}
static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste,
- bool ats_enabled)
+ enum arm_smmu_test_master_feat feat)
{
+ bool ats_enabled = feat & ARM_SMMU_MASTER_TEST_ATS;
+ bool stall_enabled = feat & ARM_SMMU_MASTER_TEST_STALL;
struct arm_smmu_master master = {
+ .ats_enabled = ats_enabled,
.smmu = &smmu,
+ .stall_enabled = stall_enabled,
};
struct io_pgtable io_pgtable = {};
struct arm_smmu_domain smmu_domain = {
@@ -308,7 +323,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test)
{
struct arm_smmu_ste ste;
- arm_smmu_test_make_s2_ste(&ste, true);
+ arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste,
NUM_EXPECTED_SYNCS(2));
}
@@ -317,7 +332,7 @@ static void arm_smmu_v3_write_ste_test_abort_to_s2(struct kunit *test)
{
struct arm_smmu_ste ste;
- arm_smmu_test_make_s2_ste(&ste, true);
+ arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste,
NUM_EXPECTED_SYNCS(2));
}
@@ -326,7 +341,7 @@ static void arm_smmu_v3_write_ste_test_s2_to_bypass(struct kunit *test)
{
struct arm_smmu_ste ste;
- arm_smmu_test_make_s2_ste(&ste, true);
+ arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste,
NUM_EXPECTED_SYNCS(2));
}
@@ -335,7 +350,7 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test)
{
struct arm_smmu_ste ste;
- arm_smmu_test_make_s2_ste(&ste, true);
+ arm_smmu_test_make_s2_ste(&ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste,
NUM_EXPECTED_SYNCS(2));
}
@@ -346,8 +361,8 @@ static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test)
struct arm_smmu_ste s2_ste;
arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
- arm_smmu_test_make_s2_ste(&s2_ste, true);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
+ arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
NUM_EXPECTED_SYNCS(3));
}
@@ -358,8 +373,8 @@ static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test)
struct arm_smmu_ste s2_ste;
arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
- arm_smmu_test_make_s2_ste(&s2_ste, true);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
+ arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
NUM_EXPECTED_SYNCS(3));
}
@@ -375,9 +390,9 @@ static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test)
* s1 dss field in the same update.
*/
arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0,
- fake_cdtab_dma_addr);
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS,
- 0x4B4B4b4B4B);
+ 0x4B4B4b4B4B, ARM_SMMU_MASTER_TEST_ATS);
arm_smmu_v3_test_ste_expect_non_hitless_transition(
test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3));
}
@@ -503,6 +518,30 @@ static void arm_smmu_test_make_sva_release_cd(struct arm_smmu_cd *cd,
arm_smmu_make_sva_cd(cd, &master, NULL, asid);
}
+static void arm_smmu_v3_write_ste_test_s1_to_s2_stall(struct kunit *test)
+{
+ struct arm_smmu_ste s1_ste;
+ struct arm_smmu_ste s2_ste;
+
+ arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL);
+ arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL);
+ arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste,
+ NUM_EXPECTED_SYNCS(3));
+}
+
+static void arm_smmu_v3_write_ste_test_s2_to_s1_stall(struct kunit *test)
+{
+ struct arm_smmu_ste s1_ste;
+ struct arm_smmu_ste s2_ste;
+
+ arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0,
+ fake_cdtab_dma_addr, ARM_SMMU_MASTER_TEST_STALL);
+ arm_smmu_test_make_s2_ste(&s2_ste, ARM_SMMU_MASTER_TEST_STALL);
+ arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste,
+ NUM_EXPECTED_SYNCS(3));
+}
+
static void arm_smmu_v3_write_cd_test_sva_clear(struct kunit *test)
{
struct arm_smmu_cd cd = {};
@@ -547,6 +586,8 @@ static struct kunit_case arm_smmu_v3_test_cases[] = {
KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless),
KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear),
KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid),
+ KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2_stall),
+ KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1_stall),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear),
KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_release),
{},
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index ed2b106e02dd..737c5b882355 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -346,14 +346,30 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return 0;
}
-static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
{
- return &smmu->cmdq;
+ struct arm_smmu_cmdq *cmdq = NULL;
+
+ if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
+ cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
+
+ return cmdq ?: &smmu->cmdq;
+}
+
+static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
+{
+ if (cmdq == &smmu->cmdq)
+ return false;
+
+ return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q, u32 prod)
+ struct arm_smmu_cmdq *cmdq, u32 prod)
{
+ struct arm_smmu_queue *q = &cmdq->q;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
};
@@ -368,10 +384,12 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
}
arm_smmu_cmdq_build_cmd(cmd, &ent);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
-static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q)
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
@@ -379,6 +397,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
+ struct arm_smmu_queue *q = &cmdq->q;
int i;
u64 cmd[CMDQ_ENT_DWORDS];
@@ -421,13 +440,15 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
/* Convert the erroneous command into a CMD_SYNC */
arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
+ if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
- __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+ __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}
/*
@@ -592,11 +613,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
unsigned long flags;
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
int ret = 0;
/*
@@ -627,11 +648,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
int ret = 0;
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp);
@@ -651,10 +672,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
struct arm_smmu_queue_poll qp;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 prod = llq->prod;
int ret = 0;
@@ -701,12 +722,14 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
}
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
- if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
- return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+ if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
+ !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
+ return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
- return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+ return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
@@ -743,13 +766,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* CPU will appear before any of the commands from the other CPU.
*/
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
u64 *cmds, int n, bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod;
unsigned long flags;
bool owner;
- struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
struct arm_smmu_ll_queue llq, head;
int ret = 0;
@@ -763,7 +786,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags);
- if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+ if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
@@ -789,7 +812,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
- arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+ arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
@@ -839,7 +862,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
- ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+ ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
@@ -874,7 +897,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
return -EINVAL;
}
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+ return arm_smmu_cmdq_issue_cmdlist(
+ smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -889,21 +913,33 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
+static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_batch *cmds,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ cmds->num = 0;
+ cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
+}
+
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *cmd)
{
+ bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
+ bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
+ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
int index;
- if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
- (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
- cmds->num = 0;
+ if (force_sync || unsupported_cmd) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
+ arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
- arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
- cmds->num = 0;
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, false);
+ arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
index = cmds->num * CMDQ_ENT_DWORDS;
@@ -919,7 +955,8 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+ return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, true);
}
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
@@ -1012,7 +1049,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
- STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
+ STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
+ STRTAB_STE_2_S2R);
used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
}
@@ -1170,7 +1208,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
},
};
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
@@ -1179,48 +1217,36 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
-static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
- struct arm_smmu_l1_ctx_desc *l1_desc)
+static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
+ dma_addr_t l2ptr_dma)
{
- size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
+ u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
- l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
- &l1_desc->l2ptr_dma, GFP_KERNEL);
- if (!l1_desc->l2ptr) {
- dev_warn(smmu->dev,
- "failed to allocate context descriptor table\n");
- return -ENOMEM;
- }
- return 0;
+ /* The HW has 64 bit atomicity with stores to the L2 CD table */
+ WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}
-static void arm_smmu_write_cd_l1_desc(__le64 *dst,
- struct arm_smmu_l1_ctx_desc *l1_desc)
+static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
{
- u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
- CTXDESC_L1_DESC_V;
-
- /* The HW has 64 bit atomicity with stores to the L2 CD table */
- WRITE_ONCE(*dst, cpu_to_le64(val));
+ return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
}
struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
u32 ssid)
{
- struct arm_smmu_l1_ctx_desc *l1_desc;
+ struct arm_smmu_cdtab_l2 *l2;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (!cd_table->cdtab)
+ if (!arm_smmu_cdtab_allocated(cd_table))
return NULL;
if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
- return (struct arm_smmu_cd *)(cd_table->cdtab +
- ssid * CTXDESC_CD_DWORDS);
+ return &cd_table->linear.table[ssid];
- l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];
- if (!l1_desc->l2ptr)
+ l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
+ if (!l2)
return NULL;
- return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES];
+ return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
}
static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
@@ -1232,24 +1258,25 @@ static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
might_sleep();
iommu_group_mutex_assert(master->dev);
- if (!cd_table->cdtab) {
+ if (!arm_smmu_cdtab_allocated(cd_table)) {
if (arm_smmu_alloc_cd_tables(master))
return NULL;
}
if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
- unsigned int idx = ssid / CTXDESC_L2_ENTRIES;
- struct arm_smmu_l1_ctx_desc *l1_desc;
+ unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
+ struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
- l1_desc = &cd_table->l1_desc[idx];
- if (!l1_desc->l2ptr) {
- __le64 *l1ptr;
+ if (!*l2ptr) {
+ dma_addr_t l2ptr_dma;
- if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+ *l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
+ &l2ptr_dma, GFP_KERNEL);
+ if (!*l2ptr)
return NULL;
- l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
- arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
+ arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
+ l2ptr_dma);
/* An invalid L1CD can be cached */
arm_smmu_sync_cd(master, ssid, false);
}
@@ -1369,7 +1396,7 @@ void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
struct arm_smmu_cd target = {};
struct arm_smmu_cd *cdptr;
- if (!master->cd_table.cdtab)
+ if (!arm_smmu_cdtab_allocated(&master->cd_table))
return;
cdptr = arm_smmu_get_cd_ptr(master, ssid);
if (WARN_ON(!cdptr))
@@ -1391,74 +1418,75 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
- cd_table->num_l1_ents = max_contexts;
+ cd_table->linear.num_ents = max_contexts;
- l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
+ l1size = max_contexts * sizeof(struct arm_smmu_cd),
+ cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
+ &cd_table->cdtab_dma,
+ GFP_KERNEL);
+ if (!cd_table->linear.table)
+ return -ENOMEM;
} else {
cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
- cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
- CTXDESC_L2_ENTRIES);
+ cd_table->l2.num_l1_ents =
+ DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
- cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
- sizeof(*cd_table->l1_desc),
- GFP_KERNEL);
- if (!cd_table->l1_desc)
+ cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
+ sizeof(*cd_table->l2.l2ptrs),
+ GFP_KERNEL);
+ if (!cd_table->l2.l2ptrs)
return -ENOMEM;
- l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
- }
-
- cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
- GFP_KERNEL);
- if (!cd_table->cdtab) {
- dev_warn(smmu->dev, "failed to allocate context descriptor\n");
- ret = -ENOMEM;
- goto err_free_l1;
+ l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
+ cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
+ &cd_table->cdtab_dma,
+ GFP_KERNEL);
+ if (!cd_table->l2.l2ptrs) {
+ ret = -ENOMEM;
+ goto err_free_l2ptrs;
+ }
}
-
return 0;
-err_free_l1:
- if (cd_table->l1_desc) {
- devm_kfree(smmu->dev, cd_table->l1_desc);
- cd_table->l1_desc = NULL;
- }
+err_free_l2ptrs:
+ kfree(cd_table->l2.l2ptrs);
+ cd_table->l2.l2ptrs = NULL;
return ret;
}
static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
- size_t size, l1size;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (cd_table->l1_desc) {
- size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
-
- for (i = 0; i < cd_table->num_l1_ents; i++) {
- if (!cd_table->l1_desc[i].l2ptr)
+ if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
+ for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
+ if (!cd_table->l2.l2ptrs[i])
continue;
- dmam_free_coherent(smmu->dev, size,
- cd_table->l1_desc[i].l2ptr,
- cd_table->l1_desc[i].l2ptr_dma);
+ dma_free_coherent(smmu->dev,
+ sizeof(*cd_table->l2.l2ptrs[i]),
+ cd_table->l2.l2ptrs[i],
+ arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
}
- devm_kfree(smmu->dev, cd_table->l1_desc);
- cd_table->l1_desc = NULL;
+ kfree(cd_table->l2.l2ptrs);
- l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ dma_free_coherent(smmu->dev,
+ cd_table->l2.num_l1_ents *
+ sizeof(struct arm_smmu_cdtab_l1),
+ cd_table->l2.l1tab, cd_table->cdtab_dma);
} else {
- l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+ dma_free_coherent(smmu->dev,
+ cd_table->linear.num_ents *
+ sizeof(struct arm_smmu_cd),
+ cd_table->linear.table, cd_table->cdtab_dma);
}
-
- dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
- cd_table->cdtab_dma = 0;
- cd_table->cdtab = NULL;
}
/* Stream table manipulation functions */
-static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma)
+static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
+ dma_addr_t l2ptr_dma)
{
u64 val = 0;
@@ -1466,7 +1494,7 @@ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma)
val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
/* The HW has 64 bit atomicity with stores to the L2 STE table */
- WRITE_ONCE(*dst, cpu_to_le64(val));
+ WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}
struct arm_smmu_ste_writer {
@@ -1646,6 +1674,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
STRTAB_STE_2_S2ENDI |
#endif
STRTAB_STE_2_S2PTW |
+ (master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
STRTAB_STE_2_S2R);
target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
@@ -1670,52 +1699,61 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
- size_t size;
- void *strtab;
dma_addr_t l2ptr_dma;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
- struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
+ struct arm_smmu_strtab_l2 **l2table;
- if (desc->l2ptr)
+ l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
+ if (*l2table)
return 0;
- size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
- strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
-
- desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma,
- GFP_KERNEL);
- if (!desc->l2ptr) {
+ *l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
+ &l2ptr_dma, GFP_KERNEL);
+ if (!*l2table) {
dev_err(smmu->dev,
"failed to allocate l2 stream table for SID %u\n",
sid);
return -ENOMEM;
}
- arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
- arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma);
+ arm_smmu_init_initial_stes((*l2table)->stes,
+ ARRAY_SIZE((*l2table)->stes));
+ arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
+ l2ptr_dma);
return 0;
}
+static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
+{
+ struct arm_smmu_stream *stream_rhs =
+ rb_entry(rhs, struct arm_smmu_stream, node);
+ const u32 *sid_lhs = lhs;
+
+ if (*sid_lhs < stream_rhs->id)
+ return -1;
+ if (*sid_lhs > stream_rhs->id)
+ return 1;
+ return 0;
+}
+
+static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
+ const struct rb_node *rhs)
+{
+ return arm_smmu_streams_cmp_key(
+ &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
+}
+
static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
struct rb_node *node;
- struct arm_smmu_stream *stream;
lockdep_assert_held(&smmu->streams_mutex);
- node = smmu->streams.rb_node;
- while (node) {
- stream = rb_entry(node, struct arm_smmu_stream, node);
- if (stream->id < sid)
- node = node->rb_right;
- else if (stream->id > sid)
- node = node->rb_left;
- else
- return stream->master;
- }
-
- return NULL;
+ node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct arm_smmu_stream, node)->master;
}
/* IRQ and event handlers */
@@ -1739,10 +1777,6 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
return -EOPNOTSUPP;
}
- /* Stage-2 is always pinned at the moment */
- if (evt[1] & EVTQ_1_S2)
- return -EFAULT;
-
if (!(evt[1] & EVTQ_1_STALL))
return -EOPNOTSUPP;
@@ -2021,7 +2055,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
@@ -2036,7 +2070,9 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_master_domain *master_domain;
int i;
unsigned long flags;
- struct arm_smmu_cmdq_ent cmd;
+ struct arm_smmu_cmdq_ent cmd = {
+ .opcode = CMDQ_OP_ATC_INV,
+ };
struct arm_smmu_cmdq_batch cmds;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
@@ -2059,7 +2095,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0;
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices,
@@ -2141,7 +2177,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++;
}
- cmds.num = 0;
+ arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
@@ -2438,16 +2474,12 @@ arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
- unsigned int idx1, idx2;
-
/* Two-level walk */
- idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
- idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
- return &cfg->l1_desc[idx1].l2ptr[idx2];
+ return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
+ ->stes[arm_smmu_strtab_l2_idx(sid)];
} else {
/* Simple linear lookup */
- return (struct arm_smmu_ste *)&cfg
- ->strtab[sid * STRTAB_STE_DWORDS];
+ return &cfg->linear.table[sid];
}
}
@@ -3062,8 +3094,8 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
return ERR_PTR(-EOPNOTSUPP);
smmu_domain = arm_smmu_domain_alloc();
- if (!smmu_domain)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(smmu_domain))
+ return ERR_CAST(smmu_domain);
smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
@@ -3147,12 +3179,9 @@ struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
{
- unsigned long limit = smmu->strtab_cfg.num_l1_ents;
-
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
- limit *= 1UL << STRTAB_SPLIT;
-
- return sid < limit;
+ return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
+ return sid < smmu->strtab_cfg.linear.num_ents;
}
static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
@@ -3173,8 +3202,6 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
{
int i;
int ret = 0;
- struct arm_smmu_stream *new_stream, *cur_stream;
- struct rb_node **new_node, *parent_node = NULL;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
@@ -3185,9 +3212,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
mutex_lock(&smmu->streams_mutex);
for (i = 0; i < fwspec->num_ids; i++) {
+ struct arm_smmu_stream *new_stream = &master->streams[i];
u32 sid = fwspec->ids[i];
- new_stream = &master->streams[i];
new_stream->id = sid;
new_stream->master = master;
@@ -3196,28 +3223,13 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
break;
/* Insert into SID tree */
- new_node = &(smmu->streams.rb_node);
- while (*new_node) {
- cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
- node);
- parent_node = *new_node;
- if (cur_stream->id > new_stream->id) {
- new_node = &((*new_node)->rb_left);
- } else if (cur_stream->id < new_stream->id) {
- new_node = &((*new_node)->rb_right);
- } else {
- dev_warn(master->dev,
- "stream %u already in tree\n",
- cur_stream->id);
- ret = -EINVAL;
- break;
- }
- }
- if (ret)
+ if (rb_find_add(&new_stream->node, &smmu->streams,
+ arm_smmu_streams_cmp_node)) {
+ dev_warn(master->dev, "stream %u already in tree\n",
+ sid);
+ ret = -EINVAL;
break;
-
- rb_link_node(&new_stream->node, parent_node, new_node);
- rb_insert_color(&new_stream->node, &smmu->streams);
+ }
}
if (ret) {
@@ -3295,6 +3307,12 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
+ if (dev_is_pci(dev)) {
+ unsigned int stu = __ffs(smmu->pgsize_bitmap);
+
+ pci_prepare_ats(to_pci_dev(dev), stu);
+ }
+
return &smmu->iommu;
err_free_master:
@@ -3317,7 +3335,7 @@ static void arm_smmu_release_device(struct device *dev)
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
- if (master->cd_table.cdtab)
+ if (arm_smmu_cdtab_allocated(&master->cd_table))
arm_smmu_free_cd_tables(master);
kfree(master);
}
@@ -3507,12 +3525,10 @@ static struct iommu_dirty_ops arm_smmu_dirty_ops = {
};
/* Probing and initialisation functions */
-static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
- struct arm_smmu_queue *q,
- void __iomem *page,
- unsigned long prod_off,
- unsigned long cons_off,
- size_t dwords, const char *name)
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue *q, void __iomem *page,
+ unsigned long prod_off, unsigned long cons_off,
+ size_t dwords, const char *name)
{
size_t qsz;
@@ -3550,9 +3566,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
return 0;
}
-static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq)
{
- struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_set(&cmdq->owner_prod, 0);
@@ -3577,7 +3593,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (ret)
return ret;
- ret = arm_smmu_cmdq_init(smmu);
+ ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
if (ret)
return ret;
@@ -3606,42 +3622,32 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
- void *strtab;
- u64 reg;
- u32 size, l1size;
+ u32 l1size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ unsigned int last_sid_idx =
+ arm_smmu_strtab_l1_idx((1 << smmu->sid_bits) - 1);
/* Calculate the L1 size, capped to the SIDSIZE. */
- size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
- size = min(size, smmu->sid_bits - STRTAB_SPLIT);
- cfg->num_l1_ents = 1 << size;
-
- size += STRTAB_SPLIT;
- if (size < smmu->sid_bits)
+ cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
+ if (cfg->l2.num_l1_ents <= last_sid_idx)
dev_warn(smmu->dev,
"2-level strtab only covers %u/%u bits of SID\n",
- size, smmu->sid_bits);
+ ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
+ smmu->sid_bits);
- l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
- strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
- GFP_KERNEL);
- if (!strtab) {
+ l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
+ cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
+ GFP_KERNEL);
+ if (!cfg->l2.l1tab) {
dev_err(smmu->dev,
"failed to allocate l1 stream table (%u bytes)\n",
l1size);
return -ENOMEM;
}
- cfg->strtab = strtab;
-
- /* Configure strtab_base_cfg for 2 levels */
- reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
- reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
- reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
- cfg->strtab_base_cfg = reg;
- cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
- sizeof(*cfg->l1_desc), GFP_KERNEL);
- if (!cfg->l1_desc)
+ cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
+ sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
+ if (!cfg->l2.l2ptrs)
return -ENOMEM;
return 0;
@@ -3649,50 +3655,36 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
{
- void *strtab;
- u64 reg;
u32 size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
- size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
- strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
- GFP_KERNEL);
- if (!strtab) {
+ size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
+ cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
+ &cfg->linear.ste_dma,
+ GFP_KERNEL);
+ if (!cfg->linear.table) {
dev_err(smmu->dev,
"failed to allocate linear stream table (%u bytes)\n",
size);
return -ENOMEM;
}
- cfg->strtab = strtab;
- cfg->num_l1_ents = 1 << smmu->sid_bits;
+ cfg->linear.num_ents = 1 << smmu->sid_bits;
- /* Configure strtab_base_cfg for a linear table covering all SIDs */
- reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
- reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
- cfg->strtab_base_cfg = reg;
-
- arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
+ arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
return 0;
}
static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
{
- u64 reg;
int ret;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
ret = arm_smmu_init_strtab_2lvl(smmu);
else
ret = arm_smmu_init_strtab_linear(smmu);
-
if (ret)
return ret;
- /* Set the strtab base address */
- reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
- reg |= STRTAB_BASE_RA;
- smmu->strtab_cfg.strtab_base = reg;
-
ida_init(&smmu->vmid_map);
return 0;
@@ -3709,7 +3701,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
if (ret)
return ret;
- return arm_smmu_init_strtab(smmu);
+ ret = arm_smmu_init_strtab(smmu);
+ if (ret)
+ return ret;
+
+ if (smmu->impl_ops && smmu->impl_ops->init_structures)
+ return smmu->impl_ops->init_structures(smmu);
+
+ return 0;
}
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
@@ -3901,6 +3900,30 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
return ret;
}
+static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+ dma_addr_t dma;
+ u32 reg;
+
+ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
+ STRTAB_BASE_CFG_FMT_2LVL) |
+ FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
+ ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
+ FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
+ dma = cfg->l2.l1_dma;
+ } else {
+ reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
+ STRTAB_BASE_CFG_FMT_LINEAR) |
+ FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
+ dma = cfg->linear.ste_dma;
+ }
+ writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
+ smmu->base + ARM_SMMU_STRTAB_BASE);
+ writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
+}
+
static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
int ret;
@@ -3936,10 +3959,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
/* Stream table */
- writeq_relaxed(smmu->strtab_cfg.strtab_base,
- smmu->base + ARM_SMMU_STRTAB_BASE);
- writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
- smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
+ arm_smmu_write_strtab(smmu);
/* Command queue */
writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
@@ -4026,6 +4046,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
return ret;
}
+ if (smmu->impl_ops && smmu->impl_ops->device_reset) {
+ ret = smmu->impl_ops->device_reset(smmu);
+ if (ret) {
+ dev_err(smmu->dev, "failed to reset impl\n");
+ return ret;
+ }
+ }
+
return 0;
}
@@ -4315,18 +4343,55 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
}
#ifdef CONFIG_ACPI
-static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
+#ifdef CONFIG_TEGRA241_CMDQV
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+ const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
+ struct acpi_device *adev;
+
+ /* Look for an NVDA200C node whose _UID matches the SMMU node ID */
+ adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
+ if (adev) {
+ /* Tegra241 CMDQV driver is responsible for put_device() */
+ smmu->impl_dev = &adev->dev;
+ smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
+ dev_info(smmu->dev, "found companion CMDQV device: %s\n",
+ dev_name(smmu->impl_dev));
+ }
+ kfree(uid);
+}
+#else
+static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
+{
+}
+#endif
+
+static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
+ struct arm_smmu_device *smmu)
{
- switch (model) {
+ struct acpi_iort_smmu_v3 *iort_smmu =
+ (struct acpi_iort_smmu_v3 *)node->node_data;
+
+ switch (iort_smmu->model) {
case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
break;
case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break;
+ case ACPI_IORT_SMMU_V3_GENERIC:
+ /*
+ * Tegra241 implementation stores its SMMU options and impl_dev
+ * in DSDT. Thus, go through the ACPI tables unconditionally.
+ */
+ acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
+ break;
}
dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+ return 0;
}
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
@@ -4341,8 +4406,6 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
/* Retrieve SMMUv3 specific data */
iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
- acpi_smmu_get_options(iort_smmu->model, smmu);
-
if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
smmu->features |= ARM_SMMU_FEAT_COHERENCY;
@@ -4354,7 +4417,7 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
smmu->features |= ARM_SMMU_FEAT_HA;
}
- return 0;
+ return acpi_smmu_iort_probe_model(node, smmu);
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
@@ -4435,6 +4498,39 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
+static void arm_smmu_impl_remove(void *data)
+{
+ struct arm_smmu_device *smmu = data;
+
+ if (smmu->impl_ops && smmu->impl_ops->device_remove)
+ smmu->impl_ops->device_remove(smmu);
+}
+
+/*
+ * Probe all the compiled in implementations. Each one checks to see if it
+ * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
+ * replaces the callers. Otherwise the original is returned or ERR_PTR.
+ */
+static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
+ int ret;
+
+ if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
+ new_smmu = tegra241_cmdqv_probe(smmu);
+
+ if (new_smmu == ERR_PTR(-ENODEV))
+ return smmu;
+ if (IS_ERR(new_smmu))
+ return new_smmu;
+
+ ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
+ new_smmu);
+ if (ret)
+ return ERR_PTR(ret);
+ return new_smmu;
+}
+
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -4456,6 +4552,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (ret)
return ret;
+ smmu = arm_smmu_impl_probe(smmu);
+ if (IS_ERR(smmu))
+ return PTR_ERR(smmu);
+
/* Base address */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 14bca41a981b..1e9952ca989f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -14,6 +14,8 @@
#include <linux/mmzone.h>
#include <linux/sizes.h>
+struct arm_smmu_device;
+
/* MMIO registers */
#define ARM_SMMU_IDR0 0x0
#define IDR0_ST_LVL GENMASK(28, 27)
@@ -202,10 +204,8 @@
* 2lvl: 128k L1 entries,
* 256 lazy entries per table (each table covers a PCI bus)
*/
-#define STRTAB_L1_SZ_SHIFT 20
#define STRTAB_SPLIT 8
-#define STRTAB_L1_DESC_DWORDS 1
#define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
@@ -215,6 +215,26 @@ struct arm_smmu_ste {
__le64 data[STRTAB_STE_DWORDS];
};
+#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT)
+struct arm_smmu_strtab_l2 {
+ struct arm_smmu_ste stes[STRTAB_NUM_L2_STES];
+};
+
+struct arm_smmu_strtab_l1 {
+ __le64 l2ptr;
+};
+#define STRTAB_MAX_L1_ENTRIES (1 << 17)
+
+static inline u32 arm_smmu_strtab_l1_idx(u32 sid)
+{
+ return sid / STRTAB_NUM_L2_STES;
+}
+
+static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
+{
+ return sid % STRTAB_NUM_L2_STES;
+}
+
#define STRTAB_STE_0_V (1UL << 0)
#define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT 0
@@ -267,6 +287,7 @@ struct arm_smmu_ste {
#define STRTAB_STE_2_S2AA64 (1UL << 51)
#define STRTAB_STE_2_S2ENDI (1UL << 52)
#define STRTAB_STE_2_S2PTW (1UL << 54)
+#define STRTAB_STE_2_S2S (1UL << 57)
#define STRTAB_STE_2_S2R (1UL << 58)
#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
@@ -280,7 +301,6 @@ struct arm_smmu_ste {
*/
#define CTXDESC_L2_ENTRIES 1024
-#define CTXDESC_L1_DESC_DWORDS 1
#define CTXDESC_L1_DESC_V (1UL << 0)
#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)
@@ -290,6 +310,24 @@ struct arm_smmu_cd {
__le64 data[CTXDESC_CD_DWORDS];
};
+struct arm_smmu_cdtab_l2 {
+ struct arm_smmu_cd cds[CTXDESC_L2_ENTRIES];
+};
+
+struct arm_smmu_cdtab_l1 {
+ __le64 l2ptr;
+};
+
+static inline unsigned int arm_smmu_cdtab_l1_idx(unsigned int ssid)
+{
+ return ssid / CTXDESC_L2_ENTRIES;
+}
+
+static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
+{
+ return ssid % CTXDESC_L2_ENTRIES;
+}
+
#define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
#define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
@@ -320,7 +358,7 @@ struct arm_smmu_cd {
* When the SMMU only supports linear context descriptor tables, pick a
* reasonable size limit (64kB).
*/
-#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
+#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / sizeof(struct arm_smmu_cd))
/* Command queue */
#define CMDQ_ENT_SZ_SHIFT 4
@@ -566,10 +604,18 @@ struct arm_smmu_cmdq {
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
+ bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent);
};
+static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true;
+}
+
struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
+ struct arm_smmu_cmdq *cmdq;
int num;
};
@@ -584,24 +630,23 @@ struct arm_smmu_priq {
};
/* High-level stream table and context descriptor structures */
-struct arm_smmu_strtab_l1_desc {
- struct arm_smmu_ste *l2ptr;
-};
-
struct arm_smmu_ctx_desc {
u16 asid;
};
-struct arm_smmu_l1_ctx_desc {
- struct arm_smmu_cd *l2ptr;
- dma_addr_t l2ptr_dma;
-};
-
struct arm_smmu_ctx_desc_cfg {
- __le64 *cdtab;
+ union {
+ struct {
+ struct arm_smmu_cd *table;
+ unsigned int num_ents;
+ } linear;
+ struct {
+ struct arm_smmu_cdtab_l1 *l1tab;
+ struct arm_smmu_cdtab_l2 **l2ptrs;
+ unsigned int num_l1_ents;
+ } l2;
+ };
dma_addr_t cdtab_dma;
- struct arm_smmu_l1_ctx_desc *l1_desc;
- unsigned int num_l1_ents;
unsigned int used_ssids;
u8 in_ste;
u8 s1fmt;
@@ -609,6 +654,12 @@ struct arm_smmu_ctx_desc_cfg {
u8 s1cdmax;
};
+static inline bool
+arm_smmu_cdtab_allocated(struct arm_smmu_ctx_desc_cfg *cfg)
+{
+ return cfg->linear.table || cfg->l2.l1tab;
+}
+
/* True if the cd table has SSIDS > 0 in use. */
static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table)
{
@@ -620,18 +671,35 @@ struct arm_smmu_s2_cfg {
};
struct arm_smmu_strtab_cfg {
- __le64 *strtab;
- dma_addr_t strtab_dma;
- struct arm_smmu_strtab_l1_desc *l1_desc;
- unsigned int num_l1_ents;
+ union {
+ struct {
+ struct arm_smmu_ste *table;
+ dma_addr_t ste_dma;
+ unsigned int num_ents;
+ } linear;
+ struct {
+ struct arm_smmu_strtab_l1 *l1tab;
+ struct arm_smmu_strtab_l2 **l2ptrs;
+ dma_addr_t l1_dma;
+ unsigned int num_l1_ents;
+ } l2;
+ };
+};
- u64 strtab_base;
- u32 strtab_base_cfg;
+struct arm_smmu_impl_ops {
+ int (*device_reset)(struct arm_smmu_device *smmu);
+ void (*device_remove)(struct arm_smmu_device *smmu);
+ int (*init_structures)(struct arm_smmu_device *smmu);
+ struct arm_smmu_cmdq *(*get_secondary_cmdq)(
+ struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
};
/* An SMMUv3 instance */
struct arm_smmu_device {
struct device *dev;
+ struct device *impl_dev;
+ const struct arm_smmu_impl_ops *impl_ops;
+
void __iomem *base;
void __iomem *page1;
@@ -664,6 +732,7 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
+#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4)
u32 options;
struct arm_smmu_cmdq cmdq;
@@ -815,6 +884,15 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size);
+void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq);
+int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ struct arm_smmu_queue *q, void __iomem *page,
+ unsigned long prod_off, unsigned long cons_off,
+ size_t dwords, const char *name);
+int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq);
+
#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
@@ -860,10 +938,15 @@ static inline void arm_smmu_sva_notifier_synchronize(void) {}
#define arm_smmu_sva_domain_alloc NULL
-static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
- struct device *dev,
- ioasid_t id)
+#endif /* CONFIG_ARM_SMMU_V3_SVA */
+
+#ifdef CONFIG_TEGRA241_CMDQV
+struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu);
+#else /* CONFIG_TEGRA241_CMDQV */
+static inline struct arm_smmu_device *
+tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
{
+ return ERR_PTR(-ENODEV);
}
-#endif /* CONFIG_ARM_SMMU_V3_SVA */
+#endif /* CONFIG_TEGRA241_CMDQV */
#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
new file mode 100644
index 000000000000..fcd13d301fff
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -0,0 +1,909 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. */
+
+#define dev_fmt(fmt) "tegra241_cmdqv: " fmt
+
+#include <linux/acpi.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+
+#include <acpi/acpixf.h>
+
+#include "arm-smmu-v3.h"
+
+/* CMDQV register page base and size defines */
+#define TEGRA241_CMDQV_CONFIG_BASE (0)
+#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K)
+#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K)
+#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K)
+#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K)
+
+/* CMDQV global base regs */
+#define TEGRA241_CMDQV_CONFIG 0x0000
+#define CMDQV_EN BIT(0)
+
+#define TEGRA241_CMDQV_PARAM 0x0004
+#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8)
+#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4)
+
+#define TEGRA241_CMDQV_STATUS 0x0008
+#define CMDQV_ENABLED BIT(0)
+
+#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014
+#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C
+#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m))
+
+#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q))
+#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15)
+#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1)
+#define CMDQV_CMDQ_ALLOCATED BIT(0)
+
+/* VINTF base regs */
+#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v))
+
+#define TEGRA241_VINTF_CONFIG 0x0000
+#define VINTF_HYP_OWN BIT(17)
+#define VINTF_VMID GENMASK(16, 1)
+#define VINTF_EN BIT(0)
+
+#define TEGRA241_VINTF_STATUS 0x0004
+#define VINTF_STATUS GENMASK(3, 1)
+#define VINTF_ENABLED BIT(0)
+
+#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \
+ (0x00C0 + 0x8*(m))
+#define LVCMDQ_ERR_MAP_NUM_64 2
+
+/* VCMDQ base regs */
+/* -- PAGE0 -- */
+#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q))
+
+#define TEGRA241_VCMDQ_CONS 0x00000
+#define VCMDQ_CONS_ERR GENMASK(30, 24)
+
+#define TEGRA241_VCMDQ_PROD 0x00004
+
+#define TEGRA241_VCMDQ_CONFIG 0x00008
+#define VCMDQ_EN BIT(0)
+
+#define TEGRA241_VCMDQ_STATUS 0x0000C
+#define VCMDQ_ENABLED BIT(0)
+
+#define TEGRA241_VCMDQ_GERROR 0x00010
+#define TEGRA241_VCMDQ_GERRORN 0x00014
+
+/* -- PAGE1 -- */
+#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q))
+#define VCMDQ_ADDR GENMASK(47, 5)
+#define VCMDQ_LOG2SIZE GENMASK(4, 0)
+#define VCMDQ_LOG2SIZE_MAX 19
+
+#define TEGRA241_VCMDQ_BASE 0x00000
+#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008
+
+/* VINTF logical-VCMDQ pages */
+#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i))
+#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K)
+#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \
+ (TEGRA241_VINTFi_PAGE0(i) + 0x80*(q))
+#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \
+ (TEGRA241_VINTFi_PAGE1(i) + 0x80*(q))
+
+/* MMIO helpers */
+#define REG_CMDQV(_cmdqv, _regname) \
+ ((_cmdqv)->base + TEGRA241_CMDQV_##_regname)
+#define REG_VINTF(_vintf, _regname) \
+ ((_vintf)->base + TEGRA241_VINTF_##_regname)
+#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \
+ ((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname)
+#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \
+ ((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname)
+
+
+static bool disable_cmdqv;
+module_param(disable_cmdqv, bool, 0444);
+MODULE_PARM_DESC(disable_cmdqv,
+ "This allows to disable CMDQV HW and use default SMMU internal CMDQ.");
+
+static bool bypass_vcmdq;
+module_param(bypass_vcmdq, bool, 0444);
+MODULE_PARM_DESC(bypass_vcmdq,
+ "This allows to bypass VCMDQ for debugging use or perf comparison.");
+
+/**
+ * struct tegra241_vcmdq - Virtual Command Queue
+ * @idx: Global index in the CMDQV
+ * @lidx: Local index in the VINTF
+ * @enabled: Enable status
+ * @cmdqv: Parent CMDQV pointer
+ * @vintf: Parent VINTF pointer
+ * @cmdq: Command Queue struct
+ * @page0: MMIO Page0 base address
+ * @page1: MMIO Page1 base address
+ */
+struct tegra241_vcmdq {
+ u16 idx;
+ u16 lidx;
+
+ bool enabled;
+
+ struct tegra241_cmdqv *cmdqv;
+ struct tegra241_vintf *vintf;
+ struct arm_smmu_cmdq cmdq;
+
+ void __iomem *page0;
+ void __iomem *page1;
+};
+
+/**
+ * struct tegra241_vintf - Virtual Interface
+ * @idx: Global index in the CMDQV
+ * @enabled: Enable status
+ * @hyp_own: Owned by hypervisor (in-kernel)
+ * @cmdqv: Parent CMDQV pointer
+ * @lvcmdqs: List of logical VCMDQ pointers
+ * @base: MMIO base address
+ */
+struct tegra241_vintf {
+ u16 idx;
+
+ bool enabled;
+ bool hyp_own;
+
+ struct tegra241_cmdqv *cmdqv;
+ struct tegra241_vcmdq **lvcmdqs;
+
+ void __iomem *base;
+};
+
+/**
+ * struct tegra241_cmdqv - CMDQ-V for SMMUv3
+ * @smmu: SMMUv3 device
+ * @dev: CMDQV device
+ * @base: MMIO base address
+ * @irq: IRQ number
+ * @num_vintfs: Total number of VINTFs
+ * @num_vcmdqs: Total number of VCMDQs
+ * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
+ * @vintf_ids: VINTF id allocator
+ * @vintfs: List of VINTFs
+ */
+struct tegra241_cmdqv {
+ struct arm_smmu_device smmu;
+ struct device *dev;
+
+ void __iomem *base;
+ int irq;
+
+ /* CMDQV Hardware Params */
+ u16 num_vintfs;
+ u16 num_vcmdqs;
+ u16 num_lvcmdqs_per_vintf;
+
+ struct ida vintf_ids;
+
+ struct tegra241_vintf **vintfs;
+};
+
+/* Config and Polling Helpers */
+
+static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv,
+ void __iomem *addr_config,
+ void __iomem *addr_status,
+ u32 regval, const char *header,
+ bool *out_enabled)
+{
+ bool en = regval & BIT(0);
+ int ret;
+
+ writel(regval, addr_config);
+ ret = readl_poll_timeout(addr_status, regval,
+ en ? regval & BIT(0) : !(regval & BIT(0)),
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+ if (ret)
+ dev_err(cmdqv->dev, "%sfailed to %sable, STATUS=0x%08X\n",
+ header, en ? "en" : "dis", regval);
+ if (out_enabled)
+ WRITE_ONCE(*out_enabled, regval & BIT(0));
+ return ret;
+}
+
+static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval)
+{
+ return tegra241_cmdqv_write_config(cmdqv,
+ REG_CMDQV(cmdqv, CONFIG),
+ REG_CMDQV(cmdqv, STATUS),
+ regval, "CMDQV: ", NULL);
+}
+
+static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval)
+{
+ char header[16];
+
+ snprintf(header, 16, "VINTF%u: ", vintf->idx);
+ return tegra241_cmdqv_write_config(vintf->cmdqv,
+ REG_VINTF(vintf, CONFIG),
+ REG_VINTF(vintf, STATUS),
+ regval, header, &vintf->enabled);
+}
+
+static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq,
+ char *header, int hlen)
+{
+ WARN_ON(hlen < 64);
+ if (WARN_ON(!vcmdq->vintf))
+ return "";
+ snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
+ vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx);
+ return header;
+}
+
+static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval)
+{
+ char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
+
+ return tegra241_cmdqv_write_config(vcmdq->cmdqv,
+ REG_VCMDQ_PAGE0(vcmdq, CONFIG),
+ REG_VCMDQ_PAGE0(vcmdq, STATUS),
+ regval, h, &vcmdq->enabled);
+}
+
+/* ISR Functions */
+
+static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf)
+{
+ int i;
+
+ for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) {
+ u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i)));
+
+ while (map) {
+ unsigned long lidx = __ffs64(map);
+ struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
+ u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
+
+ __arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq);
+ writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+ map &= ~BIT_ULL(lidx);
+ }
+ }
+}
+
+static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
+{
+ struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid;
+ void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP);
+ char err_str[256];
+ u64 vintf_map;
+
+ /* Use readl_relaxed() as register addresses are not 64-bit aligned */
+ vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 |
+ (u64)readl_relaxed(reg_vintf_map);
+
+ snprintf(err_str, sizeof(err_str),
+ "vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map,
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))),
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))),
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))),
+ readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0))));
+
+ dev_warn(cmdqv->dev, "unexpected error reported. %s\n", err_str);
+
+ /* Handle VINTF0 and its LVCMDQs */
+ if (vintf_map & BIT_ULL(0)) {
+ tegra241_vintf0_handle_error(cmdqv->vintfs[0]);
+ vintf_map &= ~BIT_ULL(0);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* Command Queue Function */
+
+static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent)
+{
+ switch (ent->opcode) {
+ case CMDQ_OP_TLBI_NH_ASID:
+ case CMDQ_OP_TLBI_NH_VA:
+ case CMDQ_OP_ATC_INV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static struct arm_smmu_cmdq *
+tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq_ent *ent)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ struct tegra241_vintf *vintf = cmdqv->vintfs[0];
+ struct tegra241_vcmdq *vcmdq;
+ u16 lidx;
+
+ if (READ_ONCE(bypass_vcmdq))
+ return NULL;
+
+ /* Use SMMU CMDQ if VINTF0 is uninitialized */
+ if (!READ_ONCE(vintf->enabled))
+ return NULL;
+
+ /*
+ * Select a LVCMDQ to use. Here we use a temporal solution to
+ * balance out traffic on cmdq issuing: each cmdq has its own
+ * lock, if all cpus issue cmdlist using the same cmdq, only
+ * one CPU at a time can enter the process, while the others
+ * will be spinning at the same lock.
+ */
+ lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf;
+ vcmdq = vintf->lvcmdqs[lidx];
+ if (!vcmdq || !READ_ONCE(vcmdq->enabled))
+ return NULL;
+
+ /* Unsupported CMD goes for smmu->cmdq pathway */
+ if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent))
+ return NULL;
+ return &vcmdq->cmdq;
+}
+
+/* HW Reset Functions */
+
+static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
+{
+ char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
+ u32 gerrorn, gerror;
+
+ if (vcmdq_write_config(vcmdq, 0)) {
+ dev_err(vcmdq->cmdqv->dev,
+ "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h,
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
+ }
+ writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD));
+ writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS));
+ writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE));
+ writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE));
+
+ gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+ gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
+ if (gerror != gerrorn) {
+ dev_warn(vcmdq->cmdqv->dev,
+ "%suncleared error detected, resetting\n", h);
+ writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+ }
+
+ dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h);
+}
+
+static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
+{
+ char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
+ int ret;
+
+ /* Reset VCMDQ */
+ tegra241_vcmdq_hw_deinit(vcmdq);
+
+ /* Configure and enable VCMDQ */
+ writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
+
+ ret = vcmdq_write_config(vcmdq, VCMDQ_EN);
+ if (ret) {
+ dev_err(vcmdq->cmdqv->dev,
+ "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h,
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
+ readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
+ return ret;
+ }
+
+ dev_dbg(vcmdq->cmdqv->dev, "%sinited\n", h);
+ return 0;
+}
+
+static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf)
+{
+ u16 lidx;
+
+ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
+ if (vintf->lvcmdqs && vintf->lvcmdqs[lidx])
+ tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
+ vintf_write_config(vintf, 0);
+}
+
+static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
+{
+ u32 regval;
+ u16 lidx;
+ int ret;
+
+ /* Reset VINTF */
+ tegra241_vintf_hw_deinit(vintf);
+
+ /* Configure and enable VINTF */
+ /*
+ * Note that HYP_OWN bit is wired to zero when running in guest kernel,
+ * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a
+ * restricted set of supported commands.
+ */
+ regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own);
+ writel(regval, REG_VINTF(vintf, CONFIG));
+
+ ret = vintf_write_config(vintf, regval | VINTF_EN);
+ if (ret)
+ return ret;
+ /*
+ * As being mentioned above, HYP_OWN bit is wired to zero for a guest
+ * kernel, so read it back from HW to ensure that reflects in hyp_own
+ */
+ vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG)));
+
+ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) {
+ if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) {
+ ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]);
+ if (ret) {
+ tegra241_vintf_hw_deinit(vintf);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ u16 qidx, lidx, idx;
+ u32 regval;
+ int ret;
+
+ /* Reset CMDQV */
+ regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG));
+ ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN);
+ if (ret)
+ return ret;
+ ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN);
+ if (ret)
+ return ret;
+
+ /* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */
+ for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) {
+ for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
+ regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx);
+ regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx);
+ regval |= CMDQV_CMDQ_ALLOCATED;
+ writel_relaxed(regval,
+ REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++)));
+ }
+ }
+
+ return tegra241_vintf_hw_init(cmdqv->vintfs[0], true);
+}
+
+/* VCMDQ Resource Helpers */
+
+static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
+{
+ struct arm_smmu_queue *q = &vcmdq->cmdq.q;
+ size_t nents = 1 << q->llq.max_n_shift;
+ size_t qsz = nents << CMDQ_ENT_SZ_SHIFT;
+
+ if (!q->base)
+ return;
+ dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma);
+}
+
+static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
+{
+ struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
+ struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq;
+ struct arm_smmu_queue *q = &cmdq->q;
+ char name[16];
+ int ret;
+
+ snprintf(name, 16, "vcmdq%u", vcmdq->idx);
+
+ q->llq.max_n_shift = VCMDQ_LOG2SIZE_MAX;
+
+ /* Use the common helper to init the VCMDQ, and then... */
+ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0,
+ TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS,
+ CMDQ_ENT_DWORDS, name);
+ if (ret)
+ return ret;
+
+ /* ...override q_base to write VCMDQ_BASE registers */
+ q->q_base = q->base_dma & VCMDQ_ADDR;
+ q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
+
+ if (!vcmdq->vintf->hyp_own)
+ cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd;
+
+ return arm_smmu_cmdq_init(smmu, cmdq);
+}
+
+/* VINTF Logical VCMDQ Resource Helpers */
+
+static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ vintf->lvcmdqs[lidx] = NULL;
+}
+
+static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx,
+ struct tegra241_vcmdq *vcmdq)
+{
+ struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
+ u16 idx = vintf->idx;
+
+ vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx;
+ vcmdq->lidx = lidx;
+ vcmdq->cmdqv = cmdqv;
+ vcmdq->vintf = vintf;
+ vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx);
+ vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx);
+
+ vintf->lvcmdqs[lidx] = vcmdq;
+ return 0;
+}
+
+static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
+ char header[64];
+
+ tegra241_vcmdq_free_smmu_cmdq(vcmdq);
+ tegra241_vintf_deinit_lvcmdq(vintf, lidx);
+
+ dev_dbg(vintf->cmdqv->dev,
+ "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64));
+ kfree(vcmdq);
+}
+
+static struct tegra241_vcmdq *
+tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
+ struct tegra241_vcmdq *vcmdq;
+ char header[64];
+ int ret;
+
+ vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL);
+ if (!vcmdq)
+ return ERR_PTR(-ENOMEM);
+
+ ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq);
+ if (ret)
+ goto free_vcmdq;
+
+ /* Build an arm_smmu_cmdq for each LVCMDQ */
+ ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq);
+ if (ret)
+ goto deinit_lvcmdq;
+
+ dev_dbg(cmdqv->dev,
+ "%sallocated\n", lvcmdq_error_header(vcmdq, header, 64));
+ return vcmdq;
+
+deinit_lvcmdq:
+ tegra241_vintf_deinit_lvcmdq(vintf, lidx);
+free_vcmdq:
+ kfree(vcmdq);
+ return ERR_PTR(ret);
+}
+
+/* VINTF Resource Helpers */
+
+static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
+{
+ kfree(cmdqv->vintfs[idx]->lvcmdqs);
+ ida_free(&cmdqv->vintf_ids, idx);
+ cmdqv->vintfs[idx] = NULL;
+}
+
+static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx,
+ struct tegra241_vintf *vintf)
+{
+
+ u16 idx;
+ int ret;
+
+ ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ idx = ret;
+
+ vintf->idx = idx;
+ vintf->cmdqv = cmdqv;
+ vintf->base = cmdqv->base + TEGRA241_VINTF(idx);
+
+ vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf,
+ sizeof(*vintf->lvcmdqs), GFP_KERNEL);
+ if (!vintf->lvcmdqs) {
+ ida_free(&cmdqv->vintf_ids, idx);
+ return -ENOMEM;
+ }
+
+ cmdqv->vintfs[idx] = vintf;
+ return ret;
+}
+
+/* Remove Helpers */
+
+static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
+{
+ tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
+ tegra241_vintf_free_lvcmdq(vintf, lidx);
+}
+
+static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
+{
+ struct tegra241_vintf *vintf = cmdqv->vintfs[idx];
+ u16 lidx;
+
+ /* Remove LVCMDQ resources */
+ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
+ if (vintf->lvcmdqs[lidx])
+ tegra241_vintf_remove_lvcmdq(vintf, lidx);
+
+ /* Remove VINTF resources */
+ tegra241_vintf_hw_deinit(vintf);
+
+ dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx);
+ tegra241_cmdqv_deinit_vintf(cmdqv, idx);
+ kfree(vintf);
+}
+
+static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ u16 idx;
+
+ /* Remove VINTF resources */
+ for (idx = 0; idx < cmdqv->num_vintfs; idx++) {
+ if (cmdqv->vintfs[idx]) {
+ /* Only vintf0 should remain at this stage */
+ WARN_ON(idx > 0);
+ tegra241_cmdqv_remove_vintf(cmdqv, idx);
+ }
+ }
+
+ /* Remove cmdqv resources */
+ ida_destroy(&cmdqv->vintf_ids);
+
+ if (cmdqv->irq > 0)
+ free_irq(cmdqv->irq, cmdqv);
+ iounmap(cmdqv->base);
+ kfree(cmdqv->vintfs);
+ put_device(cmdqv->dev); /* smmu->impl_dev */
+}
+
+static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
+ .get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
+ .device_reset = tegra241_cmdqv_hw_reset,
+ .device_remove = tegra241_cmdqv_remove,
+};
+
+/* Probe Functions */
+
+static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data)
+{
+ struct resource_win win;
+
+ return !acpi_dev_resource_address_space(res, &win);
+}
+
+static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data)
+{
+ struct resource r;
+ int *irq = data;
+
+ if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r))
+ *irq = r.start;
+ return 1; /* No need to add resource to the list */
+}
+
+static struct resource *
+tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq)
+{
+ struct acpi_device *adev = to_acpi_device(dev);
+ struct list_head resource_list;
+ struct resource_entry *rentry;
+ struct resource *res = NULL;
+ int ret;
+
+ INIT_LIST_HEAD(&resource_list);
+ ret = acpi_dev_get_resources(adev, &resource_list,
+ tegra241_cmdqv_acpi_is_memory, NULL);
+ if (ret < 0) {
+ dev_err(dev, "failed to get memory resource: %d\n", ret);
+ return NULL;
+ }
+
+ rentry = list_first_entry_or_null(&resource_list,
+ struct resource_entry, node);
+ if (!rentry) {
+ dev_err(dev, "failed to get memory resource entry\n");
+ goto free_list;
+ }
+
+ /* Caller must free the res */
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ goto free_list;
+
+ *res = *rentry->res;
+
+ acpi_dev_free_resource_list(&resource_list);
+
+ INIT_LIST_HEAD(&resource_list);
+
+ if (irq)
+ ret = acpi_dev_get_resources(adev, &resource_list,
+ tegra241_cmdqv_acpi_get_irqs, irq);
+ if (ret < 0 || !irq || *irq <= 0)
+ dev_warn(dev, "no interrupt. errors will not be reported\n");
+
+free_list:
+ acpi_dev_free_resource_list(&resource_list);
+ return res;
+}
+
+static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ struct tegra241_vintf *vintf;
+ int lidx;
+ int ret;
+
+ vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
+ if (!vintf)
+ goto out_fallback;
+
+ /* Init VINTF0 for in-kernel use */
+ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
+ if (ret) {
+ dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
+ goto free_vintf;
+ }
+
+ /* Preallocate logical VCMDQs to VINTF0 */
+ for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
+ struct tegra241_vcmdq *vcmdq;
+
+ vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
+ if (IS_ERR(vcmdq))
+ goto free_lvcmdq;
+ }
+
+ /* Now, we are ready to run all the impl ops */
+ smmu->impl_ops = &tegra241_cmdqv_impl_ops;
+ return 0;
+
+free_lvcmdq:
+ for (lidx--; lidx >= 0; lidx--)
+ tegra241_vintf_free_lvcmdq(vintf, lidx);
+ tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
+free_vintf:
+ kfree(vintf);
+out_fallback:
+ dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
+ smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
+ tegra241_cmdqv_remove(smmu);
+ return 0;
+}
+
+struct dentry *cmdqv_debugfs_dir;
+
+static struct arm_smmu_device *
+__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
+ int irq)
+{
+ static const struct arm_smmu_impl_ops init_ops = {
+ .init_structures = tegra241_cmdqv_init_structures,
+ .device_remove = tegra241_cmdqv_remove,
+ };
+ struct tegra241_cmdqv *cmdqv = NULL;
+ struct arm_smmu_device *new_smmu;
+ void __iomem *base;
+ u32 regval;
+ int ret;
+
+ static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0);
+
+ base = ioremap(res->start, resource_size(res));
+ if (!base) {
+ dev_err(smmu->dev, "failed to ioremap\n");
+ return NULL;
+ }
+
+ regval = readl(base + TEGRA241_CMDQV_CONFIG);
+ if (disable_cmdqv) {
+ dev_info(smmu->dev, "Detected disable_cmdqv=true\n");
+ writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG);
+ goto iounmap;
+ }
+
+ cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL);
+ if (!cmdqv)
+ goto iounmap;
+ new_smmu = &cmdqv->smmu;
+
+ cmdqv->irq = irq;
+ cmdqv->base = base;
+ cmdqv->dev = smmu->impl_dev;
+
+ if (cmdqv->irq > 0) {
+ ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv",
+ cmdqv);
+ if (ret) {
+ dev_err(cmdqv->dev, "failed to request irq (%d): %d\n",
+ cmdqv->irq, ret);
+ goto iounmap;
+ }
+ }
+
+ regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM));
+ cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval);
+ cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval);
+ cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs;
+
+ cmdqv->vintfs =
+ kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL);
+ if (!cmdqv->vintfs)
+ goto free_irq;
+
+ ida_init(&cmdqv->vintf_ids);
+
+#ifdef CONFIG_IOMMU_DEBUGFS
+ if (!cmdqv_debugfs_dir) {
+ cmdqv_debugfs_dir =
+ debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir);
+ debugfs_create_bool("bypass_vcmdq", 0644, cmdqv_debugfs_dir,
+ &bypass_vcmdq);
+ }
+#endif
+
+ /* Provide init-level ops only, until tegra241_cmdqv_init_structures */
+ new_smmu->impl_ops = &init_ops;
+
+ return new_smmu;
+
+free_irq:
+ if (cmdqv->irq > 0)
+ free_irq(cmdqv->irq, cmdqv);
+iounmap:
+ iounmap(base);
+ return NULL;
+}
+
+struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_device *new_smmu;
+ struct resource *res = NULL;
+ int irq;
+
+ if (!smmu->dev->of_node)
+ res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq);
+ if (!res)
+ goto out_fallback;
+
+ new_smmu = __tegra241_cmdqv_probe(smmu, res, irq);
+ kfree(res);
+
+ if (new_smmu)
+ return new_smmu;
+
+out_fallback:
+ dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
+ smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
+ put_device(smmu->impl_dev);
+ return ERR_PTR(-ENODEV);
+}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 36c6b36ad4ff..6372f3e25c4b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -283,6 +283,20 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
int i;
/*
+ * MSM8998 LPASS SMMU reports 13 context banks, but accessing
+ * the last context bank crashes the system.
+ */
+ if (of_device_is_compatible(smmu->dev->of_node, "qcom,msm8998-smmu-v2") &&
+ smmu->num_context_banks == 13) {
+ smmu->num_context_banks = 12;
+ } else if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2")) {
+ if (smmu->num_context_banks == 21) /* SDM630 / SDM660 A2NOC SMMU */
+ smmu->num_context_banks = 7;
+ else if (smmu->num_context_banks == 14) /* SDM630 / SDM660 LPASS SMMU */
+ smmu->num_context_banks = 13;
+ }
+
+ /*
* Some platforms support more than the Arm SMMU architected maximum of
* 128 stream matching groups. For unknown reasons, the additional
* groups don't exhibit the same behavior as the architected registers,
@@ -338,6 +352,19 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
return 0;
}
+static int qcom_adreno_smmuv2_cfg_probe(struct arm_smmu_device *smmu)
+{
+ /* Support for 16K pages is advertised on some SoCs, but it doesn't seem to work */
+ smmu->features &= ~ARM_SMMU_FEAT_FMT_AARCH64_16K;
+
+ /* TZ protects several last context banks, hide them from Linux */
+ if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm630-smmu-v2") &&
+ smmu->num_context_banks == 5)
+ smmu->num_context_banks = 2;
+
+ return 0;
+}
+
static void qcom_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
@@ -436,6 +463,7 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = {
static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
.init_context = qcom_adreno_smmu_init_context,
+ .cfg_probe = qcom_adreno_smmuv2_cfg_probe,
.def_domain_type = qcom_smmu_def_domain_type,
.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
.write_sctlr = qcom_adreno_smmu_write_sctlr,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 723273440c21..8321962b3714 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -417,7 +417,7 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
const struct arm_smmu_context_fault_info *cfi)
{
- dev_dbg(smmu->dev,
+ dev_err(smmu->dev,
"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx);
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index e9d2bff4659b..30be786bff11 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -416,14 +416,12 @@ static struct iommu_group *fsl_pamu_device_group(struct device *dev)
static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
- int len;
-
/*
* uboot must fill the fsl,liodn for platform devices to be supported by
* the iommu.
*/
if (!dev_is_pci(dev) &&
- !of_get_property(dev->of_node, "fsl,liodn", &len))
+ !of_property_present(dev->of_node, "fsl,liodn"))
return ERR_PTR(-ENODEV);
return &pamu_iommu;
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 44e92638c0cd..e5b89f728ad3 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -190,6 +190,13 @@ int cache_tag_assign_domain(struct dmar_domain *domain,
u16 did = domain_get_id_for_dev(domain, dev);
int ret;
+ /* domain->qi_bach will be freed in iommu_free_domain() path. */
+ if (!domain->qi_batch) {
+ domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL);
+ if (!domain->qi_batch)
+ return -ENOMEM;
+ }
+
ret = __cache_tag_assign_domain(domain, did, dev, pasid);
if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
return ret;
@@ -255,6 +262,154 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
+static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+ if (!iommu || !batch->index)
+ return;
+
+ qi_submit_sync(iommu, batch->descs, batch->index, 0);
+
+ /* Reset the index value and clean the whole batch buffer. */
+ memset(batch, 0, sizeof(*batch));
+}
+
+static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+ if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+ qi_batch_flush_descs(iommu, batch);
+}
+
+static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type,
+ struct qi_batch *batch)
+{
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u16 qdep, u64 addr, unsigned int mask,
+ struct qi_batch *batch)
+{
+ /*
+ * According to VT-d spec, software is recommended to not submit any Device-TLB
+ * invalidation requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
+ u64 addr, unsigned long npages, bool ih,
+ struct qi_batch *batch)
+{
+ /*
+ * npages == -1 means a PASID-selective invalidation, otherwise,
+ * a positive value for Page-selective-within-PASID invalidation.
+ * 0 is not a valid input.
+ */
+ if (!npages)
+ return;
+
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr,
+ unsigned int size_order, struct qi_batch *batch)
+{
+ /*
+ * According to VT-d spec, software is recommended to not submit any
+ * Device-TLB invalidation requests while address remapping hardware
+ * is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
+ &batch->descs[batch->index]);
+ qi_batch_increment_index(iommu, batch);
+}
+
+static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
+ unsigned long addr, unsigned long pages,
+ unsigned long mask, int ih)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ u64 type = DMA_TLB_PSI_FLUSH;
+
+ if (domain->use_first_level) {
+ qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
+ pages, ih, domain->qi_batch);
+ return;
+ }
+
+ /*
+ * Fallback to domain selective flush if no PSI support or the size
+ * is too big.
+ */
+ if (!cap_pgsel_inv(iommu->cap) ||
+ mask > cap_max_amask_val(iommu->cap) || pages == -1) {
+ addr = 0;
+ mask = 0;
+ ih = 0;
+ type = DMA_TLB_DSI_FLUSH;
+ }
+
+ if (ecap_qis(iommu->ecap))
+ qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
+ domain->qi_batch);
+ else
+ __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+}
+
+static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
+ unsigned long addr, unsigned long mask)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ if (tag->pasid == IOMMU_NO_PASID) {
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ addr, mask, domain->qi_batch);
+ if (info->dtlb_extra_inval)
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+ addr, mask, domain->qi_batch);
+ return;
+ }
+
+ qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+ info->ats_qdep, addr, mask, domain->qi_batch);
+ if (info->dtlb_extra_inval)
+ qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+ info->ats_qdep, addr, mask,
+ domain->qi_batch);
+}
+
+static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
+{
+ struct intel_iommu *iommu = tag->iommu;
+ struct device_domain_info *info;
+ u16 sid;
+
+ info = dev_iommu_priv_get(tag->dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH, domain->qi_batch);
+ if (info->dtlb_extra_inval)
+ qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+ MAX_AGAW_PFN_WIDTH, domain->qi_batch);
+}
+
/*
* Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
* when the memory mappings in the target domain have been modified.
@@ -262,6 +417,7 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long end, int ih)
{
+ struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
@@ -270,30 +426,14 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
- struct device_domain_info *info;
- u16 sid;
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
- if (domain->use_first_level) {
- qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, addr, pages, ih);
- } else {
- /*
- * Fallback to domain selective flush if no
- * PSI support or the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) ||
- mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
- }
+ cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
break;
case CACHE_TAG_NESTING_DEVTLB:
/*
@@ -307,23 +447,13 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
mask = MAX_AGAW_PFN_WIDTH;
fallthrough;
case CACHE_TAG_DEVTLB:
- info = dev_iommu_priv_get(tag->dev);
- sid = PCI_DEVID(info->bus, info->devfn);
-
- if (tag->pasid == IOMMU_NO_PASID)
- qi_flush_dev_iotlb(iommu, sid, info->pfsid,
- info->ats_qdep, addr, mask);
- else
- qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
- tag->pasid, info->ats_qdep,
- addr, mask);
-
- quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
+ cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
break;
}
trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
@@ -333,39 +463,30 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
*/
void cache_tag_flush_all(struct dmar_domain *domain)
{
+ struct intel_iommu *iommu = NULL;
struct cache_tag *tag;
unsigned long flags;
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
- struct device_domain_info *info;
- u16 sid;
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
- if (domain->use_first_level)
- qi_flush_piotlb(iommu, tag->domain_id,
- tag->pasid, 0, -1, 0);
- else
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
+ cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
- info = dev_iommu_priv_get(tag->dev);
- sid = PCI_DEVID(info->bus, info->devfn);
-
- qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
- 0, MAX_AGAW_PFN_WIDTH);
- quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
- IOMMU_NO_PASID, info->ats_qdep);
+ cache_tag_flush_devtlb_all(domain, tag);
break;
}
trace_cache_tag_flush_all(tag);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
@@ -383,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end)
{
+ struct intel_iommu *iommu = NULL;
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
@@ -391,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
- struct intel_iommu *iommu = tag->iommu;
+ if (iommu && iommu != tag->iommu)
+ qi_batch_flush_descs(iommu, domain->qi_batch);
+ iommu = tag->iommu;
if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
iommu_flush_write_buffer(iommu);
@@ -399,22 +523,11 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
}
if (tag->type == CACHE_TAG_IOTLB ||
- tag->type == CACHE_TAG_NESTING_IOTLB) {
- /*
- * Fallback to domain selective flush if no
- * PSI support or the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) ||
- mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- 0, 0, DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, tag->domain_id,
- addr, mask,
- DMA_TLB_PSI_FLUSH);
- }
+ tag->type == CACHE_TAG_NESTING_IOTLB)
+ cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
}
+ qi_batch_flush_descs(iommu, domain->qi_batch);
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 1c8d3141cb55..eaf862e8dea1 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1204,9 +1204,7 @@ static void free_iommu(struct intel_iommu *iommu)
*/
static inline void reclaim_free_desc(struct q_inval *qi)
{
- while (qi->desc_status[qi->free_tail] == QI_DONE ||
- qi->desc_status[qi->free_tail] == QI_ABORT) {
- qi->desc_status[qi->free_tail] = QI_FREE;
+ while (qi->desc_status[qi->free_tail] == QI_FREE && qi->free_tail != qi->free_head) {
qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
qi->free_cnt++;
}
@@ -1463,8 +1461,16 @@ restart:
raw_spin_lock(&qi->q_lock);
}
- for (i = 0; i < count; i++)
- qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
+ /*
+ * The reclaim code can free descriptors from multiple submissions
+ * starting from the tail of the queue. When count == 0, the
+ * status of the standalone wait descriptor at the tail of the queue
+ * must be set to QI_FREE to allow the reclaim code to proceed.
+ * It is also possible that descriptors from one of the previous
+ * submissions has to be reclaimed by a subsequent submission.
+ */
+ for (i = 0; i <= count; i++)
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_FREE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -1520,24 +1526,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type)
{
- u8 dw = 0, dr = 0;
-
struct qi_desc desc;
- int ih = 0;
-
- if (cap_write_drain(iommu->cap))
- dw = 1;
-
- if (cap_read_drain(iommu->cap))
- dr = 1;
-
- desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
- | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
- | QI_IOTLB_AM(size_order);
- desc.qw2 = 0;
- desc.qw3 = 0;
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1555,20 +1546,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
- if (mask) {
- addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
- } else
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
-
- if (qdep >= QI_DEV_IOTLB_MAX_INVS)
- qdep = 0;
-
- desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
- QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw2 = 0;
- desc.qw3 = 0;
-
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1588,28 +1566,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
return;
}
- if (npages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- } else {
- int mask = ilog2(__roundup_pow_of_two(npages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
-
- if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
- addr = ALIGN_DOWN(addr, align);
-
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(addr) |
- QI_EIOTLB_IH(ih) |
- QI_EIOTLB_AM(mask);
- }
-
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1617,7 +1574,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
{
- unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
/*
@@ -1629,40 +1585,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
- desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
- QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
- QI_DEV_IOTLB_PFSID(pfsid);
-
- /*
- * If S bit is 0, we only flush a single page. If S bit is set,
- * The least significant zero bit indicates the invalidation address
- * range. VT-d spec 6.5.2.6.
- * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
- * size order = 0 is PAGE_SIZE 4KB
- * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
- * ECAP.
- */
- if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
- pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
- addr, size_order);
-
- /* Take page address */
- desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
-
- if (size_order) {
- /*
- * Existing 0s in address below size_order may be the least
- * significant bit, we must set them to 1s to avoid having
- * smaller size than desired.
- */
- desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
- VTD_PAGE_SHIFT);
- /* Clear size_order bit to indicate size */
- desc.qw1 &= ~mask;
- /* Set the S bit to indicate flushing more than 1 page */
- desc.qw1 |= QI_DEV_EIOTLB_SIZE;
- }
-
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid,
+ qdep, addr, size_order,
+ &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 4aa070cf56e7..9f6b0780f2ef 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -167,15 +167,6 @@ static void device_rbtree_remove(struct device_domain_info *info)
spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
}
-/*
- * This domain is a statically identity mapping domain.
- * 1. This domain creats a static 1:1 mapping to all usable memory.
- * 2. It maps to each iommu if successful.
- * 3. Each iommu mapps to this domain if successful.
- */
-static struct dmar_domain *si_domain;
-static int hw_pass_through = 1;
-
struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
@@ -293,11 +284,6 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-static int domain_type_is_si(struct dmar_domain *domain)
-{
- return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
-}
-
static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
@@ -492,7 +478,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain)
domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
- domain_update_iotlb(domain);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -1199,9 +1184,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-/* return value determine if we need a write buffer flush */
-static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
- u64 addr, unsigned int size_order, u64 type)
+void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type)
{
int tlb_offset = ecap_iotlb_offset(iommu->ecap);
u64 val = 0, val_iva = 0;
@@ -1270,32 +1254,6 @@ domain_lookup_dev_info(struct dmar_domain *domain,
return NULL;
}
-void domain_update_iotlb(struct dmar_domain *domain)
-{
- struct dev_pasid_info *dev_pasid;
- struct device_domain_info *info;
- bool has_iotlb_device = false;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link) {
- if (info->ats_enabled) {
- has_iotlb_device = true;
- break;
- }
- }
-
- list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
- info = dev_iommu_priv_get(dev_pasid->dev);
- if (info->ats_enabled) {
- has_iotlb_device = true;
- break;
- }
- }
- domain->has_iotlb_device = has_iotlb_device;
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
/*
* The extra devTLB flush quirk impacts those QAT devices with PCI device
* IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
@@ -1322,20 +1280,9 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
return;
pdev = to_pci_dev(info->dev);
-
- /* The PCIe spec, in its wisdom, declares that the behaviour of
- the device if you enable PASID support after ATS support is
- undefined. So always enable PASID support on devices which
- have it, even if we can't yet know if we're ever going to
- use it. */
- if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
- info->pasid_enabled = 1;
-
if (info->ats_supported && pci_ats_page_aligned(pdev) &&
- !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
+ !pci_enable_ats(pdev, VTD_PAGE_SHIFT))
info->ats_enabled = 1;
- domain_update_iotlb(info->domain);
- }
}
static void iommu_disable_pci_caps(struct device_domain_info *info)
@@ -1350,12 +1297,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info)
if (info->ats_enabled) {
pci_disable_ats(pdev);
info->ats_enabled = 0;
- domain_update_iotlb(info->domain);
- }
-
- if (info->pasid_enabled) {
- pci_disable_pasid(pdev);
- info->pasid_enabled = 0;
}
}
@@ -1447,10 +1388,10 @@ static int iommu_init_domains(struct intel_iommu *iommu)
* entry for first-level or pass-through translation modes should
* be programmed with a domain id different from those used for
* second-level or nested translation. We reserve a domain id for
- * this purpose.
+ * this purpose. This domain id is also used for identity domain
+ * in legacy mode.
*/
- if (sm_supported(iommu))
- set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
+ set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
return 0;
}
@@ -1524,7 +1465,6 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->nid = NUMA_NO_NODE;
if (first_level_by_default(type))
domain->use_first_level = true;
- domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
INIT_LIST_HEAD(&domain->dev_pasids);
INIT_LIST_HEAD(&domain->cache_tags);
@@ -1632,9 +1572,65 @@ static void domain_exit(struct dmar_domain *domain)
if (WARN_ON(!list_empty(&domain->devices)))
return;
+ kfree(domain->qi_batch);
kfree(domain);
}
+/*
+ * For kdump cases, old valid entries may be cached due to the
+ * in-flight DMA and copied pgtable, but there is no unmapping
+ * behaviour for them, thus we need an explicit cache flush for
+ * the newly-mapped device. For kdump, at this point, the device
+ * is supposed to finish reset at its driver probe stage, so no
+ * in-flight DMA will exist, and we don't need to worry anymore
+ * hereafter.
+ */
+static void copied_context_tear_down(struct intel_iommu *iommu,
+ struct context_entry *context,
+ u8 bus, u8 devfn)
+{
+ u16 did_old;
+
+ if (!context_copied(iommu, bus, devfn))
+ return;
+
+ assert_spin_locked(&iommu->lock);
+
+ did_old = context_domain_id(context);
+ context_clear_entry(context);
+
+ if (did_old < cap_ndoms(iommu->cap)) {
+ iommu->flush.flush_context(iommu, did_old,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ }
+
+ clear_context_copied(iommu, bus, devfn);
+}
+
+/*
+ * It's a non-present to present mapping. If hardware doesn't cache
+ * non-present entry we only need to flush the write-buffer. If the
+ * _does_ cache non-present entries, then it does so in the special
+ * domain #0, which we have to flush:
+ */
+static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
+ u8 bus, u8 devfn)
+{
+ if (cap_caching_mode(iommu->cap)) {
+ iommu->flush.flush_context(iommu, 0,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
static int domain_context_mapping_one(struct dmar_domain *domain,
struct intel_iommu *iommu,
u8 bus, u8 devfn)
@@ -1647,9 +1643,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct context_entry *context;
int agaw, ret;
- if (hw_pass_through && domain_type_is_si(domain))
- translation = CONTEXT_TT_PASS_THROUGH;
-
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1663,83 +1656,35 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (context_present(context) && !context_copied(iommu, bus, devfn))
goto out_unlock;
- /*
- * For kdump cases, old valid entries may be cached due to the
- * in-flight DMA and copied pgtable, but there is no unmapping
- * behaviour for them, thus we need an explicit cache flush for
- * the newly-mapped device. For kdump, at this point, the device
- * is supposed to finish reset at its driver probe stage, so no
- * in-flight DMA will exist, and we don't need to worry anymore
- * hereafter.
- */
- if (context_copied(iommu, bus, devfn)) {
- u16 did_old = context_domain_id(context);
-
- if (did_old < cap_ndoms(iommu->cap)) {
- iommu->flush.flush_context(iommu, did_old,
- (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT,
- DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
- DMA_TLB_DSI_FLUSH);
- }
-
- clear_context_copied(iommu, bus, devfn);
- }
-
+ copied_context_tear_down(iommu, context, bus, devfn);
context_clear_entry(context);
- context_set_domain_id(context, did);
- if (translation != CONTEXT_TT_PASS_THROUGH) {
- /*
- * Skip top levels of page tables for iommu which has
- * less agaw than default. Unnecessary for PT mode.
- */
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- ret = -ENOMEM;
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- goto out_unlock;
- }
-
- if (info && info->ats_supported)
- translation = CONTEXT_TT_DEV_IOTLB;
- else
- translation = CONTEXT_TT_MULTI_LEVEL;
+ context_set_domain_id(context, did);
- context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, agaw);
- } else {
- /*
- * In pass through mode, AW must be programmed to
- * indicate the largest AGAW value supported by
- * hardware. And ASR is ignored by hardware.
- */
- context_set_address_width(context, iommu->msagaw);
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ ret = -ENOMEM;
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ goto out_unlock;
}
+ if (info && info->ats_supported)
+ translation = CONTEXT_TT_DEV_IOTLB;
+ else
+ translation = CONTEXT_TT_MULTI_LEVEL;
+
+ context_set_address_root(context, virt_to_phys(pgd));
+ context_set_address_width(context, agaw);
context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(context, sizeof(*context));
-
- /*
- * It's a non-present to present mapping. If hardware doesn't cache
- * non-present entry we only need to flush the write-buffer. If the
- * _does_ cache non-present entries, then it does so in the special
- * domain #0, which we have to flush:
- */
- if (cap_caching_mode(iommu->cap)) {
- iommu->flush.flush_context(iommu, 0,
- (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT,
- DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
- } else {
- iommu_flush_write_buffer(iommu);
- }
-
+ context_present_cache_flush(iommu, did, bus, devfn);
ret = 0;
out_unlock:
@@ -2000,80 +1945,6 @@ static bool dev_is_real_dma_subdevice(struct device *dev)
pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
}
-static int iommu_domain_identity_map(struct dmar_domain *domain,
- unsigned long first_vpfn,
- unsigned long last_vpfn)
-{
- /*
- * RMRR range might have overlap with physical memory range,
- * clear it first
- */
- dma_pte_clear_range(domain, first_vpfn, last_vpfn);
-
- return __domain_mapping(domain, first_vpfn,
- first_vpfn, last_vpfn - first_vpfn + 1,
- DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
-}
-
-static int md_domain_init(struct dmar_domain *domain, int guest_width);
-
-static int __init si_domain_init(int hw)
-{
- struct dmar_rmrr_unit *rmrr;
- struct device *dev;
- int i, nid, ret;
-
- si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
- if (!si_domain)
- return -EFAULT;
-
- if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
- domain_exit(si_domain);
- si_domain = NULL;
- return -EFAULT;
- }
-
- if (hw)
- return 0;
-
- for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn;
- int i;
-
- for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
- ret = iommu_domain_identity_map(si_domain,
- mm_to_dma_pfn_start(start_pfn),
- mm_to_dma_pfn_end(end_pfn-1));
- if (ret)
- return ret;
- }
- }
-
- /*
- * Identity map the RMRRs so that devices with RMRRs could also use
- * the si_domain.
- */
- for_each_rmrr_units(rmrr) {
- for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
- i, dev) {
- unsigned long long start = rmrr->base_address;
- unsigned long long end = rmrr->end_address;
-
- if (WARN_ON(end < start ||
- end >> agaw_to_width(si_domain->agaw)))
- continue;
-
- ret = iommu_domain_identity_map(si_domain,
- mm_to_dma_pfn_start(start >> PAGE_SHIFT),
- mm_to_dma_pfn_end(end >> PAGE_SHIFT));
- if (ret)
- return ret;
- }
- }
-
- return 0;
-}
-
static int dmar_domain_attach_device(struct dmar_domain *domain,
struct device *dev)
{
@@ -2096,8 +1967,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (!sm_supported(iommu))
ret = domain_context_mapping(domain, dev);
- else if (hw_pass_through && domain_type_is_si(domain))
- ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
else if (domain->use_first_level)
ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID);
else
@@ -2106,8 +1975,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (ret)
goto out_block_translation;
- if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
- iommu_enable_pci_caps(info);
+ iommu_enable_pci_caps(info);
ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
if (ret)
@@ -2151,6 +2019,16 @@ static bool device_rmrr_is_relaxable(struct device *dev)
static int device_def_domain_type(struct device *dev)
{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ /*
+ * Hardware does not support the passthrough translation mode.
+ * Always use a dynamaic mapping domain.
+ */
+ if (!ecap_pass_through(iommu->ecap))
+ return IOMMU_DOMAIN_DMA;
+
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2441,8 +2319,6 @@ static int __init init_dmars(void)
}
}
- if (!ecap_pass_through(iommu->ecap))
- hw_pass_through = 0;
intel_svm_check(iommu);
}
@@ -2458,10 +2334,6 @@ static int __init init_dmars(void)
check_tylersburg_isoch();
- ret = si_domain_init(hw_pass_through);
- if (ret)
- goto free_iommu;
-
/*
* for each drhd
* enable fault log
@@ -2507,10 +2379,6 @@ free_iommu:
disable_dmar_iommu(iommu);
free_dmar_iommu(iommu);
}
- if (si_domain) {
- domain_exit(si_domain);
- si_domain = NULL;
- }
return ret;
}
@@ -2885,12 +2753,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (ret)
goto out;
- if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
- pr_warn("%s: Doesn't support hardware pass through.\n",
- iommu->name);
- return -ENXIO;
- }
-
sp = domain_update_iommu_superpage(NULL, iommu) - 1;
if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
pr_warn("%s: Doesn't support large page.\n",
@@ -3141,43 +3003,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
return 0;
}
-static int intel_iommu_memory_notifier(struct notifier_block *nb,
- unsigned long val, void *v)
-{
- struct memory_notify *mhp = v;
- unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn);
- unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn +
- mhp->nr_pages - 1);
-
- switch (val) {
- case MEM_GOING_ONLINE:
- if (iommu_domain_identity_map(si_domain,
- start_vpfn, last_vpfn)) {
- pr_warn("Failed to build identity map for [%lx-%lx]\n",
- start_vpfn, last_vpfn);
- return NOTIFY_BAD;
- }
- break;
-
- case MEM_OFFLINE:
- case MEM_CANCEL_ONLINE:
- {
- LIST_HEAD(freelist);
-
- domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
- iommu_put_pages_list(&freelist);
- }
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block intel_iommu_memory_nb = {
- .notifier_call = intel_iommu_memory_notifier,
- .priority = 0
-};
-
static void intel_disable_iommus(void)
{
struct intel_iommu *iommu = NULL;
@@ -3474,12 +3299,7 @@ int __init intel_iommu_init(void)
iommu_pmu_register(iommu);
}
- up_read(&dmar_global_lock);
-
- if (si_domain && !hw_pass_through)
- register_memory_notifier(&intel_iommu_memory_nb);
- down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
pr_warn("ACPI name space devices didn't probe correctly\n");
@@ -3624,7 +3444,6 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
xa_init(&domain->iommu_array);
domain->nid = dev_to_node(dev);
- domain->has_iotlb_device = info->ats_enabled;
domain->use_first_level = first_stage;
/* calculate the address width */
@@ -3693,8 +3512,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
domain->geometry.force_aperture = true;
return domain;
- case IOMMU_DOMAIN_IDENTITY:
- return &si_domain->domain;
default:
return NULL;
}
@@ -3761,8 +3578,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
WARN_ON(dmar_domain->nested_parent &&
!list_empty(&dmar_domain->s1_domains));
- if (domain != &si_domain->domain)
- domain_exit(dmar_domain);
+ domain_exit(dmar_domain);
}
int prepare_domain_attach_device(struct iommu_domain *domain,
@@ -3812,11 +3628,9 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
int ret;
- if (info->domain)
- device_block_translation(dev);
+ device_block_translation(dev);
ret = prepare_domain_attach_device(domain, dev);
if (ret)
@@ -4093,6 +3907,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
dev_iommu_priv_set(dev, info);
if (pdev && pci_ats_supported(pdev)) {
+ pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
ret = device_rbtree_insert(iommu, info);
if (ret)
goto free;
@@ -4114,6 +3929,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
intel_iommu_debugfs_create_dev(info);
+ /*
+ * The PCIe spec, in its wisdom, declares that the behaviour of the
+ * device is undefined if you enable PASID support after ATS support.
+ * So always enable PASID support on devices which have it, even if
+ * we can't yet know if we're ever going to use it.
+ */
+ if (info->pasid_supported &&
+ !pci_enable_pasid(pdev, info->pasid_supported & ~1))
+ info->pasid_enabled = 1;
+
return &iommu->iommu;
free_table:
intel_pasid_free_table(dev);
@@ -4130,6 +3955,11 @@ static void intel_iommu_release_device(struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
+ if (info->pasid_enabled) {
+ pci_disable_pasid(to_pci_dev(dev));
+ info->pasid_enabled = 0;
+ }
+
mutex_lock(&iommu->iopf_lock);
if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
device_rbtree_remove(info);
@@ -4424,11 +4254,17 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct dev_pasid_info *curr, *dev_pasid = NULL;
struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *dmar_domain;
unsigned long flags;
+ if (domain->type == IOMMU_DOMAIN_IDENTITY) {
+ intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+ return;
+ }
+
+ dmar_domain = to_dmar_domain(domain);
spin_lock_irqsave(&dmar_domain->lock, flags);
list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
if (curr->dev == dev && curr->pasid == pasid) {
@@ -4483,9 +4319,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (ret)
goto out_detach_iommu;
- if (domain_type_is_si(dmar_domain))
- ret = intel_pasid_setup_pass_through(iommu, dev, pasid);
- else if (dmar_domain->use_first_level)
+ if (dmar_domain->use_first_level)
ret = domain_setup_first_level(iommu, dmar_domain,
dev, pasid);
else
@@ -4655,9 +4489,111 @@ static const struct iommu_dirty_ops intel_dirty_ops = {
.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
};
+static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 1);
+ if (!context) {
+ spin_unlock(&iommu->lock);
+ return -ENOMEM;
+ }
+
+ if (context_present(context) && !context_copied(iommu, bus, devfn)) {
+ spin_unlock(&iommu->lock);
+ return 0;
+ }
+
+ copied_context_tear_down(iommu, context, bus, devfn);
+ context_clear_entry(context);
+ context_set_domain_id(context, FLPT_DEFAULT_DID);
+
+ /*
+ * In pass through mode, AW must be programmed to indicate the largest
+ * AGAW value supported by hardware. And ASR is ignored by hardware.
+ */
+ context_set_address_width(context, iommu->msagaw);
+ context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
+ context_set_fault_enable(context);
+ context_set_present(context);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
+ context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
+ spin_unlock(&iommu->lock);
+
+ return 0;
+}
+
+static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ if (dev != &pdev->dev)
+ return 0;
+
+ return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
+}
+
+static int device_setup_pass_through(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ if (!dev_is_pci(dev))
+ return context_setup_pass_through(dev, info->bus, info->devfn);
+
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+ context_setup_pass_through_cb, dev);
+}
+
+static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ int ret;
+
+ device_block_translation(dev);
+
+ if (dev_is_real_dma_subdevice(dev))
+ return 0;
+
+ if (sm_supported(iommu)) {
+ ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
+ if (!ret)
+ iommu_enable_pci_caps(info);
+ } else {
+ ret = device_setup_pass_through(dev);
+ }
+
+ return ret;
+}
+
+static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+ return -EOPNOTSUPP;
+
+ return intel_pasid_setup_pass_through(iommu, dev, pasid);
+}
+
+static struct iommu_domain identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = identity_domain_attach_dev,
+ .set_dev_pasid = identity_domain_set_dev_pasid,
+ },
+};
+
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
+ .identity_domain = &identity_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index a969be2258b1..428d253f1348 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -584,11 +584,23 @@ struct iommu_domain_info {
* to VT-d spec, section 9.3 */
};
+/*
+ * We start simply by using a fixed size for the batched descriptors. This
+ * size is currently sufficient for our needs. Future improvements could
+ * involve dynamically allocating the batch buffer based on actual demand,
+ * allowing us to adjust the batch size for optimal performance in different
+ * scenarios.
+ */
+#define QI_MAX_BATCHED_DESC_COUNT 16
+struct qi_batch {
+ struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
+ unsigned int index;
+};
+
struct dmar_domain {
int nid; /* node id */
struct xarray iommu_array; /* Attached IOMMU array */
- u8 has_iotlb_device: 1;
u8 iommu_coherency: 1; /* indicate coherency of iommu access */
u8 force_snooping : 1; /* Create IOPTEs with snoop control */
u8 set_pte_snp:1;
@@ -609,6 +621,7 @@ struct dmar_domain {
spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
+ struct qi_batch *qi_batch; /* Batched QI descriptors */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
@@ -1067,6 +1080,115 @@ static inline unsigned long nrpages_to_size(unsigned long npages)
return npages << VTD_PAGE_SHIFT;
}
+static inline void qi_desc_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type,
+ struct qi_desc *desc)
+{
+ u8 dw = 0, dr = 0;
+ int ih = 0;
+
+ if (cap_write_drain(iommu->cap))
+ dw = 1;
+
+ if (cap_read_drain(iommu->cap))
+ dr = 1;
+
+ desc->qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
+ | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
+ desc->qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
+ | QI_IOTLB_AM(size_order);
+ desc->qw2 = 0;
+ desc->qw3 = 0;
+}
+
+static inline void qi_desc_dev_iotlb(u16 sid, u16 pfsid, u16 qdep, u64 addr,
+ unsigned int mask, struct qi_desc *desc)
+{
+ if (mask) {
+ addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
+ desc->qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+ } else {
+ desc->qw1 = QI_DEV_IOTLB_ADDR(addr);
+ }
+
+ if (qdep >= QI_DEV_IOTLB_MAX_INVS)
+ qdep = 0;
+
+ desc->qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+ QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
+ desc->qw2 = 0;
+ desc->qw3 = 0;
+}
+
+static inline void qi_desc_piotlb(u16 did, u32 pasid, u64 addr,
+ unsigned long npages, bool ih,
+ struct qi_desc *desc)
+{
+ if (npages == -1) {
+ desc->qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc->qw1 = 0;
+ } else {
+ int mask = ilog2(__roundup_pow_of_two(npages));
+ unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
+
+ if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
+ addr = ALIGN_DOWN(addr, align);
+
+ desc->qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+ QI_EIOTLB_TYPE;
+ desc->qw1 = QI_EIOTLB_ADDR(addr) |
+ QI_EIOTLB_IH(ih) |
+ QI_EIOTLB_AM(mask);
+ }
+}
+
+static inline void qi_desc_dev_iotlb_pasid(u16 sid, u16 pfsid, u32 pasid,
+ u16 qdep, u64 addr,
+ unsigned int size_order,
+ struct qi_desc *desc)
+{
+ unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
+
+ desc->qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(pfsid);
+
+ /*
+ * If S bit is 0, we only flush a single page. If S bit is set,
+ * The least significant zero bit indicates the invalidation address
+ * range. VT-d spec 6.5.2.6.
+ * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
+ * size order = 0 is PAGE_SIZE 4KB
+ * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
+ * ECAP.
+ */
+ if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
+ pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
+ addr, size_order);
+
+ /* Take page address */
+ desc->qw1 = QI_DEV_EIOTLB_ADDR(addr);
+
+ if (size_order) {
+ /*
+ * Existing 0s in address below size_order may be the least
+ * significant bit, we must set them to 1s to avoid having
+ * smaller size than desired.
+ */
+ desc->qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
+ VTD_PAGE_SHIFT);
+ /* Clear size_order bit to indicate size */
+ desc->qw1 &= ~mask;
+ /* Set the S bit to indicate flushing more than 1 page */
+ desc->qw1 |= QI_DEV_EIOTLB_SIZE;
+ }
+}
+
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)
@@ -1098,13 +1220,15 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options);
+
+void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
/*
* Options used in qi_submit_sync:
* QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
*/
#define QI_OPT_WAIT_DRAIN BIT(0)
-void domain_update_iotlb(struct dmar_domain *domain);
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu);
void device_block_translation(struct device *dev);
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 16a2bcf5cfeb..433c58944401 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -66,8 +66,6 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
list_add(&info->link, &dmar_domain->devices);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
- domain_update_iotlb(dmar_domain);
-
return 0;
unassign_tag:
cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID);
@@ -85,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
spin_lock(&s2_domain->s1_lock);
list_del(&dmar_domain->s2_link);
spin_unlock(&s2_domain->s1_lock);
+ kfree(dmar_domain->qi_batch);
kfree(dmar_domain);
}
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index b51fc268dc84..2e5fa0a23299 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -264,9 +264,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
else
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
- /* Device IOTLB doesn't need to be flushed in caching mode. */
- if (!cap_caching_mode(iommu->cap))
- devtlb_invalidation_with_pasid(iommu, dev, pasid);
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
/*
@@ -493,9 +491,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
- /* Device IOTLB doesn't need to be flushed in caching mode. */
- if (!cap_caching_mode(iommu->cap))
- devtlb_invalidation_with_pasid(iommu, dev, pasid);
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
return 0;
}
@@ -572,9 +568,7 @@ void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
pasid_cache_invalidation_with_pasid(iommu, did, pasid);
qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
- /* Device IOTLB doesn't need to be flushed in caching mode. */
- if (!cap_caching_mode(iommu->cap))
- devtlb_invalidation_with_pasid(iommu, dev, pasid);
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
/**
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 0e3a9b38bef2..078d1e32a24e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static void intel_mm_free_notifier(struct mmu_notifier *mn)
{
- kfree(container_of(mn, struct dmar_domain, notifier));
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+
+ kfree(domain->qi_batch);
+ kfree(domain);
}
static const struct mmu_notifier_ops intel_mmuops = {
@@ -311,7 +314,7 @@ void intel_drain_pasid_prq(struct device *dev, u32 pasid)
domain = info->domain;
pdev = to_pci_dev(dev);
sid = PCI_DEVID(info->bus, info->devfn);
- did = domain_id_iommu(domain, iommu);
+ did = domain ? domain_id_iommu(domain, iommu) : FLPT_DEFAULT_DID;
qdep = pci_ats_queue_depth(pdev);
/*
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index ff4149ae1751..0e67f1721a3d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -274,13 +274,13 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}
-static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
+static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
{
+ for (int i = 0; i < num_entries; i++)
+ ptep[i] = 0;
- *ptep = 0;
-
- if (!cfg->coherent_walk)
- __arm_lpae_sync_pte(ptep, 1, cfg);
+ if (!cfg->coherent_walk && num_entries)
+ __arm_lpae_sync_pte(ptep, num_entries, cfg);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -653,26 +653,29 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
num_entries = min_t(int, pgcount, max_entries);
- while (i < num_entries) {
- pte = READ_ONCE(*ptep);
+ /* Find and handle non-leaf entries */
+ for (i = 0; i < num_entries; i++) {
+ pte = READ_ONCE(ptep[i]);
if (WARN_ON(!pte))
break;
- __arm_lpae_clear_pte(ptep, &iop->cfg);
-
if (!iopte_leaf(pte, lvl, iop->fmt)) {
+ __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);
+
/* Also flush any partial walks */
io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
ARM_LPAE_GRANULE(data));
__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
- } else if (!iommu_iotlb_gather_queued(gather)) {
- io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
}
-
- ptep++;
- i++;
}
+ /* Clear the remaining entries */
+ __arm_lpae_clear_pte(ptep, &iop->cfg, i);
+
+ if (gather && !iommu_iotlb_gather_queued(gather))
+ for (int j = 0; j < i; j++)
+ io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);
+
return i * size;
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
/*
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 78d61da75257..e7a6a1611d19 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -214,7 +214,7 @@ void of_iommu_get_resv_regions(struct device *dev, struct list_head *list)
* that represent reservations in the IOVA space, which are regions that should
* not be mapped.
*/
- if (of_find_property(it.node, "reg", NULL)) {
+ if (of_property_present(it.node, "reg")) {
err = of_address_to_resource(it.node, 0, &phys);
if (err < 0) {
dev_err(dev, "failed to parse memory region %pOF: %d\n",
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index c570892b2090..87fa03540b8a 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -48,6 +48,39 @@ bool pci_ats_supported(struct pci_dev *dev)
EXPORT_SYMBOL_GPL(pci_ats_supported);
/**
+ * pci_prepare_ats - Setup the PS for ATS
+ * @dev: the PCI device
+ * @ps: the IOMMU page shift
+ *
+ * This must be done by the IOMMU driver on the PF before any VFs are created to
+ * ensure that the VF can have ATS enabled.
+ *
+ * Returns 0 on success, or negative on failure.
+ */
+int pci_prepare_ats(struct pci_dev *dev, int ps)
+{
+ u16 ctrl;
+
+ if (!pci_ats_supported(dev))
+ return -EINVAL;
+
+ if (WARN_ON(dev->ats_enabled))
+ return -EBUSY;
+
+ if (ps < PCI_ATS_MIN_STU)
+ return -EINVAL;
+
+ if (dev->is_virtfn)
+ return 0;
+
+ dev->ats_stu = ps;
+ ctrl = PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU);
+ pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_prepare_ats);
+
+/**
* pci_enable_ats - enable the ATS capability
* @dev: the PCI device
* @ps: the IOMMU page shift
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index f9a81761bfce..b1ecfc3cd5bc 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -171,6 +171,10 @@ struct io_pgtable_cfg {
u64 ttbr[4];
u32 n_ttbrs;
} apple_dart_cfg;
+
+ struct {
+ int nid;
+ } amd;
};
};
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index df54cd5b15db..0e8b74e63767 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -8,6 +8,7 @@
/* Address Translation Service */
bool pci_ats_supported(struct pci_dev *dev);
int pci_enable_ats(struct pci_dev *dev, int ps);
+int pci_prepare_ats(struct pci_dev *dev, int ps);
void pci_disable_ats(struct pci_dev *dev);
int pci_ats_queue_depth(struct pci_dev *dev);
int pci_ats_page_aligned(struct pci_dev *dev);
@@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d)
{ return false; }
static inline int pci_enable_ats(struct pci_dev *d, int ps)
{ return -ENODEV; }
+static inline int pci_prepare_ats(struct pci_dev *dev, int ps)
+{ return -ENODEV; }
static inline void pci_disable_ats(struct pci_dev *d) { }
static inline int pci_ats_queue_depth(struct pci_dev *d)
{ return -ENODEV; }