about | summary | refs | log | tree | commit | diff
path: root/drivers/iommu/amd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- drivers/iommu/amd/amd_iommu.h 11
-rw-r--r-- drivers/iommu/amd/amd_iommu_types.h 23
-rw-r--r-- drivers/iommu/amd/init.c 63
-rw-r--r-- drivers/iommu/amd/io_pgtable.c 11
-rw-r--r-- drivers/iommu/amd/io_pgtable_v2.c 3
-rw-r--r-- drivers/iommu/amd/iommu.c 495
-rw-r--r-- drivers/iommu/amd/pasid.c 6
7 files changed, 323 insertions, 289 deletions
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6386fa4556d9..1bef5d55b2f9 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -46,13 +46,15 @@ extern int amd_iommu_gpt_level;
extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
+void amd_iommu_init_identity_domain(void);
struct protection_domain *protection_domain_alloc(unsigned int type, int nid);
void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid);
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old);
void amd_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
struct iommu_domain *domain);
@@ -118,9 +120,14 @@ static inline bool check_feature2(u64 mask)
return (amd_iommu_efr2 & mask);
}
+static inline bool amd_iommu_v2_pgtbl_supported(void)
+{
+ return (check_feature(FEATURE_GIOSUP) && check_feature(FEATURE_GT));
+}
+
static inline bool amd_iommu_gt_ppr_supported(void)
{
- return (check_feature(FEATURE_GT) &&
+ return (amd_iommu_v2_pgtbl_supported() &&
check_feature(FEATURE_PPR) &&
check_feature(FEATURE_EPHSUP));
}
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 601fb4ee6900..fdb0357e0bb9 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -565,6 +565,12 @@ struct pdom_dev_data {
struct list_head list;
};
+/* Keeps track of the IOMMUs attached to protection domain */
+struct pdom_iommu_info {
+ struct amd_iommu *iommu; /* IOMMU attached to the protection domain */
+ u32 refcnt; /* Count of attached dev/pasid per domain/IOMMU */
+};
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -578,8 +584,7 @@ struct protection_domain {
u16 id; /* the domain id written to the device table */
enum protection_domain_mode pd_mode; /* Track page table type */
bool dirty_tracking; /* dirty tracking is enabled in the domain */
- unsigned dev_cnt; /* devices assigned to this domain */
- unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
+ struct xarray iommu_array; /* per-IOMMU pdom_iommu_info, indexed by iommu->index */
struct mmu_notifier mn; /* mmu notifier for the SVA domain */
struct list_head dev_data_list; /* List of pdom_dev_data */
@@ -831,7 +836,7 @@ struct devid_map {
*/
struct iommu_dev_data {
/*Protect against attach/detach races */
- spinlock_t lock;
+ struct mutex mutex;
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
@@ -873,12 +878,6 @@ extern struct list_head amd_iommu_pci_seg_list;
extern struct list_head amd_iommu_list;
/*
- * Array with pointers to each IOMMU struct
- * The indices are referenced in the protection domains
- */
-extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
-
-/*
* Structure defining one entry in the device table
*/
struct dev_table_entry {
@@ -912,14 +911,14 @@ struct unity_map_entry {
/* size of the dma_ops aperture as power of 2 */
extern unsigned amd_iommu_aperture_order;
-/* allocation bitmap for domain ids */
-extern unsigned long *amd_iommu_pd_alloc_bitmap;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
+/* IDA to track protection domain IDs */
+extern struct ida pdom_ids;
+
/* Global EFR and EFR2 registers */
extern u64 amd_iommu_efr;
extern u64 amd_iommu_efr2;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 43131c3a2172..0e0a531042ac 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -177,9 +177,6 @@ LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
-/* Array to assign indices to IOMMUs*/
-struct amd_iommu *amd_iommus[MAX_IOMMUS];
-
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
@@ -194,12 +191,6 @@ bool amd_iommu_force_isolation __read_mostly;
unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
-/*
- * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
- * to know which ones are already in use.
- */
-unsigned long *amd_iommu_pd_alloc_bitmap;
-
enum iommu_init_state {
IOMMU_START_STATE,
IOMMU_IVRS_DETECTED,
@@ -1082,7 +1073,12 @@ static bool __copy_device_table(struct amd_iommu *iommu)
if (dte_v && dom_id) {
pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
- __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+ /* Reserve the Domain IDs used by previous kernel */
+ if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
+ pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
+ memunmap(old_devtb);
+ return false;
+ }
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
@@ -1744,9 +1740,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
return -ENOSYS;
}
- /* Index is fine - add IOMMU to the array */
- amd_iommus[iommu->index] = iommu;
-
/*
* Copy data from ACPI table entry to the iommu struct
*/
@@ -2070,14 +2063,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
init_iommu_perf_ctr(iommu);
- if (amd_iommu_pgtable == AMD_IOMMU_V2) {
- if (!check_feature(FEATURE_GIOSUP) ||
- !check_feature(FEATURE_GT)) {
- pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
- }
- }
-
if (is_rd890_iommu(iommu->dev)) {
int i, j;
@@ -2172,6 +2157,9 @@ static int __init amd_iommu_init_pci(void)
struct amd_iommu_pci_seg *pci_seg;
int ret;
+ /* Init global identity domain before registering IOMMU */
+ amd_iommu_init_identity_domain();
+
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu);
if (ret) {
@@ -2882,11 +2870,6 @@ static void enable_iommus_vapic(void)
#endif
}
-static void enable_iommus(void)
-{
- early_enable_iommus();
-}
-
static void disable_iommus(void)
{
struct amd_iommu *iommu;
@@ -2913,7 +2896,8 @@ static void amd_iommu_resume(void)
iommu_apply_resume_quirks(iommu);
/* re-load the hardware */
- enable_iommus();
+ for_each_iommu(iommu)
+ early_enable_iommu(iommu);
amd_iommu_enable_interrupts();
}
@@ -2994,9 +2978,7 @@ static bool __init check_ioapic_information(void)
static void __init free_dma_resources(void)
{
- iommu_free_pages(amd_iommu_pd_alloc_bitmap,
- get_order(MAX_DOMAIN_ID / 8));
- amd_iommu_pd_alloc_bitmap = NULL;
+ ida_destroy(&pdom_ids);
free_unity_maps();
}
@@ -3064,20 +3046,6 @@ static int __init early_amd_iommu_init(void)
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
- /* Device table - directly used by all IOMMUs */
- ret = -ENOMEM;
-
- amd_iommu_pd_alloc_bitmap = iommu_alloc_pages(GFP_KERNEL,
- get_order(MAX_DOMAIN_ID / 8));
- if (amd_iommu_pd_alloc_bitmap == NULL)
- goto out;
-
- /*
- * never allocate domain 0 because its used as the non-allocated and
- * error value placeholder
- */
- __set_bit(0, amd_iommu_pd_alloc_bitmap);
-
/*
* now the data structures are allocated and basically initialized
* start the real acpi table scan
@@ -3091,6 +3059,13 @@ static int __init early_amd_iommu_init(void)
FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
+ if (amd_iommu_pgtable == AMD_IOMMU_V2) {
+ if (!amd_iommu_v2_pgtbl_supported()) {
+ pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
+ amd_iommu_pgtable = AMD_IOMMU_V1;
+ }
+ }
+
/* Disable any previously enabled IOMMUs */
if (!is_kdump_kernel() || amd_iommu_disabled)
disable_iommus();
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 804b788f3f16..f3399087859f 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -118,6 +118,7 @@ static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
*/
static bool increase_address_space(struct amd_io_pgtable *pgtable,
unsigned long address,
+ unsigned int page_size_level,
gfp_t gfp)
{
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
@@ -133,7 +134,8 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(pgtable->mode))
+ if (address <= PM_LEVEL_SIZE(pgtable->mode) &&
+ pgtable->mode - 1 >= page_size_level)
goto out;
ret = false;
@@ -163,18 +165,21 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
gfp_t gfp,
bool *updated)
{
+ unsigned long last_addr = address + (page_size - 1);
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(pgtable->mode)) {
+ while (last_addr > PM_LEVEL_SIZE(pgtable->mode) ||
+ pgtable->mode - 1 < PAGE_SIZE_LEVEL(page_size)) {
/*
* Return an error if there is no memory to update the
* page-table.
*/
- if (!increase_address_space(pgtable, address, gfp))
+ if (!increase_address_space(pgtable, last_addr,
+ PAGE_SIZE_LEVEL(page_size), gfp))
return NULL;
}
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 25b9042fa453..c616de2c5926 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -268,8 +268,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
out:
if (updated) {
struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
+ unsigned long flags;
+ spin_lock_irqsave(&pdom->lock, flags);
amd_iommu_domain_flush_pages(pdom, o_iova, size);
+ spin_unlock_irqrestore(&pdom->lock, flags);
}
if (mapped)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 8364cd6fa47d..5ce8e6504ba7 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -18,6 +18,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
+#include <linux/idr.h>
#include <linux/iommu-helper.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
@@ -52,8 +53,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
-static DEFINE_SPINLOCK(pd_bitmap_lock);
-
LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
@@ -70,9 +69,16 @@ struct iommu_cmd {
u32 data[4];
};
+/*
+ * AMD IOMMU allows up to 2^16 different protection domains. This IDA tracks
+ * which domain IDs are already in use.
+ */
+DEFINE_IDA(pdom_ids);
+
struct kmem_cache *amd_iommu_irq_cache;
-static void detach_device(struct device *dev);
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+ struct device *dev);
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
@@ -202,7 +208,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
if (!dev_data)
return NULL;
- spin_lock_init(&dev_data->lock);
+ mutex_init(&dev_data->mutex);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -555,22 +561,6 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
setup_aliases(iommu, dev);
}
-static void amd_iommu_uninit_device(struct device *dev)
-{
- struct iommu_dev_data *dev_data;
-
- dev_data = dev_iommu_priv_get(dev);
- if (!dev_data)
- return;
-
- if (dev_data->domain)
- detach_device(dev);
-
- /*
- * We keep dev_data around for unplugged devices and reuse it when the
- * device is re-plugged - not doing so would introduce a ton of races.
- */
-}
/****************************************************************************
*
@@ -1230,7 +1220,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (!iommu->need_sync)
return 0;
- data = atomic64_add_return(1, &iommu->cmd_sem_val);
+ data = atomic64_inc_return(&iommu->cmd_sem_val);
build_completion_wait(&cmd, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
@@ -1249,18 +1239,17 @@ out_unlock:
static void domain_flush_complete(struct protection_domain *domain)
{
- int i;
+ struct pdom_iommu_info *pdom_iommu_info;
+ unsigned long i;
- for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (domain && !domain->dev_iommu[i])
- continue;
+ lockdep_assert_held(&domain->lock);
- /*
- * Devices of this domain are behind this IOMMU
- * We need to wait for completion of all commands.
- */
- iommu_completion_wait(amd_iommus[i]);
- }
+ /*
+ * Devices of this domain may sit behind any IOMMU in iommu_array;
+ * wait for completion of all commands on each of them.
+ */
+ xa_for_each(&domain->iommu_array, i, pdom_iommu_info)
+ iommu_completion_wait(pdom_iommu_info->iommu);
}
static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
@@ -1442,21 +1431,22 @@ static int domain_flush_pages_v2(struct protection_domain *pdom,
static int domain_flush_pages_v1(struct protection_domain *pdom,
u64 address, size_t size)
{
+ struct pdom_iommu_info *pdom_iommu_info;
struct iommu_cmd cmd;
- int ret = 0, i;
+ int ret = 0;
+ unsigned long i;
+
+ lockdep_assert_held(&pdom->lock);
build_inv_iommu_pages(&cmd, address, size,
pdom->id, IOMMU_NO_PASID, false);
- for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
- if (!pdom->dev_iommu[i])
- continue;
-
+ xa_for_each(&pdom->iommu_array, i, pdom_iommu_info) {
/*
* Devices of this domain are behind this IOMMU
* We need a TLB flush
*/
- ret |= iommu_queue_command(amd_iommus[i], &cmd);
+ ret |= iommu_queue_command(pdom_iommu_info->iommu, &cmd);
}
return ret;
@@ -1495,6 +1485,8 @@ static void __domain_flush_pages(struct protection_domain *domain,
void amd_iommu_domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size)
{
+ lockdep_assert_held(&domain->lock);
+
if (likely(!amd_iommu_np_cache)) {
__domain_flush_pages(domain, address, size);
@@ -1640,31 +1632,14 @@ int amd_iommu_complete_ppr(struct device *dev, u32 pasid, int status, int tag)
*
****************************************************************************/
-static u16 domain_id_alloc(void)
+static int pdom_id_alloc(void)
{
- unsigned long flags;
- int id;
-
- spin_lock_irqsave(&pd_bitmap_lock, flags);
- id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
- BUG_ON(id == 0);
- if (id > 0 && id < MAX_DOMAIN_ID)
- __set_bit(id, amd_iommu_pd_alloc_bitmap);
- else
- id = 0;
- spin_unlock_irqrestore(&pd_bitmap_lock, flags);
-
- return id;
+ return ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
}
-static void domain_id_free(int id)
+static void pdom_id_free(int id)
{
- unsigned long flags;
-
- spin_lock_irqsave(&pd_bitmap_lock, flags);
- if (id > 0 && id < MAX_DOMAIN_ID)
- __clear_bit(id, amd_iommu_pd_alloc_bitmap);
- spin_unlock_irqrestore(&pd_bitmap_lock, flags);
+ ida_free(&pdom_ids, id);
}
static void free_gcr3_tbl_level1(u64 *tbl)
@@ -1709,7 +1684,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
gcr3_info->glx = 0;
/* Free per device domain ID */
- domain_id_free(gcr3_info->domid);
+ pdom_id_free(gcr3_info->domid);
iommu_free_page(gcr3_info->gcr3_tbl);
gcr3_info->gcr3_tbl = NULL;
@@ -1736,6 +1711,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
{
int levels = get_gcr3_levels(pasids);
int nid = iommu ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
+ int domid;
if (levels > amd_iommu_max_glx_val)
return -EINVAL;
@@ -1744,11 +1720,14 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
return -EBUSY;
/* Allocate per device domain ID */
- gcr3_info->domid = domain_id_alloc();
+ domid = pdom_id_alloc();
+ if (domid <= 0)
+ return -ENOSPC;
+ gcr3_info->domid = domid;
gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC);
if (gcr3_info->gcr3_tbl == NULL) {
- domain_id_free(gcr3_info->domid);
+ pdom_id_free(domid);
return -ENOMEM;
}
@@ -2019,57 +1998,69 @@ static void destroy_gcr3_table(struct iommu_dev_data *dev_data,
free_gcr3_table(gcr3_info);
}
-static int do_attach(struct iommu_dev_data *dev_data,
- struct protection_domain *domain)
+static int pdom_attach_iommu(struct amd_iommu *iommu,
+ struct protection_domain *pdom)
{
- struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
- struct io_pgtable_cfg *cfg = &domain->iop.pgtbl.cfg;
+ struct pdom_iommu_info *pdom_iommu_info, *curr;
+ struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg;
+ unsigned long flags;
int ret = 0;
- /* Update data structures */
- dev_data->domain = domain;
- list_add(&dev_data->list, &domain->dev_list);
+ spin_lock_irqsave(&pdom->lock, flags);
- /* Update NUMA Node ID */
- if (cfg->amd.nid == NUMA_NO_NODE)
- cfg->amd.nid = dev_to_node(dev_data->dev);
+ pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
+ if (pdom_iommu_info) {
+ pdom_iommu_info->refcnt++;
+ goto out_unlock;
+ }
- /* Do reference counting */
- domain->dev_iommu[iommu->index] += 1;
- domain->dev_cnt += 1;
+ pdom_iommu_info = kzalloc(sizeof(*pdom_iommu_info), GFP_ATOMIC);
+ if (!pdom_iommu_info) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
- /* Setup GCR3 table */
- if (pdom_is_sva_capable(domain)) {
- ret = init_gcr3_table(dev_data, domain);
- if (ret)
- return ret;
+ pdom_iommu_info->iommu = iommu;
+ pdom_iommu_info->refcnt = 1;
+
+ curr = xa_cmpxchg(&pdom->iommu_array, iommu->index,
+ NULL, pdom_iommu_info, GFP_ATOMIC);
+ if (curr) {
+ kfree(pdom_iommu_info);
+ ret = -ENOSPC;
+ goto out_unlock;
}
+ /* Update NUMA Node ID */
+ if (cfg->amd.nid == NUMA_NO_NODE)
+ cfg->amd.nid = dev_to_node(&iommu->dev->dev);
+
+out_unlock:
+ spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
}
-static void do_detach(struct iommu_dev_data *dev_data)
+static void pdom_detach_iommu(struct amd_iommu *iommu,
+ struct protection_domain *pdom)
{
- struct protection_domain *domain = dev_data->domain;
- struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
-
- /* Clear DTE and flush the entry */
- dev_update_dte(dev_data, false);
+ struct pdom_iommu_info *pdom_iommu_info;
+ unsigned long flags;
- /* Flush IOTLB and wait for the flushes to finish */
- amd_iommu_domain_flush_all(domain);
+ spin_lock_irqsave(&pdom->lock, flags);
- /* Clear GCR3 table */
- if (pdom_is_sva_capable(domain))
- destroy_gcr3_table(dev_data, domain);
+ pdom_iommu_info = xa_load(&pdom->iommu_array, iommu->index);
+ if (!pdom_iommu_info) {
+ spin_unlock_irqrestore(&pdom->lock, flags);
+ return;
+ }
- /* Update data structures */
- dev_data->domain = NULL;
- list_del(&dev_data->list);
+ pdom_iommu_info->refcnt--;
+ if (pdom_iommu_info->refcnt == 0) {
+ xa_erase(&pdom->iommu_array, iommu->index);
+ kfree(pdom_iommu_info);
+ }
- /* decrease reference counters - needs to happen after the flushes */
- domain->dev_iommu[iommu->index] -= 1;
- domain->dev_cnt -= 1;
+ spin_unlock_irqrestore(&pdom->lock, flags);
}
/*
@@ -2079,27 +2070,56 @@ static void do_detach(struct iommu_dev_data *dev_data)
static int attach_device(struct device *dev,
struct protection_domain *domain)
{
- struct iommu_dev_data *dev_data;
- unsigned long flags;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ struct pci_dev *pdev;
int ret = 0;
- spin_lock_irqsave(&domain->lock, flags);
-
- dev_data = dev_iommu_priv_get(dev);
-
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->mutex);
if (dev_data->domain != NULL) {
ret = -EBUSY;
goto out;
}
- ret = do_attach(dev_data, domain);
+ /* Do reference counting */
+ ret = pdom_attach_iommu(iommu, domain);
+ if (ret)
+ goto out;
-out:
- spin_unlock(&dev_data->lock);
+ /* Setup GCR3 table */
+ if (pdom_is_sva_capable(domain)) {
+ ret = init_gcr3_table(dev_data, domain);
+ if (ret) {
+ pdom_detach_iommu(iommu, domain);
+ goto out;
+ }
+ }
- spin_unlock_irqrestore(&domain->lock, flags);
+ pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+ if (pdev && pdom_is_sva_capable(domain)) {
+ pdev_enable_caps(pdev);
+
+ /*
+ * Device can continue to function even if IOPF
+ * enablement failed. Hence in error path just
+ * disable device PRI support.
+ */
+ if (amd_iommu_iopf_add_device(iommu, dev_data))
+ pdev_disable_cap_pri(pdev);
+ } else if (pdev) {
+ pdev_enable_cap_ats(pdev);
+ }
+
+ /* Update data structures */
+ dev_data->domain = domain;
+ list_add(&dev_data->list, &domain->dev_list);
+
+ /* Update device table */
+ dev_update_dte(dev_data, true);
+
+out:
+ mutex_unlock(&dev_data->mutex);
return ret;
}
@@ -2110,14 +2130,11 @@ out:
static void detach_device(struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ struct protection_domain *domain = dev_data->domain;
unsigned long flags;
- bool ppr = dev_data->ppr;
-
- spin_lock_irqsave(&domain->lock, flags);
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->mutex);
/*
* First check if the device is still attached. It might already
@@ -2128,27 +2145,36 @@ static void detach_device(struct device *dev)
if (WARN_ON(!dev_data->domain))
goto out;
- if (ppr) {
+ /* Remove IOPF handler */
+ if (dev_data->ppr) {
iopf_queue_flush_dev(dev);
-
- /* Updated here so that it gets reflected in DTE */
- dev_data->ppr = false;
+ amd_iommu_iopf_remove_device(iommu, dev_data);
}
- do_detach(dev_data);
+ if (dev_is_pci(dev))
+ pdev_disable_caps(to_pci_dev(dev));
-out:
- spin_unlock(&dev_data->lock);
+ /* Clear DTE and flush the entry */
+ dev_update_dte(dev_data, false);
+ /* Flush IOTLB and wait for the flushes to finish */
+ spin_lock_irqsave(&domain->lock, flags);
+ amd_iommu_domain_flush_all(domain);
spin_unlock_irqrestore(&domain->lock, flags);
- /* Remove IOPF handler */
- if (ppr)
- amd_iommu_iopf_remove_device(iommu, dev_data);
+ /* Clear GCR3 table */
+ if (pdom_is_sva_capable(domain))
+ destroy_gcr3_table(dev_data, domain);
- if (dev_is_pci(dev))
- pdev_disable_caps(to_pci_dev(dev));
+ /* Update data structures */
+ dev_data->domain = NULL;
+ list_del(&dev_data->list);
+
+ /* decrease reference counters - needs to happen after the flushes */
+ pdom_detach_iommu(iommu, domain);
+out:
+ mutex_unlock(&dev_data->mutex);
}
static struct iommu_device *amd_iommu_probe_device(struct device *dev)
@@ -2205,17 +2231,14 @@ out_err:
static void amd_iommu_release_device(struct device *dev)
{
- struct amd_iommu *iommu;
-
- if (!check_device(dev))
- return;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
- iommu = rlookup_amd_iommu(dev);
- if (!iommu)
- return;
+ WARN_ON(dev_data->domain);
- amd_iommu_uninit_device(dev);
- iommu_completion_wait(iommu);
+ /*
+ * We keep dev_data around for unplugged devices and reuse it when the
+ * device is re-plugged - not doing so would introduce a ton of races.
+ */
}
static struct iommu_group *amd_iommu_device_group(struct device *dev)
@@ -2236,70 +2259,53 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
*
*****************************************************************************/
-static void cleanup_domain(struct protection_domain *domain)
-{
- struct iommu_dev_data *entry;
-
- lockdep_assert_held(&domain->lock);
-
- if (!domain->dev_cnt)
- return;
-
- while (!list_empty(&domain->dev_list)) {
- entry = list_first_entry(&domain->dev_list,
- struct iommu_dev_data, list);
- BUG_ON(!entry->domain);
- do_detach(entry);
- }
- WARN_ON(domain->dev_cnt != 0);
-}
-
void protection_domain_free(struct protection_domain *domain)
{
WARN_ON(!list_empty(&domain->dev_list));
if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
free_io_pgtable_ops(&domain->iop.pgtbl.ops);
- domain_id_free(domain->id);
+ pdom_id_free(domain->id);
kfree(domain);
}
+static void protection_domain_init(struct protection_domain *domain, int nid)
+{
+ spin_lock_init(&domain->lock);
+ INIT_LIST_HEAD(&domain->dev_list);
+ INIT_LIST_HEAD(&domain->dev_data_list);
+ xa_init(&domain->iommu_array);
+ domain->iop.pgtbl.cfg.amd.nid = nid;
+}
+
struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
{
- struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
- int pgtable;
+ int domid;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
- domain->id = domain_id_alloc();
- if (!domain->id)
- goto err_free;
+ domid = pdom_id_alloc();
+ if (domid <= 0) {
+ kfree(domain);
+ return NULL;
+ }
+ domain->id = domid;
- spin_lock_init(&domain->lock);
- INIT_LIST_HEAD(&domain->dev_list);
- INIT_LIST_HEAD(&domain->dev_data_list);
- domain->iop.pgtbl.cfg.amd.nid = nid;
+ protection_domain_init(domain, nid);
+
+ return domain;
+}
+
+static int pdom_setup_pgtable(struct protection_domain *domain,
+ unsigned int type, int pgtable)
+{
+ struct io_pgtable_ops *pgtbl_ops;
- switch (type) {
/* No need to allocate io pgtable ops in passthrough mode */
- case IOMMU_DOMAIN_IDENTITY:
- case IOMMU_DOMAIN_SVA:
- return domain;
- case IOMMU_DOMAIN_DMA:
- pgtable = amd_iommu_pgtable;
- break;
- /*
- * Force IOMMU v1 page table when allocating
- * domain for pass-through devices.
- */
- case IOMMU_DOMAIN_UNMANAGED:
- pgtable = AMD_IOMMU_V1;
- break;
- default:
- goto err_id;
- }
+ if (!(type & __IOMMU_DOMAIN_PAGING))
+ return 0;
switch (pgtable) {
case AMD_IOMMU_V1:
@@ -2309,25 +2315,20 @@ struct protection_domain *protection_domain_alloc(unsigned int type, int nid)
domain->pd_mode = PD_MODE_V2;
break;
default:
- goto err_id;
+ return -EINVAL;
}
pgtbl_ops =
alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl.cfg, domain);
if (!pgtbl_ops)
- goto err_id;
+ return -ENOMEM;
- return domain;
-err_id:
- domain_id_free(domain->id);
-err_free:
- kfree(domain);
- return NULL;
+ return 0;
}
-static inline u64 dma_max_address(void)
+static inline u64 dma_max_address(int pgtable)
{
- if (amd_iommu_pgtable == AMD_IOMMU_V1)
+ if (pgtable == AMD_IOMMU_V1)
return ~0ULL;
/* V2 with 4/5 level page table */
@@ -2340,11 +2341,13 @@ static bool amd_iommu_hd_support(struct amd_iommu *iommu)
}
static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
- struct device *dev, u32 flags)
+ struct device *dev,
+ u32 flags, int pgtable)
{
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
struct protection_domain *domain;
struct amd_iommu *iommu = NULL;
+ int ret;
if (dev)
iommu = get_amd_iommu_from_dev(dev);
@@ -2356,16 +2359,20 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
return ERR_PTR(-EINVAL);
- if (dirty_tracking && !amd_iommu_hd_support(iommu))
- return ERR_PTR(-EOPNOTSUPP);
-
domain = protection_domain_alloc(type,
dev ? dev_to_node(dev) : NUMA_NO_NODE);
if (!domain)
return ERR_PTR(-ENOMEM);
+ ret = pdom_setup_pgtable(domain, type, pgtable);
+ if (ret) {
+ pdom_id_free(domain->id);
+ kfree(domain);
+ return ERR_PTR(ret);
+ }
+
domain->domain.geometry.aperture_start = 0;
- domain->domain.geometry.aperture_end = dma_max_address();
+ domain->domain.geometry.aperture_end = dma_max_address(pgtable);
domain->domain.geometry.force_aperture = true;
domain->domain.pgsize_bitmap = domain->iop.pgtbl.cfg.pgsize_bitmap;
@@ -2383,8 +2390,16 @@ static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
{
struct iommu_domain *domain;
+ int pgtable = amd_iommu_pgtable;
- domain = do_iommu_domain_alloc(type, NULL, 0);
+ /*
+ * Force IOMMU v1 page table when allocating
+ * domain for pass-through devices.
+ */
+ if (type == IOMMU_DOMAIN_UNMANAGED)
+ pgtable = AMD_IOMMU_V1;
+
+ domain = do_iommu_domain_alloc(type, NULL, 0, pgtable);
if (IS_ERR(domain))
return NULL;
@@ -2398,25 +2413,41 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
{
unsigned int type = IOMMU_DOMAIN_UNMANAGED;
+ struct amd_iommu *iommu = NULL;
+ const u32 supported_flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID;
- if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+ if (dev)
+ iommu = get_amd_iommu_from_dev(dev);
+
+ if ((flags & ~supported_flags) || parent || user_data)
return ERR_PTR(-EOPNOTSUPP);
- return do_iommu_domain_alloc(type, dev, flags);
-}
+ /* Allocate domain with v2 page table if IOMMU supports PASID. */
+ if (flags & IOMMU_HWPT_ALLOC_PASID) {
+ if (!amd_iommu_pasid_supported())
+ return ERR_PTR(-EOPNOTSUPP);
-void amd_iommu_domain_free(struct iommu_domain *dom)
-{
- struct protection_domain *domain;
- unsigned long flags;
+ return do_iommu_domain_alloc(type, dev, flags, AMD_IOMMU_V2);
+ }
- domain = to_pdomain(dom);
+ /* Allocate domain with v1 page table for dirty tracking */
+ if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) {
+ if (iommu && amd_iommu_hd_support(iommu)) {
+ return do_iommu_domain_alloc(type, dev,
+ flags, AMD_IOMMU_V1);
+ }
- spin_lock_irqsave(&domain->lock, flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
- cleanup_domain(domain);
+ /* If nothing specific is required use the kernel commandline default */
+ return do_iommu_domain_alloc(type, dev, 0, amd_iommu_pgtable);
+}
- spin_unlock_irqrestore(&domain->lock, flags);
+void amd_iommu_domain_free(struct iommu_domain *dom)
+{
+ struct protection_domain *domain = to_pdomain(dom);
protection_domain_free(domain);
}
@@ -2430,9 +2461,9 @@ static int blocked_domain_attach_device(struct iommu_domain *domain,
detach_device(dev);
/* Clear DTE and flush the entry */
- spin_lock(&dev_data->lock);
+ mutex_lock(&dev_data->mutex);
dev_update_dte(dev_data, false);
- spin_unlock(&dev_data->lock);
+ mutex_unlock(&dev_data->mutex);
return 0;
}
@@ -2444,13 +2475,39 @@ static struct iommu_domain blocked_domain = {
}
};
+static struct protection_domain identity_domain;
+
+static const struct iommu_domain_ops identity_domain_ops = {
+ .attach_dev = amd_iommu_attach_device,
+};
+
+void amd_iommu_init_identity_domain(void)
+{
+ struct iommu_domain *domain = &identity_domain.domain;
+
+ domain->type = IOMMU_DOMAIN_IDENTITY;
+ domain->ops = &identity_domain_ops;
+ domain->owner = &amd_iommu_ops;
+
+ identity_domain.id = pdom_id_alloc();
+
+ protection_domain_init(&identity_domain, NUMA_NO_NODE);
+}
+
+/* Same as blocked domain except it supports only ops->attach_dev() */
+static struct iommu_domain release_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = blocked_domain_attach_device,
+ }
+};
+
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct protection_domain *domain = to_pdomain(dom);
struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
- struct pci_dev *pdev;
int ret;
/*
@@ -2483,24 +2540,6 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
#endif
- pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
- if (pdev && pdom_is_sva_capable(domain)) {
- pdev_enable_caps(pdev);
-
- /*
- * Device can continue to function even if IOPF
- * enablement failed. Hence in error path just
- * disable device PRI support.
- */
- if (amd_iommu_iopf_add_device(iommu, dev_data))
- pdev_disable_cap_pri(pdev);
- } else if (pdev) {
- pdev_enable_cap_ats(pdev);
- }
-
- /* Update device table */
- dev_update_dte(dev_data, true);
-
return ret;
}
@@ -2842,6 +2881,8 @@ static int amd_iommu_dev_disable_feature(struct device *dev,
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.blocked_domain = &blocked_domain,
+ .release_domain = &release_domain,
+ .identity_domain = &identity_domain.domain,
.domain_alloc = amd_iommu_domain_alloc,
.domain_alloc_user = amd_iommu_domain_alloc_user,
.domain_alloc_sva = amd_iommu_domain_alloc_sva,
@@ -2890,7 +2931,7 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
return;
build_inv_irt(&cmd, devid);
- data = atomic64_add_return(1, &iommu->cmd_sem_val);
+ data = atomic64_inc_return(&iommu->cmd_sem_val);
build_completion_wait(&cmd2, iommu, data);
raw_spin_lock_irqsave(&iommu->lock, flags);
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index 0657b9373be5..8c73a30c2800 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -100,7 +100,8 @@ static const struct mmu_notifier_ops sva_mn = {
};
int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct pdom_dev_data *pdom_dev_data;
struct protection_domain *sva_pdom = to_pdomain(domain);
@@ -108,6 +109,9 @@ int iommu_sva_set_dev_pasid(struct iommu_domain *domain,
unsigned long flags;
int ret = -EINVAL;
+ if (old)
+ return -EOPNOTSUPP;
+
/* PASID zero is used for requests from the I/O device without PASID */
if (!is_pasid_valid(dev_data, pasid))
return ret;