Diffstat (limited to 'drivers/iommu/intel/svm.c')
-rw-r--r--  drivers/iommu/intel/svm.c  383
1 file changed, 86 insertions, 297 deletions
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c1bed89b1026..0e3a9b38bef2 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -22,57 +22,22 @@
 #include "iommu.h"
 #include "pasid.h"
 #include "perf.h"
+#include "../iommu-pages.h"
 #include "trace.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
-static DEFINE_XARRAY_ALLOC(pasid_private_array);
-static int pasid_private_add(ioasid_t pasid, void *priv)
-{
-	return xa_alloc(&pasid_private_array, &pasid, priv,
-			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
-}
-
-static void pasid_private_remove(ioasid_t pasid)
-{
-	xa_erase(&pasid_private_array, pasid);
-}
-
-static void *pasid_private_find(ioasid_t pasid)
-{
-	return xa_load(&pasid_private_array, pasid);
-}
-
-static struct intel_svm_dev *
-svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
-{
-	struct intel_svm_dev *sdev = NULL, *t;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(t, &svm->devs, list) {
-		if (t->dev == dev) {
-			sdev = t;
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return sdev;
-}
-
 int intel_svm_enable_prq(struct intel_iommu *iommu)
 {
 	struct iopf_queue *iopfq;
-	struct page *pages;
 	int irq, ret;
 
-	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
-	if (!pages) {
+	iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
+	if (!iommu->prq) {
 		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
 			iommu->name);
 		return -ENOMEM;
 	}
-	iommu->prq = page_address(pages);
 
 	irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
 	if (irq <= 0) {
@@ -117,7 +82,7 @@ free_hwirq:
 	dmar_free_hwirq(irq);
 	iommu->pr_irq = 0;
 free_prq:
-	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+	iommu_free_pages(iommu->prq, PRQ_ORDER);
 	iommu->prq = NULL;
 
 	return ret;
@@ -140,7 +105,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
 		iommu->iopf_queue = NULL;
 	}
 
-	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+	iommu_free_pages(iommu->prq, PRQ_ORDER);
 	iommu->prq = NULL;
 
 	return 0;
@@ -168,94 +133,32 @@ void intel_svm_check(struct intel_iommu *iommu)
 	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
 }
 
-static void __flush_svm_range_dev(struct intel_svm *svm,
-				  struct intel_svm_dev *sdev,
-				  unsigned long address,
-				  unsigned long pages, int ih)
-{
-	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);
-
-	if (WARN_ON(!pages))
-		return;
-
-	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
-	if (info->ats_enabled) {
-		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
-					 svm->pasid, sdev->qdep, address,
-					 order_base_2(pages));
-		quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
-					  svm->pasid, sdev->qdep);
-	}
-}
-
-static void intel_flush_svm_range_dev(struct intel_svm *svm,
-				      struct intel_svm_dev *sdev,
-				      unsigned long address,
-				      unsigned long pages, int ih)
-{
-	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
-	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
-	unsigned long start = ALIGN_DOWN(address, align);
-	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
-
-	while (start < end) {
-		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
-		start += align;
-	}
-}
-
-static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
-				unsigned long pages, int ih)
-{
-	struct intel_svm_dev *sdev;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdev, &svm->devs, list)
-		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
-	rcu_read_unlock();
-}
-
-static void intel_flush_svm_all(struct intel_svm *svm)
-{
-	struct device_domain_info *info;
-	struct intel_svm_dev *sdev;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdev, &svm->devs, list) {
-		info = dev_iommu_priv_get(sdev->dev);
-
-		qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
-		if (info->ats_enabled) {
-			qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
-						 svm->pasid, sdev->qdep,
-						 0, 64 - VTD_PAGE_SHIFT);
-			quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
-						  svm->pasid, sdev->qdep);
-		}
-	}
-	rcu_read_unlock();
-}
-
 /* Pages have been freed at this point */
 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
 					struct mm_struct *mm,
 					unsigned long start, unsigned long end)
 {
-	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
 
-	if (start == 0 && end == -1UL) {
-		intel_flush_svm_all(svm);
+	if (start == 0 && end == ULONG_MAX) {
+		cache_tag_flush_all(domain);
 		return;
 	}
 
-	intel_flush_svm_range(svm, start,
-			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
+	/*
+	 * The mm_types defines vm_end as the first byte after the end address,
+	 * different from IOMMU subsystem using the last address of an address
	 * range.
+	 */
+	cache_tag_flush_range(domain, start, end - 1, 0);
 }
 
 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
-	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
-	struct intel_svm_dev *sdev;
+	struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+	struct dev_pasid_info *dev_pasid;
+	struct device_domain_info *info;
+	unsigned long flags;
 
 	/* This might end up being called from exit_mmap(), *before* the page
 	 * tables are cleared. And __mmu_notifier_release() will delete us from
@@ -269,157 +172,78 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	 * page) so that we end up taking a fault that the hardware really
 	 * *has* to handle gracefully without affecting other processes.
 	 */
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdev, &svm->devs, list)
-		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
-					    svm->pasid, true);
-	rcu_read_unlock();
+	spin_lock_irqsave(&domain->lock, flags);
+	list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+		info = dev_iommu_priv_get(dev_pasid->dev);
+		intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
+					    dev_pasid->pasid, true);
+	}
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+}
+static void intel_mm_free_notifier(struct mmu_notifier *mn)
+{
+	kfree(container_of(mn, struct dmar_domain, notifier));
 }
 
 static const struct mmu_notifier_ops intel_mmuops = {
 	.release = intel_mm_release,
 	.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
+	.free_notifier = intel_mm_free_notifier,
 };
 
-static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
-			     struct intel_svm **rsvm,
-			     struct intel_svm_dev **rsdev)
-{
-	struct intel_svm_dev *sdev = NULL;
-	struct intel_svm *svm;
-
-	if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
-		return -EINVAL;
-
-	svm = pasid_private_find(pasid);
-	if (IS_ERR(svm))
-		return PTR_ERR(svm);
-
-	if (!svm)
-		goto out;
-
-	/*
-	 * If we found svm for the PASID, there must be at least one device
-	 * bond.
-	 */
-	if (WARN_ON(list_empty(&svm->devs)))
-		return -EINVAL;
-	sdev = svm_lookup_device_by_dev(svm, dev);
-
-out:
-	*rsvm = svm;
-	*rsdev = sdev;
-
-	return 0;
-}
-
 static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
 				   struct device *dev, ioasid_t pasid)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct intel_iommu *iommu = info->iommu;
 	struct mm_struct *mm = domain->mm;
-	struct intel_svm_dev *sdev;
-	struct intel_svm *svm;
+	struct dev_pasid_info *dev_pasid;
 	unsigned long sflags;
+	unsigned long flags;
 	int ret = 0;
 
-	svm = pasid_private_find(pasid);
-	if (!svm) {
-		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
-		if (!svm)
-			return -ENOMEM;
-
-		svm->pasid = pasid;
-		svm->mm = mm;
-		INIT_LIST_HEAD_RCU(&svm->devs);
-
-		svm->notifier.ops = &intel_mmuops;
-		ret = mmu_notifier_register(&svm->notifier, mm);
-		if (ret) {
-			kfree(svm);
-			return ret;
-		}
-
-		ret = pasid_private_add(svm->pasid, svm);
-		if (ret) {
-			mmu_notifier_unregister(&svm->notifier, mm);
-			kfree(svm);
-			return ret;
-		}
-	}
+	dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+	if (!dev_pasid)
+		return -ENOMEM;
 
-	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
-	if (!sdev) {
-		ret = -ENOMEM;
-		goto free_svm;
-	}
+	dev_pasid->dev = dev;
+	dev_pasid->pasid = pasid;
 
-	sdev->dev = dev;
-	sdev->iommu = iommu;
-	sdev->did = FLPT_DEFAULT_DID;
-	sdev->sid = PCI_DEVID(info->bus, info->devfn);
-	if (info->ats_enabled) {
-		sdev->qdep = info->ats_qdep;
-		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
-			sdev->qdep = 0;
-	}
+	ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
+	if (ret)
+		goto free_dev_pasid;
 
 	/* Setup the pasid table: */
 	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
 	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
 					    FLPT_DEFAULT_DID, sflags);
 	if (ret)
-		goto free_sdev;
+		goto unassign_tag;
 
-	list_add_rcu(&sdev->list, &svm->devs);
+	spin_lock_irqsave(&dmar_domain->lock, flags);
+	list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+	spin_unlock_irqrestore(&dmar_domain->lock, flags);
 
 	return 0;
 
-free_sdev:
-	kfree(sdev);
-free_svm:
-	if (list_empty(&svm->devs)) {
-		mmu_notifier_unregister(&svm->notifier, mm);
-		pasid_private_remove(pasid);
-		kfree(svm);
-	}
+unassign_tag:
+	cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
free_dev_pasid:
+	kfree(dev_pasid);
 
 	return ret;
 }
 
-void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
-{
-	struct intel_svm_dev *sdev;
-	struct intel_svm *svm;
-	struct mm_struct *mm;
-
-	if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
-		return;
-	mm = svm->mm;
-
-	if (sdev) {
-		list_del_rcu(&sdev->list);
-		kfree_rcu(sdev, rcu);
-
-		if (list_empty(&svm->devs)) {
-			if (svm->notifier.ops)
-				mmu_notifier_unregister(&svm->notifier, mm);
-			pasid_private_remove(svm->pasid);
-			kfree(svm);
-		}
-	}
-}
-
 /* Page request queue descriptor */
 struct page_req_dsc {
 	union {
 		struct {
 			u64 type:8;
 			u64 pasid_present:1;
-			u64 priv_data_present:1;
-			u64 rsvd:6;
+			u64 rsvd:7;
 			u64 rid:16;
 			u64 pasid:20;
 			u64 exe_req:1;
@@ -438,7 +262,8 @@ struct page_req_dsc {
 		};
 		u64 qw_1;
 	};
-	u64 priv_data[2];
+	u64 qw_2;
+	u64 qw_3;
 };
 
 static bool is_canonical_address(u64 addr)
@@ -572,24 +397,6 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
 		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
 	}
-	if (desc->priv_data_present) {
-		/*
-		 * Set last page in group bit if private data is present,
-		 * page response is required as it does for LPIG.
-		 * iommu_report_device_fault() doesn't understand this vendor
-		 * specific requirement thus we set last_page as a workaround.
-		 */
-		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
-		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
-		event.fault.prm.private_data[0] = desc->priv_data[0];
-		event.fault.prm.private_data[1] = desc->priv_data[1];
-	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
-		/*
-		 * If the private data fields are not used by hardware, use it
-		 * to monitor the prq handle latency.
-		 */
-		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
-	}
 
 	iommu_report_device_fault(dev, &event);
 }
@@ -597,39 +404,23 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
 
 static void handle_bad_prq_event(struct intel_iommu *iommu,
 				 struct page_req_dsc *req, int result)
 {
-	struct qi_desc desc;
+	struct qi_desc desc = { };
 
 	pr_err("%s: Invalid page request: %08llx %08llx\n",
 	       iommu->name, ((unsigned long long *)req)[0],
 	       ((unsigned long long *)req)[1]);
 
-	/*
-	 * Per VT-d spec. v3.0 ch7.7, system software must
-	 * respond with page group response if private data
-	 * is present (PDP) or last page in group (LPIG) bit
-	 * is set. This is an additional VT-d feature beyond
-	 * PCI ATS spec.
-	 */
-	if (!req->lpig && !req->priv_data_present)
+	if (!req->lpig)
 		return;
 
 	desc.qw0 = QI_PGRP_PASID(req->pasid) |
 			QI_PGRP_DID(req->rid) |
 			QI_PGRP_PASID_P(req->pasid_present) |
-			QI_PGRP_PDP(req->priv_data_present) |
 			QI_PGRP_RESP_CODE(result) |
 			QI_PGRP_RESP_TYPE;
 	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
 			QI_PGRP_LPIG(req->lpig);
 
-	if (req->priv_data_present) {
-		desc.qw2 = req->priv_data[0];
-		desc.qw3 = req->priv_data[1];
-	} else {
-		desc.qw2 = 0;
-		desc.qw3 = 0;
-	}
-
 	qi_submit_sync(iommu, &desc, 1, 0);
 }
 
@@ -697,7 +488,7 @@ bad_req:
 
 		intel_svm_prq_report(iommu, dev, req);
 		trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
-				 req->priv_data[0], req->priv_data[1],
+				 req->qw_2, req->qw_3,
 				 iommu->prq_seq_number++);
 		mutex_unlock(&iommu->iopf_lock);
 prq_advance:
@@ -736,7 +527,7 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
 	struct intel_iommu *iommu = info->iommu;
 	u8 bus = info->bus, devfn = info->devfn;
 	struct iommu_fault_page_request *prm;
-	bool private_present;
+	struct qi_desc desc;
 	bool pasid_present;
 	bool last_page;
 	u16 sid;
@@ -744,42 +535,25 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
 	prm = &evt->fault.prm;
 	sid = PCI_DEVID(bus, devfn);
 	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
-	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
 	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
 
-	/*
-	 * Per VT-d spec. v3.0 ch7.7, system software must respond
-	 * with page group response if private data is present (PDP)
-	 * or last page in group (LPIG) bit is set. This is an
-	 * additional VT-d requirement beyond PCI ATS spec.
-	 */
-	if (last_page || private_present) {
-		struct qi_desc desc;
-
-		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
-				QI_PGRP_PASID_P(pasid_present) |
-				QI_PGRP_PDP(private_present) |
-				QI_PGRP_RESP_CODE(msg->code) |
-				QI_PGRP_RESP_TYPE;
-		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
-		desc.qw2 = 0;
-		desc.qw3 = 0;
-
-		if (private_present) {
-			desc.qw2 = prm->private_data[0];
-			desc.qw3 = prm->private_data[1];
-		} else if (prm->private_data[0]) {
-			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
-				ktime_to_ns(ktime_get()) - prm->private_data[0]);
-		}
+	desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+			QI_PGRP_PASID_P(pasid_present) |
+			QI_PGRP_RESP_CODE(msg->code) |
+			QI_PGRP_RESP_TYPE;
+	desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
-		qi_submit_sync(iommu, &desc, 1, 0);
-	}
+	qi_submit_sync(iommu, &desc, 1, 0);
 }
 
 static void intel_svm_domain_free(struct iommu_domain *domain)
 {
-	kfree(to_dmar_domain(domain));
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+	/* dmar_domain free is deferred to the mmu free_notifier callback. */
+	mmu_notifier_put(&dmar_domain->notifier);
 }
 
 static const struct iommu_domain_ops intel_svm_domain_ops = {
@@ -787,14 +561,29 @@ static const struct iommu_domain_ops intel_svm_domain_ops = {
 	.free			= intel_svm_domain_free
 };
 
-struct iommu_domain *intel_svm_domain_alloc(void)
+struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
+					    struct mm_struct *mm)
 {
 	struct dmar_domain *domain;
+	int ret;
 
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
+
 	domain->domain.ops = &intel_svm_domain_ops;
+	domain->use_first_level = true;
+	INIT_LIST_HEAD(&domain->dev_pasids);
+	INIT_LIST_HEAD(&domain->cache_tags);
+	spin_lock_init(&domain->cache_lock);
+	spin_lock_init(&domain->lock);
+
+	domain->notifier.ops = &intel_mmuops;
+	ret = mmu_notifier_register(&domain->notifier, mm);
+	if (ret) {
+		kfree(domain);
+		return ERR_PTR(ret);
+	}
 
 	return &domain->domain;
 }
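
The PRQ allocation hunks above fold the alloc_pages()/page_address() pair into the common IOMMU page allocator from drivers/iommu/iommu-pages.h, which also honors the IOMMU's NUMA node and zeroes the pages itself (which is why __GFP_ZERO disappears from the call site). A minimal sketch of the before/after pattern; prq_alloc_example() is a hypothetical function for illustration, not part of the commit:

/* Hypothetical illustration of the allocation change. */
static int prq_alloc_example(struct intel_iommu *iommu)
{
	/*
	 * Old style: alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER)
	 * followed by page_address(), with no NUMA-node preference.
	 * New style: one call that allocates zeroed pages on the
	 * IOMMU's node and returns the virtual address directly.
	 */
	iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
	if (!iommu->prq)
		return -ENOMEM;

	/* The free side also takes the virtual address, not a struct page. */
	iommu_free_pages(iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}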
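The "end - 1" in intel_arch_invalidate_secondary_tlbs() is easy to misread: mmu notifier ranges follow the mm convention where end is the first byte *after* the range, while cache_tag_flush_range() takes the *last* address covered. A worked example under that assumption; flush_user_range() is a hypothetical wrapper:

/*
 * Hypothetical wrapper for illustration. Unmapping one 4K page at
 * 0x1000 arrives from the mmu notifier as [start=0x1000, end=0x2000);
 * the last byte actually covered is 0x1fff, so the inclusive-end
 * cache tag API must be called with end - 1.
 */
static void flush_user_range(struct dmar_domain *domain,
			     unsigned long start, unsigned long end)
{
	cache_tag_flush_range(domain, start, end - 1, 0);
}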
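intel_svm_domain_free() no longer kfree()s the domain directly because a notifier callback may still be running under SRCU when the domain is torn down; mmu_notifier_put() drops the reference and the mmu notifier core invokes .free_notifier only after a grace period. A minimal sketch of that lifetime pattern, assuming only the generic mmu_notifier API and using a hypothetical my_domain in place of struct dmar_domain:

#include <linux/err.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>

/* Hypothetical stand-in for struct dmar_domain. */
struct my_domain {
	struct mmu_notifier notifier;
	/* ... other domain state ... */
};

/*
 * Runs only after an SRCU grace period, when no .release or
 * .arch_invalidate_secondary_tlbs callback can still be executing;
 * only here is it safe to free the embedding object.
 */
static void my_free_notifier(struct mmu_notifier *mn)
{
	kfree(container_of(mn, struct my_domain, notifier));
}

static const struct mmu_notifier_ops my_mmuops = {
	.free_notifier = my_free_notifier,
};

static struct my_domain *my_domain_alloc(struct mm_struct *mm)
{
	struct my_domain *domain;
	int ret;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return ERR_PTR(-ENOMEM);

	domain->notifier.ops = &my_mmuops;
	ret = mmu_notifier_register(&domain->notifier, mm);
	if (ret) {
		kfree(domain);	/* never registered: direct kfree is fine */
		return ERR_PTR(ret);
	}

	return domain;
}

static void my_domain_free(struct my_domain *domain)
{
	/* Do not kfree() here; .free_notifier runs after the grace period. */
	mmu_notifier_put(&domain->notifier);
}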