linux-IllusionX/drivers/iommu/intel/cache.c
Lu Baolu 0a3f6b3463 iommu/vt-d: Fix aligned pages in calculate_psi_aligned_address()
The helper calculate_psi_aligned_address() is used to convert an arbitrary
range into a size-aligned one.

The aligned_pages value is calculated from the input start and end, but it
is not recalculated when the start pfn is unaligned and the mask has to be
enlarged, which results in an incorrect number of pages being returned.

The number of pages is used by qi_flush_piotlb() to flush the caches for
the first-stage translation. With too few pages, part of the target range
is never invalidated, leaving the caches out of sync in some cases.

Fixes: c4d27ffaa8 ("iommu/vt-d: Add cache tag invalidation helpers")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240709152643.28109-3-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
2024-07-10 13:06:55 +01:00
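
For illustration only (not part of the commit), the following user-space
sketch reproduces the arithmetic in calculate_psi_aligned_address() with
made-up inputs; __builtin_ctzl() stands in for the kernel's __ffs(), and the
pfn/pages values are arbitrary:

#include <stdio.h>

int main(void)
{
	/* Arbitrary example: a 2-page range starting at an odd pfn. */
	unsigned long pfn = 0x1003, pages = 2;
	unsigned long aligned_pages = 2;		/* __roundup_pow_of_two(2) */
	unsigned long bitmask = aligned_pages - 1;	/* 0x1 */
	unsigned long end_pfn = pfn + pages - 1;	/* 0x1004 */
	unsigned long shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
	unsigned long mask = __builtin_ctzl(shared_bits); /* __ffs() -> 3 */

	/*
	 * Pre-fix: aligned_pages stayed at 2 while the returned address was
	 * aligned down to an 8-page boundary (pfn 0x1000), so a 2-page flush
	 * missed the target pfns 0x1003-0x1004. Post-fix: 1UL << mask = 8.
	 */
	printf("mask=%lu old_pages=%lu fixed_pages=%lu\n",
	       mask, aligned_pages, 1UL << mask);
	return 0;
}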


// SPDX-License-Identifier: GPL-2.0
/*
* cache.c - Intel VT-d cache invalidation
*
* Copyright (C) 2024 Intel Corporation
*
* Author: Lu Baolu <baolu.lu@linux.intel.com>
*/
#define pr_fmt(fmt) "DMAR: " fmt
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include "iommu.h"
#include "pasid.h"
#include "trace.h"
/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
struct intel_iommu *iommu, struct device *dev,
ioasid_t pasid, enum cache_tag_type type)
{
if (tag->type != type)
return false;
if (tag->domain_id != domain_id || tag->pasid != pasid)
return false;
if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
return tag->iommu == iommu;
if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
return tag->dev == dev;
return false;
}
/* Assign a cache tag with specified type to domain. */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid,
enum cache_tag_type type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag, *temp;
unsigned long flags;
tag = kzalloc(sizeof(*tag), GFP_KERNEL);
if (!tag)
return -ENOMEM;
tag->type = type;
tag->iommu = iommu;
tag->domain_id = did;
tag->pasid = pasid;
tag->users = 1;
if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
tag->dev = dev;
else
tag->dev = iommu->iommu.dev;
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(temp, &domain->cache_tags, node) {
if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
temp->users++;
spin_unlock_irqrestore(&domain->cache_lock, flags);
kfree(tag);
trace_cache_tag_assign(temp);
return 0;
}
}
list_add_tail(&tag->node, &domain->cache_tags);
spin_unlock_irqrestore(&domain->cache_lock, flags);
trace_cache_tag_assign(tag);
return 0;
}
/* Unassign a cache tag with specified type from domain. */
static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid,
enum cache_tag_type type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag;
unsigned long flags;
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
trace_cache_tag_unassign(tag);
if (--tag->users == 0) {
list_del(&tag->node);
kfree(tag);
}
break;
}
}
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
int ret;
ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
if (ret || !info->ats_enabled)
return ret;
ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
if (ret)
cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
return ret;
}
static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
if (info->ats_enabled)
cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}
static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
int ret;
ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
if (ret || !info->ats_enabled)
return ret;
ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
if (ret)
cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
return ret;
}
static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
if (info->ats_enabled)
cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}
static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
/*
* The driver assigns a distinct domain ID to each domain except the
* SVA type, which always uses FLPT_DEFAULT_DID.
*/
if (domain->domain.type == IOMMU_DOMAIN_SVA)
return FLPT_DEFAULT_DID;
return domain_id_iommu(domain, iommu);
}
/*
* Assign cache tags to a domain when it's associated with a device's
* PASID using a specific domain ID.
*
* On success (return value of 0), cache tags are created and added to the
* domain's cache tag list. On failure, a negative error code is returned
* and no cache tag is left assigned.
*/
int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid)
{
u16 did = domain_get_id_for_dev(domain, dev);
int ret;
ret = __cache_tag_assign_domain(domain, did, dev, pasid);
if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
return ret;
ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
if (ret)
__cache_tag_unassign_domain(domain, did, dev, pasid);
return ret;
}
/*
* Remove the cache tags associated with a device's PASID when the domain is
* detached from the device.
*
* The cache tags must be previously assigned to the domain by calling the
* assign interface.
*/
void cache_tag_unassign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid)
{
u16 did = domain_get_id_for_dev(domain, dev);
__cache_tag_unassign_domain(domain, did, dev, pasid);
if (domain->domain.type == IOMMU_DOMAIN_NESTED)
__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}
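/*
* Hedged usage sketch (not taken from the kernel tree): a caller that
* attaches a domain to a device's PASID would pair the two interfaces
* above roughly as follows, where hypothetical_setup_translation() is a
* stand-in for whatever configuration step follows the tag assignment.
*
*	ret = cache_tag_assign_domain(domain, dev, pasid);
*	if (ret)
*		return ret;
*	ret = hypothetical_setup_translation(domain, dev, pasid);
*	if (ret)
*		cache_tag_unassign_domain(domain, dev, pasid);
*	return ret;
*/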
static unsigned long calculate_psi_aligned_address(unsigned long start,
unsigned long end,
unsigned long *_pages,
unsigned long *_mask)
{
unsigned long pages = aligned_nrpages(start, end - start + 1);
unsigned long aligned_pages = __roundup_pow_of_two(pages);
unsigned long bitmask = aligned_pages - 1;
unsigned long mask = ilog2(aligned_pages);
unsigned long pfn = IOVA_PFN(start);
/*
* PSI masks the low order bits of the base address. If the
* address isn't aligned to the mask, then compute a mask value
* needed to ensure the target range is flushed.
*/
if (unlikely(bitmask & pfn)) {
unsigned long end_pfn = pfn + pages - 1, shared_bits;
/*
* Since end_pfn <= pfn + bitmask, the only way bits
* higher than bitmask can differ in pfn and end_pfn is
* by carrying. This means after masking out bitmask,
* high bits starting with the first set bit in
* shared_bits are all equal in both pfn and end_pfn.
*/
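/*
* Hypothetical worked example: pfn = 0x1003 and pages = 2 give
* aligned_pages = 2 and bitmask = 0x1, so pfn is unaligned.
* end_pfn = 0x1004, pfn ^ end_pfn = 0x7, and shared_bits =
* ~0x7 & ~0x1, whose lowest set bit is bit 3. The flush therefore
* covers eight pages starting at pfn 0x1000, and aligned_pages
* must be raised to 1UL << 3 to match.
*/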
shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
aligned_pages = 1UL << mask;
}
*_pages = aligned_pages;
*_mask = mask;
return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
/*
* Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
* when the memory mappings in the target domain have been modified.
*/
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
unsigned long end, int ih)
{
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
addr = calculate_psi_aligned_address(start, end, &pages, &mask);
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level) {
qi_flush_piotlb(iommu, tag->domain_id,
tag->pasid, addr, pages, ih);
} else {
/*
* Fallback to domain selective flush if no
* PSI support or the size is too big.
*/
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
addr | ih, mask,
DMA_TLB_PSI_FLUSH);
}
break;
case CACHE_TAG_NESTING_DEVTLB:
/*
* The address translation cache on the device side caches the
* results of nested translation. There is no easy way to
* identify the exact set of nested translations affected by a
* change in S2, so just flush the entire device cache.
*/
addr = 0;
mask = MAX_AGAW_PFN_WIDTH;
fallthrough;
case CACHE_TAG_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
if (tag->pasid == IOMMU_NO_PASID)
qi_flush_dev_iotlb(iommu, sid, info->pfsid,
info->ats_qdep, addr, mask);
else
qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
tag->pasid, info->ats_qdep,
addr, mask);
quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
break;
}
trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
}
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
/*
* Invalidates all ranges of IOVA when the memory mappings in the target
* domain have been modified.
*/
void cache_tag_flush_all(struct dmar_domain *domain)
{
struct cache_tag *tag;
unsigned long flags;
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
struct intel_iommu *iommu = tag->iommu;
struct device_domain_info *info;
u16 sid;
switch (tag->type) {
case CACHE_TAG_IOTLB:
case CACHE_TAG_NESTING_IOTLB:
if (domain->use_first_level)
qi_flush_piotlb(iommu, tag->domain_id,
tag->pasid, 0, -1, 0);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH);
break;
case CACHE_TAG_DEVTLB:
case CACHE_TAG_NESTING_DEVTLB:
info = dev_iommu_priv_get(tag->dev);
sid = PCI_DEVID(info->bus, info->devfn);
qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
0, MAX_AGAW_PFN_WIDTH);
quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
IOMMU_NO_PASID, info->ats_qdep);
break;
}
trace_cache_tag_flush_all(tag);
}
spin_unlock_irqrestore(&domain->cache_lock, flags);
}
/*
* Invalidate a range of IOVA when new mappings are created in the target
* domain.
*
* - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
* Set, any software update to remapping structures other than first-
* stage mapping requires explicit invalidation of the caches.
* - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
* write buffer flushing, software must explicitly perform the write-buffer
* flush when cache invalidation is not required.
*/
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end)
{
unsigned long pages, mask, addr;
struct cache_tag *tag;
unsigned long flags;
addr = calculate_psi_aligned_address(start, end, &pages, &mask);
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
struct intel_iommu *iommu = tag->iommu;
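/*
* When caching mode is not set, or the domain uses first-stage
* translation, hardware does not cache non-present entries, so no
* invalidation is needed for newly created mappings; only a
* write-buffer flush may still be required.
*/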
if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
iommu_flush_write_buffer(iommu);
continue;
}
if (tag->type == CACHE_TAG_IOTLB ||
tag->type == CACHE_TAG_NESTING_IOTLB) {
/*
* Fallback to domain selective flush if no
* PSI support or the size is too big.
*/
if (!cap_pgsel_inv(iommu->cap) ||
mask > cap_max_amask_val(iommu->cap))
iommu->flush.flush_iotlb(iommu, tag->domain_id,
0, 0, DMA_TLB_DSI_FLUSH);
else
iommu->flush.flush_iotlb(iommu, tag->domain_id,
addr, mask,
DMA_TLB_PSI_FLUSH);
}
trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
}
spin_unlock_irqrestore(&domain->cache_lock, flags);
}