From 35bd71dd1c161e6e89b21138e01e8c04c6347716 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 18 Nov 2019 11:35:23 +0100 Subject: drm/tegra: Delete host1x_bo_ops->k(un)map It doesn't have any callers anymore. Aside: The ->mmap/munmap hooks have a bit a confusing name, they don't do userspace mmaps, but a kernel vmap. I think most places use vmap for this, except ttm, which uses kmap for vmap for added confusion. mmap seems entirely for userspace mappings set up through mmap(2) syscall. Reviewed-by: Thierry Reding Tested-by: Thierry Reding Acked-by: Sumit Semwal Signed-off-by: Daniel Vetter Cc: Thierry Reding Cc: Jonathan Hunter Cc: linux-tegra@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20191118103536.17675-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/tegra/gem.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'drivers/gpu/drm/tegra/gem.c') diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index fb7667c8dd4c..0c55df20e0ef 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -65,32 +65,6 @@ static void tegra_bo_munmap(struct host1x_bo *bo, void *addr) vunmap(addr); } -static void *tegra_bo_kmap(struct host1x_bo *bo, unsigned int page) -{ - struct tegra_bo *obj = host1x_to_tegra_bo(bo); - - if (obj->vaddr) - return obj->vaddr + page * PAGE_SIZE; - else if (obj->gem.import_attach) - return dma_buf_kmap(obj->gem.import_attach->dmabuf, page); - else - return vmap(obj->pages + page, 1, VM_MAP, - pgprot_writecombine(PAGE_KERNEL)); -} - -static void tegra_bo_kunmap(struct host1x_bo *bo, unsigned int page, - void *addr) -{ - struct tegra_bo *obj = host1x_to_tegra_bo(bo); - - if (obj->vaddr) - return; - else if (obj->gem.import_attach) - dma_buf_kunmap(obj->gem.import_attach->dmabuf, page, addr); - else - vunmap(addr); -} - static struct host1x_bo *tegra_bo_get(struct host1x_bo *bo) { struct tegra_bo *obj = host1x_to_tegra_bo(bo); @@ -107,8 +81,6 @@ static const struct host1x_bo_ops tegra_bo_ops = { .unpin = tegra_bo_unpin, .mmap = tegra_bo_mmap, .munmap = tegra_bo_munmap, - .kmap = tegra_bo_kmap, - .kunmap = tegra_bo_kunmap, }; static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo) -- cgit From 7195cf427b6822a40210f5ab1078aecaa5aaea8a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 18 Nov 2019 11:35:29 +0100 Subject: drm/tegra: Remove dma_buf->k(un)map No in-tree users left. Reviewed-by: Thierry Reding Tested-by: Thierry Reding Acked-by: Sumit Semwal Signed-off-by: Daniel Vetter Cc: Thierry Reding Cc: Jonathan Hunter Cc: linux-tegra@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20191118103536.17675-9-daniel.vetter@ffwll.ch --- drivers/gpu/drm/tegra/gem.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/gpu/drm/tegra/gem.c') diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 0c55df20e0ef..d266b66c1292 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -554,16 +554,6 @@ static int tegra_gem_prime_end_cpu_access(struct dma_buf *buf, return 0; } -static void *tegra_gem_prime_kmap(struct dma_buf *buf, unsigned long page) -{ - return NULL; -} - -static void tegra_gem_prime_kunmap(struct dma_buf *buf, unsigned long page, - void *addr) -{ -} - static int tegra_gem_prime_mmap(struct dma_buf *buf, struct vm_area_struct *vma) { struct drm_gem_object *gem = buf->priv; @@ -594,8 +584,6 @@ static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = { .release = tegra_gem_prime_release, .begin_cpu_access = tegra_gem_prime_begin_cpu_access, .end_cpu_access = tegra_gem_prime_end_cpu_access, - .map = tegra_gem_prime_kmap, - .unmap = tegra_gem_prime_kunmap, .mmap = tegra_gem_prime_mmap, .vmap = tegra_gem_prime_vmap, .vunmap = tegra_gem_prime_vunmap, -- cgit From 273da5a046965ccf0ec79eb63f2d5173467e20fa Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 4 Feb 2020 14:59:25 +0100 Subject: drm/tegra: Reuse IOVA mapping where possible This partially reverts the DMA API support that was recently merged because it was causing performance regressions on older Tegra devices. Unfortunately, the cache maintenance performed by dma_map_sg() and dma_unmap_sg() causes performance to drop by a factor of 10. The right solution for this would be to cache mappings for buffers per consumer device, but that's a bit involved. Instead, we simply revert to the old behaviour of sharing IOVA mappings when we know that devices can do so (i.e. they share the same IOMMU domain). Cc: # v5.5 Reported-by: Dmitry Osipenko Signed-off-by: Thierry Reding Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/gem.c | 10 +++++++++- drivers/gpu/drm/tegra/plane.c | 44 ++++++++++++++++++++++++------------------- drivers/gpu/host1x/job.c | 32 ++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/tegra/gem.c') diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index bc15b430156d..c46b4d4190ac 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -60,8 +60,16 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, /* * If we've manually mapped the buffer object through the IOMMU, make * sure to return the IOVA address of our mapping. + * + * Similarly, for buffers that have been allocated by the DMA API the + * physical address can be used for devices that are not attached to + * an IOMMU. For these devices, callers must pass a valid pointer via + * the @phys argument. + * + * Imported buffers were also already mapped at import time, so the + * existing mapping can be reused. */ - if (phys && obj->mm) { + if (phys) { *phys = obj->iova; return NULL; } diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index cadcdd9ea427..9ccfb56e9b01 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -3,6 +3,8 @@ * Copyright (C) 2017 NVIDIA CORPORATION. All rights reserved. */ +#include + #include #include #include @@ -107,21 +109,27 @@ const struct drm_plane_funcs tegra_plane_funcs = { static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev); unsigned int i; int err; for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); + dma_addr_t phys_addr, *phys; + struct sg_table *sgt; - if (!dc->client.group) { - struct sg_table *sgt; + if (!domain || dc->client.group) + phys = &phys_addr; + else + phys = NULL; - sgt = host1x_bo_pin(dc->dev, &bo->base, NULL); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } + sgt = host1x_bo_pin(dc->dev, &bo->base, phys); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } + if (sgt) { err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (err == 0) { @@ -143,7 +151,7 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) state->iova[i] = sg_dma_address(sgt->sgl); state->sgt[i] = sgt; } else { - state->iova[i] = bo->iova; + state->iova[i] = phys_addr; } } @@ -156,9 +164,11 @@ unpin: struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); struct sg_table *sgt = state->sgt[i]; - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); + if (sgt) + dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + host1x_bo_unpin(dc->dev, &bo->base, sgt); state->iova[i] = DMA_MAPPING_ERROR; state->sgt[i] = NULL; } @@ -172,17 +182,13 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state) for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); + struct sg_table *sgt = state->sgt[i]; - if (!dc->client.group) { - struct sg_table *sgt = state->sgt[i]; - - if (sgt) { - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, - DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); - } - } + if (sgt) + dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + host1x_bo_unpin(dc->dev, &bo->base, sgt); state->iova[i] = DMA_MAPPING_ERROR; state->sgt[i] = NULL; } diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 25ca54de8fc5..0d53c08e9972 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -101,9 +102,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { struct host1x_client *client = job->client; struct device *dev = client->dev; + struct iommu_domain *domain; unsigned int i; int err; + domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { @@ -117,7 +120,19 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - if (client->group) + /* + * If the client device is not attached to an IOMMU, the + * physical address of the buffer object can be used. + * + * Similarly, when an IOMMU domain is shared between all + * host1x clients, the IOVA is already available, so no + * need to map the buffer object again. + * + * XXX Note that this isn't always safe to do because it + * relies on an assumption that no cache maintenance is + * needed on the buffer objects. + */ + if (!domain || client->group) phys = &phys_addr; else phys = NULL; @@ -176,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; + dma_addr_t *phys; unsigned int j; g->bo = host1x_bo_get(g->bo); @@ -184,7 +200,17 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - sgt = host1x_bo_pin(host->dev, g->bo, NULL); + /** + * If the host1x is not attached to an IOMMU, there is no need + * to map the buffer object for the host1x, since the physical + * address can simply be used. + */ + if (!iommu_get_domain_for_dev(host->dev)) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(host->dev, g->bo, phys); if (IS_ERR(sgt)) { err = PTR_ERR(sgt); goto unpin; @@ -214,7 +240,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->unpins[job->num_unpins].size = gather_size; phys_addr = iova_dma_addr(&host->iova, alloc); - } else { + } else if (sgt) { err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (!err) { -- cgit