From be7538ff7488445297d903f5419de8fb99adf85d Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 15 Oct 2019 15:37:48 +0800 Subject: drm/amdgpu: expand sdma copy_buffer interface with tmz parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch expands sdma copy_buffer interface with tmz parameter. Signed-off-by: Aaron Liu Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1331b4c5bdca..298caa50eed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2054,7 +2054,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes); + dst_addr, num_bytes, false); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -2126,7 +2126,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes); + dst_offset, cur_size_in_bytes, false); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; -- cgit From c9dc9cfe185f53ffb0b5d621b00eecd80f942a7c Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 15 Oct 2019 15:45:23 +0800 Subject: drm/amdgpu: expand amdgpu_copy_buffer interface with tmz parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch expands amdgpu_copy_buffer interface with tmz parameter. 
Signed-off-by: Aaron Liu Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index d1495e1c9289..d9b35df33806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false); + false, false, false); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c687f5415b3f..3d822eba9a5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -753,7 +753,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, amdgpu_bo_size(shadow), NULL, fence, - true, false); + true, false, false); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b158230af8db..476f1f89aaad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -124,7 +124,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); r = amdgpu_copy_buffer(ring, gart_addr, vram_addr, - size, NULL, &fence, false, false); + size, NULL, &fence, false, 
false, false); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); @@ -170,7 +170,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(vram_obj); r = amdgpu_copy_buffer(ring, vram_addr, gart_addr, - size, NULL, &fence, false, false); + size, NULL, &fence, false, false, false); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 298caa50eed6..0116c4afe1da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -373,7 +373,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true); + resv, &next, false, true, false); if (r) goto error; @@ -2084,7 +2084,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush) + bool vm_needs_flush, bool tmz) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -2126,7 +2126,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes, false); + dst_offset, cur_size_in_bytes, tmz); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index bd05bbb4878d..dc6502d1060b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -87,7 +87,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush); + bool vm_needs_flush, bool tmz); int 
amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, -- cgit From bffc8c5caaa97d6791aef965b618d71faac07098 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 6 Mar 2020 14:36:43 -0500 Subject: drm/amdgpu: also add the TMZ flag to GART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is necessary for TMZ handling during buffer moves and scanout. Signed-off-by: Christian König Reviewed-by: Alex Deucher Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0116c4afe1da..a3c103b4b0a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1028,6 +1028,9 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; + if (amdgpu_bo_encrypted(abo)) + flags |= AMDGPU_PTE_TMZ; + if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { uint64_t page_idx = 1; -- cgit From effb97cc4ba638eb0a24d612bcea6ff2ef26cfee Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 28 Feb 2020 14:48:06 +0100 Subject: drm/amdgpu: add TMZ handling to amdgpu_move_blit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we should be at least able to move buffers from VRAM to GTT. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 28 +++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a3c103b4b0a2..edb5badb7476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -65,7 +65,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, - struct amdgpu_ring *ring, + struct amdgpu_ring *ring, bool tmz, uint64_t *addr); /** @@ -290,17 +290,23 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, /** * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * @adev: amdgpu device + * @src: buffer/address where to read from + * @dst: buffer/address where to write to + * @size: number of bytes to copy + * @tmz: if a secure copy should be used + * @resv: resv object to sync to + * @f: Returns the last fence if multiple jobs are submitted. * * The function copies @size bytes from {src->mem + src->offset} to * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a * move and different for a BO to BO copy. * - * @f: Returns the last fence if multiple jobs are submitted. 
*/ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, - uint64_t size, + uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f) { @@ -352,7 +358,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { r = amdgpu_map_buffer(src->bo, src->mem, PFN_UP(cur_size + src_page_offset), - src_node_start, 0, ring, + src_node_start, 0, ring, tmz, &from); if (r) goto error; @@ -365,7 +371,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { r = amdgpu_map_buffer(dst->bo, dst->mem, PFN_UP(cur_size + dst_page_offset), - dst_node_start, 1, ring, + dst_node_start, 1, ring, tmz, &to); if (r) goto error; @@ -373,7 +379,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true, false); + resv, &next, false, true, tmz); if (r) goto error; @@ -425,6 +431,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_copy_mem src, dst; struct dma_fence *fence = NULL; int r; @@ -438,14 +445,14 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->num_pages << PAGE_SHIFT, + amdgpu_bo_encrypted(abo), bo->base.resv, &fence); if (r) goto error; /* clear the space being freed */ if (old_mem->mem_type == TTM_PL_VRAM && - (ttm_to_amdgpu_bo(bo)->flags & - AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, @@ -2022,7 +2029,7 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct 
ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, - struct amdgpu_ring *ring, + struct amdgpu_ring *ring, bool tmz, uint64_t *addr) { struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; @@ -2064,6 +2071,9 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT]; flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); + if (tmz) + flags |= AMDGPU_PTE_TMZ; + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, &job->ibs[0].ptr[num_dw]); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index dc6502d1060b..21182caade21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -91,7 +91,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, - uint64_t size, + uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, -- cgit From 218c0b7f183a88b3b2678fbca885a750dda2bff3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 2 Mar 2020 13:00:07 +0100 Subject: drm/amdgpu: stop evicting encrypted BOs to swap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Swapping out encrypted BOs doesn't work because they can't change their physical location without going through a bounce copy. As a workaround disable evicting encrypted BOs to the system domain for now. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index edb5badb7476..aa4ea437155e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1550,6 +1550,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, switch (bo->mem.mem_type) { case TTM_PL_TT: + if (amdgpu_bo_is_amdgpu_bo(bo) && + amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) + return false; return true; case TTM_PL_VRAM: -- cgit From f0ee63cbc5264dbbdb994e87504b0b033149da55 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 18 Mar 2020 10:28:20 +0100 Subject: drm/amdgpu: cleanup amdgpu_ttm_copy_mem_to_mem and amdgpu_map_buffer v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup amdgpu_ttm_copy_mem_to_mem by using fewer variables for the same value. Rename amdgpu_map_buffer to amdgpu_ttm_map_buffer, move it to avoid the forward declaration, cleanup by moving the map decision into the function and add some documentation. No functional change. 
v2: add some more cleanup suggested by Felix Signed-off-by: Christian König Reviewed-by: Felix Kuehling Reviewed-by: Huang Rui Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 270 ++++++++++++++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- 2 files changed, 136 insertions(+), 138 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index aa4ea437155e..7a73282d78e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -62,11 +62,6 @@ #define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, unsigned num_pages, - uint64_t offset, unsigned window, - struct amdgpu_ring *ring, bool tmz, - uint64_t *addr); /** * amdgpu_init_mem_type - Initialize a memory manager for a specific type of @@ -277,7 +272,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, * */ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, - unsigned long *offset) + uint64_t *offset) { struct drm_mm_node *mm_node = mem->mm_node; @@ -288,6 +283,95 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, return mm_node; } +/** + * amdgpu_ttm_map_buffer - Map memory into the GART windows + * @bo: buffer object to map + * @mem: memory object to map + * @mm_node: drm_mm node object to map + * @num_pages: number of pages to map + * @offset: offset into @mm_node where to start + * @window: which GART window to use + * @ring: DMA ring to use for the copy + * @tmz: if we should setup a TMZ enabled mapping + * @addr: resulting address inside the MC address space + * + * Setup one of the GART windows to access a specific piece of memory or return + * the physical address for local memory. 
+ */ +static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, + struct drm_mm_node *mm_node, + unsigned num_pages, uint64_t offset, + unsigned window, struct amdgpu_ring *ring, + bool tmz, uint64_t *addr) +{ + struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + unsigned num_dw, num_bytes; + dma_addr_t *dma_address; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t flags; + int r; + + BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < + AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + + /* Map only what can't be accessed directly */ + if (mem->start != AMDGPU_BO_INVALID_OFFSET) { + *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset; + return 0; + } + + *addr = adev->gmc.gart_start; + *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE; + *addr += offset & ~PAGE_MASK; + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_bytes = num_pages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_NORMAL, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes, false); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); + if (tmz) + flags |= AMDGPU_PTE_TMZ; + + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + &job->ibs[0].ptr[num_dw]); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + /** * 
amdgpu_copy_ttm_mem_to_mem - Helper function for copy * @adev: amdgpu device @@ -304,79 +388,62 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, * */ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - struct amdgpu_copy_mem *src, - struct amdgpu_copy_mem *dst, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f) { + const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); + + uint64_t src_node_size, dst_node_size, src_offset, dst_offset; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *src_mm, *dst_mm; - uint64_t src_node_start, dst_node_start, src_node_size, - dst_node_size, src_page_offset, dst_page_offset; struct dma_fence *fence = NULL; int r = 0; - const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE); if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - src_mm = amdgpu_find_mm_node(src->mem, &src->offset); - src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + - src->offset; - src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; - src_page_offset = src_node_start & (PAGE_SIZE - 1); + src_offset = src->offset; + src_mm = amdgpu_find_mm_node(src->mem, &src_offset); + src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset; - dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); - dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + - dst->offset; - dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; - dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + dst_offset = dst->offset; + dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset); + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset; mutex_lock(&adev->mman.gtt_window_lock); while (size) { - unsigned long cur_size; - uint64_t from = src_node_start, to = dst_node_start; + uint32_t 
src_page_offset = src_offset & ~PAGE_MASK; + uint32_t dst_page_offset = dst_offset & ~PAGE_MASK; struct dma_fence *next; + uint32_t cur_size; + uint64_t from, to; /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst * begins at an offset, then adjust the size accordingly */ - cur_size = min3(min(src_node_size, dst_node_size), size, - GTT_MAX_BYTES); - if (cur_size + src_page_offset > GTT_MAX_BYTES || - cur_size + dst_page_offset > GTT_MAX_BYTES) - cur_size -= max(src_page_offset, dst_page_offset); - - /* Map only what needs to be accessed. Map src to window 0 and - * dst to window 1 - */ - if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { - r = amdgpu_map_buffer(src->bo, src->mem, - PFN_UP(cur_size + src_page_offset), - src_node_start, 0, ring, tmz, - &from); - if (r) - goto error; - /* Adjust the offset because amdgpu_map_buffer returns - * start of mapped page - */ - from += src_page_offset; - } + cur_size = min3(src_node_size, dst_node_size, size); + cur_size = min(GTT_MAX_BYTES - src_page_offset, cur_size); + cur_size = min(GTT_MAX_BYTES - dst_page_offset, cur_size); + + /* Map src to window 0 and dst to window 1. 
*/ + r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm, + PFN_UP(cur_size + src_page_offset), + src_offset, 0, ring, tmz, &from); + if (r) + goto error; - if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { - r = amdgpu_map_buffer(dst->bo, dst->mem, - PFN_UP(cur_size + dst_page_offset), - dst_node_start, 1, ring, tmz, - &to); - if (r) - goto error; - to += dst_page_offset; - } + r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm, + PFN_UP(cur_size + dst_page_offset), + dst_offset, 1, ring, tmz, &to); + if (r) + goto error; r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, &next, false, true, tmz); @@ -392,23 +459,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, src_node_size -= cur_size; if (!src_node_size) { - src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, - src->mem); - src_node_size = (src_mm->size << PAGE_SHIFT); - src_page_offset = 0; + ++src_mm; + src_node_size = src_mm->size << PAGE_SHIFT; + src_offset = 0; } else { - src_node_start += cur_size; - src_page_offset = src_node_start & (PAGE_SIZE - 1); + src_offset += cur_size; } + dst_node_size -= cur_size; if (!dst_node_size) { - dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, - dst->mem); - dst_node_size = (dst_mm->size << PAGE_SHIFT); - dst_page_offset = 0; + ++dst_mm; + dst_node_size = dst_mm->size << PAGE_SHIFT; + dst_offset = 0; } else { - dst_node_start += cur_size; - dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + dst_offset += cur_size; } } error: @@ -749,8 +813,8 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { + uint64_t offset = (page_offset << PAGE_SHIFT); struct drm_mm_node *mm; - unsigned long offset = (page_offset << PAGE_SHIFT); mm = amdgpu_find_mm_node(&bo->mem, &offset); return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + @@ -1601,8 +1665,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if 
(bo->mem.mem_type != TTM_PL_VRAM) return -EIO; - nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); - pos = (nodes->start << PAGE_SHIFT) + offset; + pos = offset; + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos); + pos += (nodes->start << PAGE_SHIFT); while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; @@ -2029,73 +2094,6 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) return ttm_bo_mmap(filp, vma, &adev->mman.bdev); } -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, unsigned num_pages, - uint64_t offset, unsigned window, - struct amdgpu_ring *ring, bool tmz, - uint64_t *addr) -{ - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - struct amdgpu_device *adev = ring->adev; - struct ttm_tt *ttm = bo->ttm; - struct amdgpu_job *job; - unsigned num_dw, num_bytes; - dma_addr_t *dma_address; - struct dma_fence *fence; - uint64_t src_addr, dst_addr; - uint64_t flags; - int r; - - BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < - AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - - *addr = adev->gmc.gart_start; - *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE; - - num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - num_bytes = num_pages * 8; - - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_NORMAL, &job); - if (r) - return r; - - src_addr = num_dw * 4; - src_addr += job->ibs[0].gpu_addr; - - dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); - dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; - amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); - - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - - dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT]; - flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); - if (tmz) - flags |= AMDGPU_PTE_TMZ; - - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, - &job->ibs[0].ptr[num_dw]); - if (r) - goto 
error_free; - - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - - dma_fence_put(fence); - - return r; - -error_free: - amdgpu_job_free(job); - return r; -} - int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 21182caade21..11c0e79e7106 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -89,8 +89,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush, bool tmz); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - struct amdgpu_copy_mem *src, - struct amdgpu_copy_mem *dst, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); -- cgit From 9504578314a70e6d96ee812dc93b5b5c9514b988 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 19 Mar 2020 13:57:40 +0100 Subject: drm/amdgpu: add full TMZ support into amdgpu_ttm_map_buffer v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should allow us to also support VRAM->GTT moves. 
v2: fix missing vram_base_adjustment Signed-off-by: Christian König Reviewed-by: Huang Rui Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 ++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7a73282d78e5..0d7ad6468ef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -305,21 +305,21 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, unsigned window, struct amdgpu_ring *ring, bool tmz, uint64_t *addr) { - struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; unsigned num_dw, num_bytes; - dma_addr_t *dma_address; struct dma_fence *fence; uint64_t src_addr, dst_addr; + void *cpu_addr; uint64_t flags; + unsigned int i; int r; BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); /* Map only what can't be accessed directly */ - if (mem->start != AMDGPU_BO_INVALID_OFFSET) { + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset; return 0; } @@ -348,15 +348,37 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); if (tmz) flags |= AMDGPU_PTE_TMZ; - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, - &job->ibs[0].ptr[num_dw]); - if (r) - goto error_free; + cpu_addr = &job->ibs[0].ptr[num_dw]; + + if (mem->mem_type == TTM_PL_TT) { + struct ttm_dma_tt *dma; + dma_addr_t *dma_address; + + dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); + dma_address = &dma->dma_address[offset >> 
PAGE_SHIFT]; + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + cpu_addr); + if (r) + goto error_free; + } else { + dma_addr_t dma_address; + + dma_address = (mm_node->start << PAGE_SHIFT) + offset; + dma_address += adev->vm_manager.vram_base_offset; + + for (i = 0; i < num_pages; ++i) { + r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, + &dma_address, flags, cpu_addr); + if (r) + goto error_free; + + dma_address += PAGE_SIZE; + } + } r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); -- cgit From b717fa5cb1a1aae9ea744a3b61e9d9aa1d258c9c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 3 Apr 2020 15:56:12 +0200 Subject: drm/amdgpu: fix size calculation in amdgpu_ttm_copy_mem_to_mem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the node is larger than 4GB we overrun the size calculation. Fix this by correctly limiting the size to the window as well. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0d7ad6468ef5..1296499f0f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -450,9 +450,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, /* Copy size cannot exceed GTT_MAX_BYTES. 
So if src or dst * begins at an offset, then adjust the size accordingly */ - cur_size = min3(src_node_size, dst_node_size, size); - cur_size = min(GTT_MAX_BYTES - src_page_offset, cur_size); - cur_size = min(GTT_MAX_BYTES - dst_page_offset, cur_size); + cur_size = max(src_page_offset, dst_page_offset); + cur_size = min(min3(src_node_size, dst_node_size, size), + (uint64_t)(GTT_MAX_BYTES - cur_size)); /* Map src to window 0 and dst to window 1. */ r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm, -- cgit From 9ecefb19c3a6626c27ea7ee72d431f22462e1d54 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 1 Apr 2020 11:18:21 +0200 Subject: drm/amdgpu: cleanup IB pool handling a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the coding style, move and rename the definitions to better match what they are supposed to be doing. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +---- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 65 +++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 13 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 11 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- 10 files changed, 71 insertions(+), 56 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 589d8783fa21..99e5f474505d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -204,8 +204,6 @@ extern int amdgpu_cik_support; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT 
(HZ / 2) -/* AMDGPU_IB_POOL_SIZE must be a power of 2 */ -#define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 #define AMDGPUFB_CONN_LIMIT 4 #define AMDGPU_BIOS_NUM_SCRATCH 16 @@ -402,13 +400,6 @@ struct amdgpu_sa_bo { int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); -enum amdgpu_ib_pool_type { - AMDGPU_IB_POOL_NORMAL = 0, - AMDGPU_IB_POOL_VM, - AMDGPU_IB_POOL_DIRECT, - - AMDGPU_IB_POOL_MAX -}; /* * IRQS. */ @@ -866,7 +857,7 @@ struct amdgpu_device { unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; bool ib_pool_ready; - struct amdgpu_sa_manager ring_tmp_bo[AMDGPU_IB_POOL_MAX]; + struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; /* interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3eee5c7d83e0..7653f62b1b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -924,7 +924,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ring = to_amdgpu_ring(entity->rq->sched); r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_NORMAL, ib); + chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index aebbbb573884..c24366aacf3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -61,14 +61,13 @@ * Returns 0 on success, error on failure. 
*/ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool_type, - struct amdgpu_ib *ib) + unsigned size, enum amdgpu_ib_pool_type pool_type, + struct amdgpu_ib *ib) { int r; if (size) { - r = amdgpu_sa_bo_new(&adev->ring_tmp_bo[pool_type], + r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type], &ib->sa_bo, size, 256); if (r) { dev_err(adev->dev, "failed to get a new IB (%d)\n", r); @@ -305,30 +304,32 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, */ int amdgpu_ib_pool_init(struct amdgpu_device *adev) { - int r, i; unsigned size; + int r, i; - if (adev->ib_pool_ready) { + if (adev->ib_pool_ready) return 0; - } + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { if (i == AMDGPU_IB_POOL_DIRECT) size = PAGE_SIZE * 2; else - size = AMDGPU_IB_POOL_SIZE*64*1024; - r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo[i], - size, - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT); - if (r) { - for (i--; i >= 0; i--) - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]); - return r; - } + size = AMDGPU_IB_POOL_SIZE; + + r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i], + size, AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto error; } adev->ib_pool_ready = true; return 0; + +error: + while (i--) + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); + return r; } /** @@ -343,11 +344,12 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) { int i; - if (adev->ib_pool_ready) { - for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]); - adev->ib_pool_ready = false; - } + if (!adev->ib_pool_ready) + return; + + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); + adev->ib_pool_ready = false; } /** @@ -362,9 +364,9 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) */ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { - unsigned i; - int r, ret = 0; long tmo_gfx, tmo_mm; + int r, ret = 
0; + unsigned i; tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; if (amdgpu_sriov_vf(adev)) { @@ -442,15 +444,16 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - seq_printf(m, "-------------------- NORMAL -------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_NORMAL], m); - seq_printf(m, "---------------------- VM ---------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_VM], m); - seq_printf(m, "-------------------- DIRECT--------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_DIRECT], m); + seq_printf(m, "--------------------- DELAYED --------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], + m); + seq_printf(m, "-------------------- IMMEDIATE -------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE], + m); + seq_printf(m, "--------------------- DIRECT ---------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m); return 0; - } static const struct drm_info_list amdgpu_debugfs_sa_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7390261095b7..107e80063553 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -50,6 +50,8 @@ #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) +#define AMDGPU_IB_POOL_SIZE (1024 * 1024) + enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX, AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE, @@ -63,6 +65,17 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_KIQ }; +enum amdgpu_ib_pool_type { + /* Normal submissions to the top of the pipeline. */ + AMDGPU_IB_POOL_DELAYED, + /* Immediate submissions to the bottom of the pipeline. 
*/ + AMDGPU_IB_POOL_IMMEDIATE, + /* Direct submission to the ring buffer during init and reset. */ + AMDGPU_IB_POOL_DIRECT, + + AMDGPU_IB_POOL_MAX +}; + struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 476f1f89aaad..2f4d5ca9894f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -44,7 +44,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1296499f0f54..ea0199a8f9c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -333,7 +333,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_bytes = num_pages * 8; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_NORMAL, &job); + AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; @@ -2122,6 +2122,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush, bool tmz) { + enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED; struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -2139,8 +2141,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, - direct_submit ? 
AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job); if (r) return r; @@ -2229,7 +2230,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, /* for IB padding */ num_dw += 64; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, + &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 550282d9c1fc..5100ebe8858d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1056,8 +1056,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, goto err; } - r = amdgpu_job_alloc_with_ib(adev, 64, - direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index d090455282e5..ecaa2d7483b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -447,7 +447,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -526,7 +526,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + direct ? 
AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index fbd451f3559a..b96c8d9a1946 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -61,11 +61,12 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, struct dma_resv *resv, enum amdgpu_sync_mode sync_mode) { + enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE : + AMDGPU_IB_POOL_DELAYED; unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; int r; - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, - p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job); + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job); if (r) return r; @@ -199,6 +200,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE : + AMDGPU_IB_POOL_DELAYED; unsigned int i, ndw, nptes; uint64_t *pte; int r; @@ -224,8 +227,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, - p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job); + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, + &p->job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index eff25c72c6c6..edaa50d850a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -372,7 +372,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * translation. Avoid this by doing the invalidation from the SDMA * itself. 
*/ - r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_VM, &job); + r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, + &job); if (r) goto error_alloc; -- cgit