Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
 -rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 219
 1 file changed, 153 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15a28578d458..51eacefadea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
 #include "amdgpu.h"
+#include "amdgpu_object.h"
 #include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		placement->num_busy_placement = 1;
 		return;
 	}
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (adev->mman.buffer_funcs &&
@@ -257,7 +258,7 @@ gtt:
 static int amdgpu_verify_access(struct ttm_buffer_object *bo,
 				struct file *filp)
 {
-	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
@@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 	return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-			    bool evict, bool no_wait_gpu,
-			    struct ttm_mem_reg *new_mem,
-			    struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+					       unsigned long *offset)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
-	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct drm_mm_node *mm_node = mem->mm_node;
 
-	struct drm_mm_node *old_mm, *new_mm;
-	uint64_t old_start, old_size, new_start, new_size;
-	unsigned long num_pages;
-	struct dma_fence *fence = NULL;
-	int r;
+	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+		*offset -= (mm_node->size << PAGE_SHIFT);
+		++mm_node;
+	}
+	return mm_node;
+}
 
-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f)
+{
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	struct drm_mm_node *src_mm, *dst_mm;
+	uint64_t src_node_start, dst_node_start, src_node_size,
+		 dst_node_size, src_page_offset, dst_page_offset;
+	struct dma_fence *fence = NULL;
+	int r = 0;
+	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+					AMDGPU_GPU_PAGE_SIZE);
 
 	if (!ring->ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
 
-	old_mm = old_mem->mm_node;
-	old_size = old_mm->size;
-	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
+	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+			 src->offset;
+	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	new_mm = new_mem->mm_node;
-	new_size = new_mm->size;
-	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
+	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+			 dst->offset;
+	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-	num_pages = new_mem->num_pages;
 	mutex_lock(&adev->mman.gtt_window_lock);
-	while (num_pages) {
-		unsigned long cur_pages = min(min(old_size, new_size),
-					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-		uint64_t from = old_start, to = new_start;
+
+	while (size) {
+		unsigned long cur_size;
+		uint64_t from = src_node_start, to = dst_node_start;
 		struct dma_fence *next;
 
-		if (old_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-					      old_start, 0, ring, &from);
+		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+		 * begins at an offset, then adjust the size accordingly
+		 */
+		cur_size = min3(min(src_node_size, dst_node_size), size,
+				GTT_MAX_BYTES);
+		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+		    cur_size + dst_page_offset > GTT_MAX_BYTES)
+			cur_size -= max(src_page_offset, dst_page_offset);
+
+		/* Map only what needs to be accessed. Map src to window 0 and
+		 * dst to window 1
+		 */
+		if (src->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+			r = amdgpu_map_buffer(src->bo, src->mem,
+					PFN_UP(cur_size + src_page_offset),
+					src_node_start, 0, ring,
+					&from);
 			if (r)
 				goto error;
+			/* Adjust the offset because amdgpu_map_buffer returns
+			 * start of mapped page
+			 */
+			from += src_page_offset;
 		}
 
-		if (new_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-					      new_start, 1, ring, &to);
+		if (dst->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+			r = amdgpu_map_buffer(dst->bo, dst->mem,
+					PFN_UP(cur_size + dst_page_offset),
+					dst_node_start, 1, ring,
+					&to);
 			if (r)
 				goto error;
+			to += dst_page_offset;
 		}
 
-		r = amdgpu_copy_buffer(ring, from, to,
-				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false, true);
+		r = amdgpu_copy_buffer(ring, from, to, cur_size,
+				       resv, &next, false, true);
 		if (r)
 			goto error;
 
 		dma_fence_put(fence);
 		fence = next;
 
-		num_pages -= cur_pages;
-		if (!num_pages)
+		size -= cur_size;
+		if (!size)
 			break;
 
-		old_size -= cur_pages;
-		if (!old_size) {
-			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-			old_size = old_mm->size;
+		src_node_size -= cur_size;
+		if (!src_node_size) {
+			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+							     src->mem);
+			src_node_size = (src_mm->size << PAGE_SHIFT);
 		} else {
-			old_start += cur_pages * PAGE_SIZE;
+			src_node_start += cur_size;
+			src_page_offset = src_node_start & (PAGE_SIZE - 1);
 		}
-
-		new_size -= cur_pages;
-		if (!new_size) {
-			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-			new_size = new_mm->size;
+		dst_node_size -= cur_size;
+		if (!dst_node_size) {
+			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+							     dst->mem);
+			dst_node_size = (dst_mm->size << PAGE_SHIFT);
 		} else {
-			new_start += cur_pages * PAGE_SIZE;
+			dst_node_start += cur_size;
+			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 		}
 	}
+error:
 	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (f)
+		*f = dma_fence_get(fence);
+	dma_fence_put(fence);
+	return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, bool no_wait_gpu,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct amdgpu_copy_mem src, dst;
+	struct dma_fence *fence = NULL;
+	int r;
+
+	src.bo = bo;
+	dst.bo = bo;
+	src.mem = old_mem;
+	dst.mem = new_mem;
+	src.offset = 0;
+	dst.offset = 0;
+
+	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+				       new_mem->num_pages << PAGE_SHIFT,
+				       bo->resv, &fence);
+	if (r)
+		goto error;
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
@@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 	int r;
 
 	/* Can't move a pinned BO */
-	abo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = ttm_to_amdgpu_bo(bo);
 	if (WARN_ON_ONCE(abo->pin_count > 0))
 		return -EINVAL;
 
@@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 					   unsigned long page_offset)
 {
-	struct drm_mm_node *mm = bo->mem.mm_node;
-	uint64_t size = mm->size;
-	uint64_t offset = page_offset;
+	struct drm_mm_node *mm;
+	unsigned long offset = (page_offset << PAGE_SHIFT);
 
-	page_offset = do_div(offset, size);
-	mm += offset;
-	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+	mm = amdgpu_find_mm_node(&bo->mem, &offset);
+	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+		(offset >> PAGE_SHIFT);
 }
 
 /*
@@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 				    unsigned long offset,
 				    void *buf, int len, int write)
 {
-	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	struct drm_mm_node *nodes;
 	uint32_t value = 0;
 	int ret = 0;
 	uint64_t pos;
@@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->mem.mem_type != TTM_PL_VRAM)
 		return -EIO;
 
-	while (offset >= (nodes->size << PAGE_SHIFT)) {
-		offset -= nodes->size << PAGE_SHIFT;
-		++nodes;
-	}
+	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 
 	while (len && pos < adev->mc.mc_vram_size) {
@@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* Change the size here instead of the init above so only lpfn is affected */
 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
+	/*
+	 *The reserved vram for firmware must be pinned to the specified
+	 *place on the VRAM, so reserve it early.
+	 */
+	r = amdgpu_fw_reserve_vram_init(adev);
+	if (r) {
+		return r;
+	}
+
 	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_VRAM,
 				    &adev->stolen_vga_memory,
@@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	job->vm_needs_flush = vm_needs_flush;
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     false);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
@@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
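
The lookup that amdgpu_find_mm_node() performs is reused by three callers in this patch (io_mem_pfn, access_memory, and the new copy helper): walk the buffer's allocator nodes until the remaining byte offset falls inside one, leaving the offset node-relative. The standalone C sketch below reproduces only that walk; the mm_node struct, the node table, and the PAGE_SHIFT value are simplified stand-ins for illustration, not driver code.

	/* Standalone sketch of the node walk done by amdgpu_find_mm_node():
	 * given a byte offset into a buffer scattered across several
	 * allocator nodes, find the node containing it and make the offset
	 * node-relative. The node table below is made up for illustration.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	struct mm_node {		/* stand-in for struct drm_mm_node */
		uint64_t start;		/* first page of the node */
		uint64_t size;		/* node size in pages */
	};

	static struct mm_node *find_mm_node(struct mm_node *node, uint64_t *offset)
	{
		/* Skip whole nodes until the remaining offset lands in one. */
		while (*offset >= (node->size << PAGE_SHIFT)) {
			*offset -= node->size << PAGE_SHIFT;
			++node;
		}
		return node;
	}

	int main(void)
	{
		struct mm_node nodes[] = {
			{ .start = 0x100, .size = 4 },	/* 16 KiB */
			{ .start = 0x800, .size = 2 },	/*  8 KiB */
			{ .start = 0x040, .size = 8 },	/* 32 KiB */
		};
		uint64_t offset = 20 * 1024;	/* 20 KiB into the buffer */
		struct mm_node *n = find_mm_node(nodes, &offset);

		/* 20 KiB skips the first node (16 KiB) and lands 4 KiB into
		 * the second one, i.e. page 0x801.
		 */
		printf("node start 0x%llx, offset in node %llu bytes\n",
		       (unsigned long long)n->start, (unsigned long long)offset);
		return 0;
	}

The caller is expected to pass an offset that lies within the buffer; like the driver helper, the sketch does no bounds checking past the last node.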
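The core of amdgpu_ttm_copy_mem_to_mem() is the loop that clamps each SDMA copy to the smallest of the remaining source node, the remaining destination node, the bytes left to copy, and the GTT transfer window, then shrinks the chunk further when a starting page offset would push the mapped window past GTT_MAX_BYTES. The sketch below reproduces only that size arithmetic as standalone C; the window size, node sizes, and offsets are invented values, and the node-advance bookkeeping of the real loop is reduced to a placeholder.

	/* Standalone sketch of the chunk-size clamping used by the copy loop
	 * in amdgpu_ttm_copy_mem_to_mem(). All values are illustrative only.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE     4096ULL
	#define GTT_MAX_BYTES (512ULL * PAGE_SIZE)	/* assumed window size */

	static uint64_t min3_u64(uint64_t a, uint64_t b, uint64_t c)
	{
		uint64_t m = a < b ? a : b;
		return m < c ? m : c;
	}

	int main(void)
	{
		/* Hypothetical state: bytes left in the current src/dst nodes,
		 * total bytes still to copy, and the in-page offsets of the
		 * current src/dst addresses.
		 */
		uint64_t src_node_size = 3 * GTT_MAX_BYTES / 2;
		uint64_t dst_node_size = 5 * GTT_MAX_BYTES;
		uint64_t size = 4 * GTT_MAX_BYTES;
		uint64_t src_page_offset = 1024;
		uint64_t dst_page_offset = 0;

		while (size) {
			uint64_t node_min = src_node_size < dst_node_size ?
					    src_node_size : dst_node_size;
			uint64_t cur_size = min3_u64(node_min, size,
						     GTT_MAX_BYTES);

			/* If either side starts mid-page, the mapped window
			 * spans page_offset + cur_size bytes; shrink cur_size
			 * so that still fits within the GTT window.
			 */
			if (cur_size + src_page_offset > GTT_MAX_BYTES ||
			    cur_size + dst_page_offset > GTT_MAX_BYTES)
				cur_size -= src_page_offset > dst_page_offset ?
					    src_page_offset : dst_page_offset;

			printf("copy chunk of %llu bytes\n",
			       (unsigned long long)cur_size);

			size -= cur_size;
			src_node_size -= cur_size;
			dst_node_size -= cur_size;

			/* Node advance and offset recomputation are omitted;
			 * pretend the next node is large so the loop finishes.
			 */
			if (!src_node_size)
				src_node_size = 8 * GTT_MAX_BYTES;
			if (!dst_node_size)
				dst_node_size = 8 * GTT_MAX_BYTES;
		}
		return 0;
	}

In the real helper the chunk is additionally mapped through GTT windows 0 and 1 when the source or destination is an unallocated TT placement, and the page offset is recomputed whenever the copy continues within the same node.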