diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 576 |
1 files changed, 381 insertions, 195 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ad5bf86ee8a3..205da3ff9cd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -46,6 +46,7 @@ #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -76,7 +77,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; int r; adev->mman.mem_global_referenced = false; @@ -108,9 +109,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; - r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, + rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move run queue.\n"); goto error_entity; @@ -131,7 +132,7 @@ error_mem: static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { - amd_sched_entity_fini(adev->mman.entity.sched, + drm_sched_entity_fini(adev->mman.entity.sched, &adev->mman.entity); mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); @@ -161,7 +162,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_TT: man->func = &amdgpu_gtt_mgr_func; - man->gpu_offset = adev->mc.gart_start; + man->gpu_offset = adev->gmc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; @@ -169,7 +170,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, case TTM_PL_VRAM: /* "On-card" video ram */ man->func = &amdgpu_vram_mgr_func; - man->gpu_offset = adev->mc.vram_start; + man->gpu_offset = adev->gmc.vram_start; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; @@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; + if (bo->type == ttm_bo_type_sg) { + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + } + if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; placement->busy_placement = &placements; @@ -213,13 +220,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (adev->mman.buffer_funcs && - adev->mman.buffer_funcs_ring && - adev->mman.buffer_funcs_ring->ready == false) { + if (!adev->mman.buffer_funcs_enabled) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); - } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { - unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; struct drm_mm_node *node = bo->mem.mm_node; unsigned long pages_left; @@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + /* + * Don't verify access for KFD BOs. They don't have a GEM + * object associated with them. + */ + if (abo->kfd_bo) + return 0; + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->gem_base.vma_node, @@ -282,8 +294,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, { uint64_t addr = 0; - if (mem->mem_type != TTM_PL_TT || - amdgpu_gtt_mgr_is_allocated(mem)) { + if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) { addr = mm_node->start << PAGE_SHIFT; addr += bo->bdev->man[mem->mem_type].gpu_offset; } @@ -332,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE); - if (!ring->ready) { + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -369,7 +380,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, * dst to window 1 */ if (src->mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(src->mem)) { + !amdgpu_gtt_mgr_has_gart_addr(src->mem)) { r = amdgpu_map_buffer(src->bo, src->mem, PFN_UP(cur_size + src_page_offset), src_node_start, 0, ring, @@ -383,7 +394,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } if (dst->mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(dst->mem)) { + !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) { r = amdgpu_map_buffer(dst->bo, dst->mem, PFN_UP(cur_size + dst_page_offset), dst_node_start, 1, ring, @@ -467,9 +478,8 @@ error: return r; } -static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, +static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; @@ -489,8 +499,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, - interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { return r; } @@ -500,23 +509,22 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, goto out_cleanup; } - r = ttm_tt_bind(bo->ttm, &tmp_mem); + r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); if (unlikely(r)) { goto out_cleanup; } - r = amdgpu_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem); + r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, ctx, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; } -static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, +static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; @@ -536,16 +544,15 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, - interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } - r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem); + r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } @@ -554,10 +561,9 @@ out_cleanup: return r; } -static int amdgpu_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) +static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; struct amdgpu_bo *abo; @@ -583,28 +589,24 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, amdgpu_move_null(bo, new_mem); return 0; } - if (adev->mman.buffer_funcs == NULL || - adev->mman.buffer_funcs_ring == NULL || - !adev->mman.buffer_funcs_ring->ready) { - /* use memcpy */ + + if (!adev->mman.buffer_funcs_enabled) goto memcpy; - } if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { - r = amdgpu_move_vram_ram(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem); } else if (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) { - r = amdgpu_move_ram_vram(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem); } else { - r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem); + r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, + new_mem, old_mem); } if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, ctx, new_mem); if (r) { return r; } @@ -628,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); + struct drm_mm_node *mm_node = mem->mm_node; mem->bus.addr = NULL; mem->bus.offset = 0; @@ -645,9 +648,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ - if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size) + if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size) return -EINVAL; - mem->bus.base = adev->mc.aper_base; + /* Only physically contiguous buffers apply. In a contiguous + * buffer, size of the first mm_node would match the number of + * pages in ttm_mem_reg. + */ + if (adev->mman.aper_base_kaddr && + (mm_node->size == mem->num_pages)) + mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + + mem->bus.offset; + + mem->bus.base = adev->gmc.aper_base; mem->bus.is_iomem = true; break; default: @@ -681,7 +693,6 @@ struct amdgpu_ttm_gup_task_list { struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; - struct amdgpu_device *adev; u64 offset; uint64_t userptr; struct mm_struct *usermm; @@ -690,7 +701,6 @@ struct amdgpu_ttm_tt { struct list_head guptasks; atomic_t mmu_invalidations; uint32_t last_set_pages; - struct list_head list; }; int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) @@ -840,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void*)ttm; uint64_t flags; int r = 0; @@ -861,44 +872,35 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - if (!amdgpu_gtt_mgr_is_allocated(bo_mem)) + if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { + gtt->offset = AMDGPU_BO_INVALID_OFFSET; return 0; + } - spin_lock(>t->adev->gtt_list_lock); - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); + flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); - if (r) { + if (r) DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", ttm->num_pages, gtt->offset); - goto error_gart_bind; - } - - list_add_tail(>t->list, >t->adev->gtt_list); -error_gart_bind: - spin_unlock(>t->adev->gtt_list_lock); return r; } -bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - - return gtt && !list_empty(>t->list); -} - -int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) +int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); - struct ttm_tt *ttm = bo->ttm; + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_ttm_tt *gtt = (void*)bo->ttm; struct ttm_mem_reg tmp; struct ttm_placement placement; struct ttm_place placements; + uint64_t flags; int r; - if (!ttm || amdgpu_ttm_is_bound(ttm)) + if (bo->mem.mem_type != TTM_PL_TT || + amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) return 0; tmp = bo->mem; @@ -908,71 +910,67 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); + r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); if (unlikely(r)) return r; - r = ttm_bo_move_ttm(bo, true, false, &tmp); - if (unlikely(r)) + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); + gtt->offset = (u64)tmp.start << PAGE_SHIFT; + r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, + bo->ttm->pages, gtt->ttm.dma_address, flags); + if (unlikely(r)) { ttm_bo_mem_put(bo, &tmp); - else - bo->offset = (bo->mem.start << PAGE_SHIFT) + - bo->bdev->man[bo->mem.mem_type].gpu_offset; + return r; + } - return r; + ttm_bo_mem_put(bo, &bo->mem); + bo->mem = tmp; + bo->offset = (bo->mem.start << PAGE_SHIFT) + + bo->bdev->man[bo->mem.mem_type].gpu_offset; + + return 0; } -int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) +int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { - struct amdgpu_ttm_tt *gtt, *tmp; - struct ttm_mem_reg bo_mem; + struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); + struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; uint64_t flags; int r; - bo_mem.mem_type = TTM_PL_TT; - spin_lock(&adev->gtt_list_lock); - list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) { - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, >t->ttm.ttm, &bo_mem); - r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, - gtt->ttm.ttm.pages, gtt->ttm.dma_address, - flags); - if (r) { - spin_unlock(&adev->gtt_list_lock); - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); - return r; - } - } - spin_unlock(&adev->gtt_list_lock); - return 0; + if (!gtt) + return 0; + + flags = amdgpu_ttm_tt_pte_flags(adev, >t->ttm.ttm, &tbo->mem); + r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, + gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); + if (r) + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + gtt->ttm.ttm.num_pages, gtt->offset); + return r; } static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); - if (!amdgpu_ttm_is_bound(ttm)) + if (gtt->offset == AMDGPU_BO_INVALID_OFFSET) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - spin_lock(>t->adev->gtt_list_lock); - r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); - if (r) { + r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); + if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); - goto error_unbind; - } - list_del_init(>t->list); -error_unbind: - spin_unlock(>t->adev->gtt_list_lock); return r; } @@ -990,38 +988,33 @@ static struct ttm_backend_func amdgpu_backend_func = { .destroy = &amdgpu_ttm_backend_destroy, }; -static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, - unsigned long size, uint32_t page_flags, - struct page *dummy_read_page) +static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, + uint32_t page_flags) { struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; - adev = amdgpu_ttm_adev(bdev); + adev = amdgpu_ttm_adev(bo->bdev); gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; - gtt->adev = adev; - if (ttm_dma_tt_init(>t->ttm, bdev, size, page_flags, dummy_read_page)) { + if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { kfree(gtt); return NULL; } - INIT_LIST_HEAD(>t->list); return >t->ttm.ttm; } -static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) +static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); - if (ttm->state != tt_unpopulated) - return 0; - if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) @@ -1034,18 +1027,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) if (slave && ttm->sg) { drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, - gtt->ttm.dma_address, ttm->num_pages); + gtt->ttm.dma_address, + ttm->num_pages); ttm->state = tt_unbound; return 0; } #ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) { - return ttm_dma_populate(>t->ttm, adev->dev); + if (adev->need_swiotlb && swiotlb_nr_tbl()) { + return ttm_dma_populate(>t->ttm, adev->dev, ctx); } #endif - return ttm_populate_and_map_pages(adev->dev, >t->ttm); + return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); } static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -1067,7 +1061,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) adev = amdgpu_ttm_adev(ttm->bdev); #ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) { + if (adev->need_swiotlb && swiotlb_nr_tbl()) { ttm_dma_unpopulate(>t->ttm, adev->dev); return; } @@ -1192,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; + struct reservation_object_list *flist; + struct dma_fence *f; + int i; + + /* If bo is a KFD BO, check if the bo belongs to the current process. + * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = reservation_object_get_list(bo->resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(bo->resv)); + if (amdkfd_fence_check_mm(f, current->mm)) + return false; + } + } switch (bo->mem.mem_type) { case TTM_PL_TT: @@ -1234,7 +1245,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); pos = (nodes->start << PAGE_SHIFT) + offset; - while (len && pos < adev->mc.mc_vram_size) { + while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; uint32_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; @@ -1292,6 +1303,102 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .access_memory = &amdgpu_ttm_access_memory }; +/* + * Firmware Reservation functions + */ +/** + * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram + * + * @adev: amdgpu_device pointer + * + * free fw reserved vram if it has been reserved. + */ +static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, + NULL, &adev->fw_vram_usage.va); +} + +/** + * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from fw. + */ +static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) +{ + struct ttm_operation_ctx ctx = { false, false }; + int r = 0; + int i; + u64 vram_size = adev->gmc.visible_vram_size; + u64 offset = adev->fw_vram_usage.start_offset; + u64 size = adev->fw_vram_usage.size; + struct amdgpu_bo *bo; + + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { + + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + ttm_bo_type_kernel, NULL, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; + + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; + + /* remove the original mem node and create a new one at the + * request position + */ + bo = adev->fw_vram_usage.reserved_bo; + offset = ALIGN(offset, PAGE_SIZE); + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = offset >> PAGE_SHIFT; + bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + + ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); + r = ttm_bo_mem_space(&bo->tbo, &bo->placement, + &bo->tbo.mem, &ctx); + if (r) + goto error_pin; + + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size), NULL); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; + + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + } + return r; + +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; +} + int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; @@ -1314,8 +1421,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } adev->mman.initialized = true; + + /* We opt to avoid OOM on system pages allocations */ + adev->mman.bdev.no_retry = true; + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, - adev->mc.real_vram_size >> PAGE_SHIFT); + adev->gmc.real_vram_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing VRAM heap.\n"); return r; @@ -1324,33 +1435,42 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Reduce size of CPU-visible VRAM if requested */ vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024; if (amdgpu_vis_vram_limit > 0 && - vis_vram_limit <= adev->mc.visible_vram_size) - adev->mc.visible_vram_size = vis_vram_limit; + vis_vram_limit <= adev->gmc.visible_vram_size) + adev->gmc.visible_vram_size = vis_vram_limit; /* Change the size here instead of the init above so only lpfn is affected */ - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); +#ifdef CONFIG_64BIT + adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, + adev->gmc.visible_vram_size); +#endif /* *The reserved vram for firmware must be pinned to the specified *place on the VRAM, so reserve it early. */ - r = amdgpu_fw_reserve_vram_init(adev); + r = amdgpu_ttm_fw_reserve_vram_init(adev); if (r) { return r; } - r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, NULL, NULL); if (r) return r; DRM_INFO("amdgpu: %uM of VRAM memory ready\n", - (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); + (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - if (amdgpu_gtt_size == -1) - gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); + if (amdgpu_gtt_size == -1) { + struct sysinfo si; + + si_meminfo(&si); + gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->gmc.mc_vram_size), + ((uint64_t)si.totalram * si.mem_unit * 3/4)); + } else gtt_size = (uint64_t)amdgpu_gtt_size << 20; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); @@ -1410,19 +1530,16 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) void amdgpu_ttm_fini(struct amdgpu_device *adev) { - int r; - if (!adev->mman.initialized) return; + amdgpu_ttm_debugfs_fini(adev); - if (adev->stolen_vga_memory) { - r = amdgpu_bo_reserve(adev->stolen_vga_memory, true); - if (r == 0) { - amdgpu_bo_unpin(adev->stolen_vga_memory); - amdgpu_bo_unreserve(adev->stolen_vga_memory); - } - amdgpu_bo_unref(&adev->stolen_vga_memory); - } + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; + ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); if (adev->gds.mem.total_size) @@ -1432,24 +1549,35 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (adev->gds.oa.total_size) ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); ttm_bo_device_release(&adev->mman.bdev); - amdgpu_gart_fini(adev); amdgpu_ttm_global_fini(adev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); } -/* this should only be called at bootup or when userspace - * isn't running */ -void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size) +/** + * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions + * + * @adev: amdgpu_device pointer + * @enable: true when we can use buffer functions. + * + * Enable/disable use of buffer functions during suspend/resume. This should + * only be called at bootup or when userspace isn't running. + */ +void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) { - struct ttm_mem_type_manager *man; + struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; + uint64_t size; - if (!adev->mman.initialized) + if (!adev->mman.initialized || adev->in_gpu_reset) return; - man = &adev->mman.bdev.man[TTM_PL_VRAM]; /* this just adjusts TTM size idea, which sets lpfn to the correct value */ + if (enable) + size = adev->gmc.real_vram_size; + else + size = adev->gmc.visible_vram_size; man->size = size >> PAGE_SHIFT; + adev->mman.buffer_funcs_enabled = enable; } int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1488,7 +1616,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - *addr = adev->mc.gart_start; + *addr = adev->gmc.gart_start; *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; @@ -1548,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; + if (direct_submit && !ring->ready) { + DRM_ERROR("Trying to move memory with ring turned off.\n"); + return -EINVAL; + } + max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; @@ -1606,13 +1739,12 @@ error_free: } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint64_t src_data, + uint32_t src_data, struct reservation_object *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint32_t max_bytes = 8 * - adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde; + uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *mm_node; @@ -1622,13 +1754,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct amdgpu_job *job; int r; - if (!ring->ready) { + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to clear memory with ring turned off.\n"); return -EINVAL; } if (bo->tbo.mem.mem_type == TTM_PL_TT) { - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + r = amdgpu_ttm_alloc_gart(&bo->tbo); if (r) return r; } @@ -1643,9 +1775,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - - /* num of dwords for each SDMA_OP_PTEPDE cmd */ - num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; /* for IB padding */ num_dw += 64; @@ -1670,16 +1800,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; - WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8"); - dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], - dst_addr, 0, - cur_size_in_bytes >> 3, 0, - src_data); + amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, + dst_addr, cur_size_in_bytes); dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -1740,14 +1866,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return -ENXIO; while (size) { unsigned long flags; uint32_t value; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return result; spin_lock_irqsave(&adev->mmio_idx_lock, flags); @@ -1779,14 +1905,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return -ENXIO; while (size) { unsigned long flags; uint32_t value; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return result; r = get_user(value, (uint32_t *)buf); @@ -1864,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif -static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = file_inode(f)->i_private; - int r; - uint64_t phys; struct iommu_domain *dom; + ssize_t result = 0; + int r; - // always return 8 bytes - if (size != 8) - return -EINVAL; + dom = iommu_get_domain_for_dev(adev->dev); - // only accept page addresses - if (*pos & 0xFFF) - return -EINVAL; + while (size) { + phys_addr_t addr = *pos & PAGE_MASK; + loff_t off = *pos & ~PAGE_MASK; + size_t bytes = PAGE_SIZE - off; + unsigned long pfn; + struct page *p; + void *ptr; + + bytes = bytes < size ? bytes : size; + + addr = dom ? iommu_iova_to_phys(dom, addr) : addr; + + pfn = addr >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -EPERM; + + p = pfn_to_page(pfn); + if (p->mapping != adev->mman.bdev.dev_mapping) + return -EPERM; + + ptr = kmap(p); + r = copy_to_user(buf, ptr + off, bytes); + kunmap(p); + if (r) + return -EFAULT; + + size -= bytes; + *pos += bytes; + result += bytes; + } + + return result; +} + +static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + struct iommu_domain *dom; + ssize_t result = 0; + int r; dom = iommu_get_domain_for_dev(adev->dev); - if (dom) - phys = iommu_iova_to_phys(dom, *pos); - else - phys = *pos; - r = copy_to_user(buf, &phys, 8); - if (r) - return -EFAULT; + while (size) { + phys_addr_t addr = *pos & PAGE_MASK; + loff_t off = *pos & ~PAGE_MASK; + size_t bytes = PAGE_SIZE - off; + unsigned long pfn; + struct page *p; + void *ptr; + + bytes = bytes < size ? bytes : size; - return 8; + addr = dom ? iommu_iova_to_phys(dom, addr) : addr; + + pfn = addr >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -EPERM; + + p = pfn_to_page(pfn); + if (p->mapping != adev->mman.bdev.dev_mapping) + return -EPERM; + + ptr = kmap(p); + r = copy_from_user(ptr + off, buf, bytes); + kunmap(p); + if (r) + return -EFAULT; + + size -= bytes; + *pos += bytes; + result += bytes; + } + + return result; } -static const struct file_operations amdgpu_ttm_iova_fops = { +static const struct file_operations amdgpu_ttm_iomem_fops = { .owner = THIS_MODULE, - .read = amdgpu_iova_to_phys_read, + .read = amdgpu_iomem_read, + .write = amdgpu_iomem_write, .llseek = default_llseek }; @@ -1908,7 +2094,7 @@ static const struct { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT }, #endif - { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM }, + { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM }, }; #endif @@ -1930,16 +2116,16 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) if (IS_ERR(ent)) return PTR_ERR(ent); if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM) - i_size_write(ent->d_inode, adev->mc.mc_vram_size); + i_size_write(ent->d_inode, adev->gmc.mc_vram_size); else if (ttm_debugfs_entries[count].domain == TTM_PL_TT) - i_size_write(ent->d_inode, adev->mc.gart_size); + i_size_write(ent->d_inode, adev->gmc.gart_size); adev->mman.debugfs_entries[count] = ent; } count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); #ifdef CONFIG_SWIOTLB - if (!swiotlb_nr_tbl()) + if (!(adev->need_swiotlb && swiotlb_nr_tbl())) --count; #endif |