Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 90
1 file changed, 58 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82dc8d20e28a..a370101d923f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -355,6 +355,7 @@ static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	u64 initial_bytes_moved;
 	uint32_t domain;
 	int r;
@@ -372,9 +373,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
-	initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
 		initial_bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -387,9 +388,9 @@ retry:
 
 /* Last resort, try to evict something from the current working set */
 static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo_list_entry *lobj)
+				struct amdgpu_bo *validated)
 {
-	uint32_t domain = lobj->robj->allowed_domains;
+	uint32_t domain = validated->allowed_domains;
 	int r;
 
 	if (!p->evictable)
@@ -400,11 +401,12 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 		u64 initial_bytes_moved;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
-		if (candidate == lobj)
+		if (candidate->robj == validated)
 			break;
 
 		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
@@ -420,9 +422,9 @@
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
-		initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
 			initial_bytes_moved;
 
 		if (unlikely(r))
@@ -437,6 +439,23 @@
 	return false;
 }
 
+static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct amdgpu_cs_parser *p = param;
+	int r;
+
+	do {
+		r = amdgpu_cs_bo_validate(p, bo);
+	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	if (r)
+		return r;
+
+	if (bo->shadow)
+		r = amdgpu_cs_bo_validate(p, bo);
+
+	return r;
+}
+
 static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			    struct list_head *validated)
 {
@@ -464,18 +483,10 @@
 		if (p->evictable == lobj)
 			p->evictable = NULL;
 
-		do {
-			r = amdgpu_cs_bo_validate(p, bo);
-		} while (r == -ENOMEM && amdgpu_cs_try_evict(p, lobj));
+		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
 
-		if (bo->shadow) {
-			r = amdgpu_cs_bo_validate(p, bo);
-			if (r)
-				return r;
-		}
-
 		if (binding_userptr) {
 			drm_free_large(lobj->user_pages);
 			lobj->user_pages = NULL;
@@ -594,14 +605,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);
-
 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
 
+	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
+				      amdgpu_cs_validate, p);
+	if (r) {
+		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
+		goto error_validate;
+	}
+
 	r = amdgpu_cs_list_validate(p, &duplicates);
 	if (r) {
 		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
@@ -720,7 +736,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
-	fence_put(parser->fence);
+	dma_fence_put(parser->fence);
 
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
@@ -757,7 +773,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 
 	if (p->bo_list) {
 		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct fence *f;
+			struct dma_fence *f;
 
 			/* ignore duplicates */
 			bo = p->bo_list->array[i].robj;
@@ -807,13 +823,14 @@
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
-		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
-	} else {
+	}
+
+	if (p->job->vm) {
 		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 
 		r = amdgpu_bo_vm_update_pte(p, vm);
@@ -902,7 +919,7 @@
 			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
 			kptr += chunk_ib->va_start - offset;
-			r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
+			r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
 			if (r) {
 				DRM_ERROR("Failed to get ib !\n");
 				return r;
 			}
@@ -917,9 +934,9 @@
 				return r;
 			}
 
-			ib->gpu_addr = chunk_ib->va_start;
 		}
 
+		ib->gpu_addr = chunk_ib->va_start;
 		ib->length_dw = chunk_ib->ib_bytes / 4;
 		ib->flags = chunk_ib->flags;
 		j++;
@@ -927,8 +944,8 @@
 
 	/* UVD & VCE fw doesn't support user fences */
 	if (parser->job->uf_addr && (
-	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
+	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
 	return 0;
@@ -957,7 +974,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 		for (j = 0; j < num_deps; ++j) {
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
-			struct fence *fence;
+			struct dma_fence *fence;
 
 			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
 					       deps[j].ip_instance,
@@ -979,7 +996,7 @@
 			} else if (fence) {
 				r = amdgpu_sync_fence(adev, &p->job->sync,
 						      fence);
-				fence_put(fence);
+				dma_fence_put(fence);
 				amdgpu_ctx_put(ctx);
 				if (r)
 					return r;
@@ -1009,7 +1026,7 @@
 
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
-	p->fence = fence_get(&job->base.s_fence->finished);
+	p->fence = dma_fence_get(&job->base.s_fence->finished);
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
@@ -1092,7 +1109,7 @@
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
-	struct fence *fence;
+	struct dma_fence *fence;
 	long r;
 
 	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
@@ -1108,8 +1125,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 
 	if (IS_ERR(fence))
		r = PTR_ERR(fence);
 	else if (fence) {
-		r = fence_wait_timeout(fence, true, timeout);
-		fence_put(fence);
+		r = dma_fence_wait_timeout(fence, true, timeout);
+		dma_fence_put(fence);
 	} else
 		r = 1;
@@ -1196,6 +1213,15 @@ int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
 		r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
 		if (unlikely(r))
 			return r;
+
+		if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			continue;
+
+		bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+		amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (unlikely(r))
+			return r;
 	}
 
 	return 0;
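The structural change tying most of these hunks together is the new amdgpu_cs_validate() helper: the retry-then-evict loop and the shadow-BO revalidation move out of amdgpu_cs_list_validate() into one function whose (void *param, struct amdgpu_bo *bo) signature lets amdgpu_vm_validate_pt_bos() apply it to the page-table BOs as a callback. Below is a minimal, standalone user-space sketch of that callback shape; the types and names (struct bo, struct parser, validate_pt_bos, cs_validate) are hypothetical stand-ins for illustration, not the kernel's.

/* Sketch of the validate-callback pattern introduced by this diff. */
#include <stdio.h>

struct bo {
	const char *name;
	int needs_move;	/* stand-in for "not placed in an allowed domain" */
};

/* Opaque per-submission state, playing the role of struct amdgpu_cs_parser. */
struct parser {
	long bytes_moved;
};

/* Walks every page-table BO and calls back into the submitter's code,
 * the way amdgpu_vm_validate_pt_bos() calls amdgpu_cs_validate(). */
static int validate_pt_bos(struct bo *bos, int count,
			   int (*validate)(void *param, struct bo *bo),
			   void *param)
{
	int i, r;

	for (i = 0; i < count; i++) {
		r = validate(param, &bos[i]);
		if (r)
			return r;
	}
	return 0;
}

/* The callback: recovers the typed state from the opaque parameter. */
static int cs_validate(void *param, struct bo *bo)
{
	struct parser *p = param;

	if (bo->needs_move)
		p->bytes_moved += 4096;	/* pretend one page was migrated */
	printf("validated %s\n", bo->name);
	return 0;
}

int main(void)
{
	struct bo bos[] = { { "pd", 0 }, { "pt0", 1 } };
	struct parser p = { 0 };

	if (validate_pt_bos(bos, 2, cs_validate, &p))
		return 1;
	printf("bytes moved: %ld\n", p.bytes_moved);
	return 0;
}

Routing the parser through an opaque void *param keeps the VM walker free of any dependency on command-submission internals; the callback simply casts it back, exactly as amdgpu_cs_validate() does with its param argument in the diff above.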