Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c	302
1 file changed, 177 insertions, 125 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82312a7bc6ad..b6e9df11115d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,6 +31,7 @@
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
@@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_bo_list_in *data)
+{
+	int r;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+
+	r = amdgpu_bo_create_list_entry_array(data, &info);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+				  &p->bo_list);
+	if (r)
+		goto error_free;
+
+	kvfree(info);
+	return 0;
+
+error_free:
+	if (info)
+		kvfree(info);
+
+	return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
 	unsigned size, num_ibs = 0;
@@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 			break;
 
+		case AMDGPU_CHUNK_ID_BO_HANDLES:
+			size = sizeof(struct drm_amdgpu_bo_list_in);
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+				ret = -EINVAL;
+				goto free_partial_kdata;
+			}
+
+			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
+			break;
+
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (p->uf_entry.robj)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
+
+	/* Use this opportunity to fill in task info for the vm */
+	amdgpu_vm_set_task_info(vm);
+
 	return 0;
 
 free_all_kdata:
@@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 		return;
 	}
 
-	total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
@@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
 	/* Do the same for visible VRAM if half of it is free */
-	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
 	if (p->bytes_moved < p->bytes_moved_threshold) {
-		if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 			 * visible VRAM if we've depleted our allowance to do
@@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	}
 
 retry:
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_bo_placement_from_domain(bo, domain);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
 	p->bytes_moved += ctx.bytes_moved;
-	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 	    amdgpu_bo_in_cpu_visible_vram(bo))
 		p->bytes_moved_vis += ctx.bytes_moved;
 
@@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		update_bytes_moved_vis =
-			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_ttm_placement_from_domain(bo, other);
+				!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+				amdgpu_bo_in_cpu_visible_vram(bo);
+		amdgpu_bo_placement_from_domain(bo, other);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		p->bytes_moved += ctx.bytes_moved;
 		if (update_bytes_moved_vis)
@@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		/* Check if we have user pages and nobody bound the BO already */
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
 		    lobj->user_pages) {
-			amdgpu_ttm_placement_from_domain(bo,
-							 AMDGPU_GEM_DOMAIN_CPU);
+			amdgpu_bo_placement_from_domain(bo,
+							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
@@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
-	unsigned i, tries = 10;
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
+	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-	if (p->bo_list) {
-		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+	if (cs->in.bo_list_handle) {
+		if (p->bo_list)
+			return -EINVAL;
+
+		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+				       &p->bo_list);
+		if (r)
+			return r;
+	} else if (!p->bo_list) {
+		/* Create a empty bo_list when no handle is provided */
+		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+					  &p->bo_list);
+		if (r)
+			return r;
 	}
 
+	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+	if (p->bo_list->first_userptr != p->bo_list->num_entries)
+		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
@@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	while (1) {
 		struct list_head need_pages;
-		unsigned i;
 
 		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 					   &duplicates);
@@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_free_pages;
 		}
 
-		/* Without a BO list we don't have userptr BOs */
-		if (!p->bo_list)
-			break;
-
 		INIT_LIST_HEAD(&need_pages);
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo;
-
-			e = &p->bo_list->array[i];
-			bo = e->robj;
+		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = e->robj;
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
 
-	if (p->bo_list) {
-		struct amdgpu_vm *vm = &fpriv->vm;
-		unsigned i;
-
-		gds = p->bo_list->gds_obj;
-		gws = p->bo_list->gws_obj;
-		oa = p->bo_list->oa_obj;
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+	gds = p->bo_list->gds_obj;
+	gws = p->bo_list->gws_obj;
+	oa = p->bo_list->oa_obj;
 
-			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
-		}
-	} else {
-		gds = p->adev->gds.gds_gfx_bo;
-		gws = p->adev->gds.gws_gfx_bo;
-		oa = p->adev->gds.oa_gfx_bo;
-	}
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
 
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -700,18 +737,13 @@ error_validate:
 
 error_free_pages:
 
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			e = &p->bo_list->array[i];
-
-			if (!e->user_pages)
-				continue;
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		if (!e->user_pages)
+			continue;
 
-			release_pages(e->user_pages,
-				      e->robj->tbo.ttm->num_pages);
-			kvfree(e->user_pages);
-		}
+		release_pages(e->user_pages,
+			      e->robj->tbo.ttm->num_pages);
+		kvfree(e->user_pages);
 	}
 
 	return r;
@@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 {
-	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_bo_va *bo_va;
 	struct amdgpu_bo *bo;
-	int i, r;
+	int r;
 
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
@@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 			return r;
 	}
 
-	if (p->bo_list) {
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct dma_fence *f;
-
-			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
-				continue;
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_fence *f;
 
-			bo_va = p->bo_list->array[i].bo_va;
-			if (bo_va == NULL)
-				continue;
+		/* ignore duplicates */
+		bo = e->robj;
+		if (!bo)
+			continue;
 
-			r = amdgpu_vm_bo_update(adev, bo_va, false);
-			if (r)
-				return r;
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
 
-			f = bo_va->last_pt_update;
-			r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-			if (r)
-				return r;
-		}
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+		f = bo_va->last_pt_update;
+		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		if (r)
+			return r;
 	}
 
 	r = amdgpu_vm_handle_moved(adev, vm);
@@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	if (amdgpu_vm_debug && p->bo_list) {
+	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
-		for (i = 0; i < p->bo_list->num_entries; i++) {
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
+			if (!e->robj)
 				continue;
 
-			amdgpu_vm_bo_invalidate(adev, bo, false);
+			amdgpu_vm_bo_invalidate(adev, e->robj, false);
 		}
 	}
 
@@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->job->ring;
+	struct amdgpu_ring *ring = p->ring;
 	int r;
 
 	/* Only for UVD/VCE VM emulation */
-	if (p->job->ring->funcs->parse_cs) {
+	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
 		unsigned i, j;
 
 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += va_start - offset;
 
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-
-			r = amdgpu_ring_parse_cs(ring, p, j);
-			if (r)
-				return r;
+			if (p->ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
 
 			j++;
@@ -927,6 +964,10 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 		r = amdgpu_bo_vm_update_pte(p);
 		if (r)
 			return r;
+
+		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		if (r)
+			return r;
 	}
 
 	return amdgpu_cs_sync_rings(p);
@@ -971,18 +1012,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
-		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
-			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
-			if (!parser->ctx->preamble_presented) {
-				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
-				parser->ctx->preamble_presented = true;
-			}
-		}
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->job->preamble_status |=
+				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->job->ring && parser->job->ring != ring)
+		if (parser->ring && parser->ring != ring)
 			return -EINVAL;
 
-		parser->job->ring = ring;
+		parser->ring = ring;
 
 		r =  amdgpu_ib_get(adev, vm,
 					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
@@ -1001,11 +1038,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 	/* UVD & VCE fw doesn't support user fences */
 	if (parser->job->uf_addr && (
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1156,39 +1193,35 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
-	struct amdgpu_ring *ring = p->job->ring;
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_ring *ring = p->ring;
 	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	enum drm_sched_priority priority;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
-	unsigned i;
 	uint64_t seq;
 	int r;
 
-	amdgpu_mn_lock(p->mn);
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-				amdgpu_mn_unlock(p->mn);
-				return -ERESTARTSYS;
-			}
-		}
-	}
-
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
-	if (r) {
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
+	r = drm_sched_job_init(&job->base, entity, p->filp);
+	if (r)
+		goto error_unlock;
+
+	/* No memory allocation is allowed while holding the mn lock */
+	amdgpu_mn_lock(p->mn);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = e->robj;
+
+		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+			r = -ERESTARTSYS;
+			goto error_abort;
+		}
 	}
 
 	job->owner = p->filp;
-	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
 	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
@@ -1202,19 +1235,38 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_post_dependencies(p);
 
+	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+	    !p->ctx->preamble_presented) {
+		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+		p->ctx->preamble_presented = true;
+	}
+
 	cs->out.handle = seq;
 	job->uf_sequence = seq;
 
 	amdgpu_job_free_resources(job);
-	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
 	trace_amdgpu_cs_ioctl(job);
+	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
+	ring = to_amdgpu_ring(entity->rq->sched);
+	amdgpu_ring_priority_get(ring, priority);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
+
+error_abort:
+	dma_fence_put(&job->base.s_fence->finished);
+	job->base.s_fence = NULL;
+
+error_unlock:
+	amdgpu_job_free(job);
+	amdgpu_mn_unlock(p->mn);
+	return r;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -1601,7 +1653,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 
 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
 		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
 		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
 		if (r)
 			return r;
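For context, the sketch below is not part of the diff above: the helper name submit_with_inline_bo_list and its parameters are hypothetical, and it assumes libdrm's xf86drm.h and amdgpu_drm.h headers are on the include path. It shows how userspace could exercise the new AMDGPU_CHUNK_ID_BO_HANDLES path: instead of creating a buffer list up front and passing cs.in.bo_list_handle, the GEM handles are supplied inline as a CS chunk and the kernel builds the bo_list itself in amdgpu_cs_bo_handles_chunk().

/*
 * Hypothetical userspace sketch (not taken from the kernel tree or libdrm):
 * submit a CS whose buffer list is passed inline via AMDGPU_CHUNK_ID_BO_HANDLES.
 * Error handling and construction of the IB chunk are left to the caller.
 */
#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int submit_with_inline_bo_list(int fd, uint32_t ctx_id,
				      const struct drm_amdgpu_bo_list_entry *bos,
				      uint32_t bo_count,
				      const struct drm_amdgpu_cs_chunk *ib_chunk)
{
	struct drm_amdgpu_bo_list_in list_in;
	struct drm_amdgpu_cs_chunk chunks[2];
	uint64_t chunk_ptrs[2];
	union drm_amdgpu_cs cs;

	memset(&list_in, 0, sizeof(list_in));
	list_in.operation = ~0u;	/* assumed unused by the inline-chunk path above */
	list_in.list_handle = ~0u;	/* likewise assumed unused here */
	list_in.bo_number = bo_count;
	list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
	list_in.bo_info_ptr = (uintptr_t)bos;

	/* length_dw * 4 must cover sizeof(drm_amdgpu_bo_list_in); see the
	 * size check added to amdgpu_cs_parser_init() above. */
	chunks[0].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
	chunks[0].length_dw = sizeof(list_in) / 4;
	chunks[0].chunk_data = (uintptr_t)&list_in;
	chunks[1] = *ib_chunk;		/* an AMDGPU_CHUNK_ID_IB chunk prepared by the caller */

	chunk_ptrs[0] = (uintptr_t)&chunks[0];
	chunk_ptrs[1] = (uintptr_t)&chunks[1];

	memset(&cs, 0, sizeof(cs));
	cs.in.ctx_id = ctx_id;
	cs.in.bo_list_handle = 0;	/* 0 = none; the kernel creates the list from the chunk */
	cs.in.num_chunks = 2;
	cs.in.chunks = (uintptr_t)chunk_ptrs;

	return drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs));
}

Per the parser changes above, supplying both a non-zero bo_list_handle and a BO_HANDLES chunk makes amdgpu_cs_parser_bos() return -EINVAL, and when neither is given the kernel now creates an empty bo_list instead of special-casing a NULL list.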