Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   145
1 file changed, 56 insertions, 89 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9392e50a7ba4..9bc8f1d99733 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,7 +24,6 @@
  * Authors:
  *    Jerome Glisse <[email protected]>
  */
-#include <linux/list_sort.h>
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
@@ -88,44 +87,41 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 }

 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
-				      struct amdgpu_user_fence *uf,
-				      struct drm_amdgpu_cs_chunk_fence *fence_data)
+				      struct drm_amdgpu_cs_chunk_fence *data,
+				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
-	uint32_t handle;

-	handle = fence_data->handle;
-	gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
-				     fence_data->handle);
+	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;

-	uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
-	uf->offset = fence_data->offset;
-
-	if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
-		drm_gem_object_unreference_unlocked(gobj);
-		return -EINVAL;
-	}
-
-	p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
+	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
+	*offset = data->offset;

 	drm_gem_object_unreference_unlocked(gobj);
+
+	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+		amdgpu_bo_unref(&p->uf_entry.robj);
+		return -EINVAL;
+	}
+
 	return 0;
 }

 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
-	struct amdgpu_user_fence uf = {};
 	unsigned size, num_ibs = 0;
+	uint32_t uf_offset = 0;
 	int i;
 	int ret;

@@ -200,7 +196,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 				goto free_partial_kdata;
 			}

-			ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
+			ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
+							 &uf_offset);
 			if (ret)
 				goto free_partial_kdata;

@@ -215,11 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		}
 	}

-	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
+	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
 	if (ret)
 		goto free_all_kdata;

-	p->job->uf = uf;
+	if (p->uf_entry.robj) {
+		p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
+		p->job->uf_offset = uf_offset;
+	}

 	kfree(chunk_array);
 	return 0;
@@ -377,7 +377,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

-	if (p->job->uf.bo)
+	if (p->uf_entry.robj)
 		list_add(&p->uf_entry.tv.head, &p->validated);

 	if (need_mmap_lock)
@@ -473,6 +473,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;

 	if (p->bo_list) {
+		struct amdgpu_bo *gds = p->bo_list->gds_obj;
+		struct amdgpu_bo *gws = p->bo_list->gws_obj;
+		struct amdgpu_bo *oa = p->bo_list->oa_obj;
 		struct amdgpu_vm *vm = &fpriv->vm;
 		unsigned i;

@@ -481,6 +484,19 @@

 			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
 		}
+
+		if (gds) {
+			p->job->gds_base = amdgpu_bo_gpu_offset(gds);
+			p->job->gds_size = amdgpu_bo_size(gds);
+		}
+		if (gws) {
+			p->job->gws_base = amdgpu_bo_gpu_offset(gws);
+			p->job->gws_size = amdgpu_bo_size(gws);
+		}
+		if (oa) {
+			p->job->oa_base = amdgpu_bo_gpu_offset(oa);
+			p->job->oa_size = amdgpu_bo_size(oa);
+		}
 	}

 error_validate:
@@ -527,16 +543,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	return 0;
 }

-static int cmp_size_smaller_first(void *priv, struct list_head *a,
-				  struct list_head *b)
-{
-	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
-	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
-
-	/* Sort A before B if A is smaller. */
-	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
-
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
@@ -553,18 +559,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	if (!error) {
 		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);

-		/* Sort the buffer list from the smallest to largest buffer,
-		 * which affects the order of buffers in the LRU list.
-		 * This assures that the smallest buffers are added first
-		 * to the LRU list, so they are likely to be later evicted
-		 * first, instead of large buffers whose eviction is more
-		 * expensive.
-		 *
-		 * This slightly lowers the number of bytes moved by TTM
-		 * per frame under memory pressure.
-		 */
-		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->fence);
@@ -763,41 +757,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,

 		ib->length_dw = chunk_ib->ib_bytes / 4;
 		ib->flags = chunk_ib->flags;
-		ib->ctx = parser->ctx;
 		j++;
 	}

-	/* add GDS resources to first IB */
-	if (parser->bo_list) {
-		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
-		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
-		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
-		struct amdgpu_ib *ib = &parser->job->ibs[0];
-
-		if (gds) {
-			ib->gds_base = amdgpu_bo_gpu_offset(gds);
-			ib->gds_size = amdgpu_bo_size(gds);
-		}
-		if (gws) {
-			ib->gws_base = amdgpu_bo_gpu_offset(gws);
-			ib->gws_size = amdgpu_bo_size(gws);
-		}
-		if (oa) {
-			ib->oa_base = amdgpu_bo_gpu_offset(oa);
-			ib->oa_size = amdgpu_bo_size(oa);
-		}
-	}
-	/* wrap the last IB with user fence */
-	if (parser->job->uf.bo) {
-		struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
-
-		/* UVD & VCE fw doesn't support user fences */
-		if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
-		    parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
-			return -EINVAL;
-
-		ib->user = &parser->job->uf;
-	}
+	/* UVD & VCE fw doesn't support user fences */
+	if (parser->job->uf_bo && (
+	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
+	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
+		return -EINVAL;

 	return 0;
 }
@@ -862,28 +829,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_ring *ring = p->job->ring;
-	struct amd_sched_fence *fence;
+	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct fence *fence;
 	struct amdgpu_job *job;
+	int r;

 	job = p->job;
 	p->job = NULL;

-	job->base.sched = &ring->sched;
-	job->base.s_entity = &p->ctx->rings[ring->idx].entity;
-	job->owner = p->filp;
-
-	fence = amd_sched_fence_create(job->base.s_entity, p->filp);
-	if (!fence) {
+	r = amd_sched_job_init(&job->base, &ring->sched,
+			       entity, amdgpu_job_timeout_func,
+			       amdgpu_job_free_func,
+			       p->filp, &fence);
+	if (r) {
 		amdgpu_job_free(job);
-		return -ENOMEM;
+		return r;
 	}

-	job->base.s_fence = fence;
-	p->fence = fence_get(&fence->base);
-
-	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
-					      &fence->base);
-	job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
+	job->owner = p->filp;
+	job->ctx = entity->fence_context;
+	p->fence = fence_get(fence);
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+	job->uf_sequence = cs->out.handle;

 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);