diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 75 | 
1 file changed, 46 insertions, 29 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1bbd39b3b0fc..8516c814bc9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,  		return r;  	++(num_ibs[r]); +	p->gang_leader_idx = r;  	return 0;  } @@ -287,20 +288,18 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,  		}  	} -	if (!p->gang_size) -		return -EINVAL; +	if (!p->gang_size) { +		ret = -EINVAL; +		goto free_partial_kdata; +	}  	for (i = 0; i < p->gang_size; ++i) { -		ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm); -		if (ret) -			goto free_all_kdata; - -		ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i], -					 &fpriv->vm); +		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, +				       num_ibs[i], &p->jobs[i]);  		if (ret)  			goto free_all_kdata;  	} -	p->gang_leader = p->jobs[p->gang_size - 1]; +	p->gang_leader = p->jobs[p->gang_leader_idx];  	if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {  		ret = -ECANCELED; @@ -430,7 +429,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,  			dma_fence_put(old);  		} -		r = amdgpu_sync_fence(&p->gang_leader->sync, fence); +		r = amdgpu_sync_fence(&p->sync, fence);  		dma_fence_put(fence);  		if (r)  			return r; @@ -452,9 +451,20 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,  		return r;  	} -	r = amdgpu_sync_fence(&p->gang_leader->sync, fence); -	dma_fence_put(fence); +	r = amdgpu_sync_fence(&p->sync, fence); +	if (r) +		goto error; + +	/* +	 * When we have an explicit dependency it might be necessary to insert a +	 * pipeline sync to make sure that all caches etc are flushed and the +	 * next job actually sees the results from the previous one. 
+	 */ +	if (fence->context == p->gang_leader->base.entity->fence_context) +		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); +error: +	dma_fence_put(fence);  	return r;  } @@ -910,7 +920,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  			goto out_free_user_pages;  		} -		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); +		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);  		if (r) {  			kvfree(e->user_pages);  			e->user_pages = NULL; @@ -988,10 +998,12 @@ out_free_user_pages:  		if (!e->user_pages)  			continue; -		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);  		kvfree(e->user_pages);  		e->user_pages = NULL; +		e->range = NULL;  	} +	mutex_unlock(&p->bo_list->bo_list_mutex);  	return r;  } @@ -1101,7 +1113,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  	if (r)  		return r; -	r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update); +	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);  	if (r)  		return r; @@ -1112,7 +1124,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  		if (r)  			return r; -		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); +		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);  		if (r)  			return r;  	} @@ -1131,7 +1143,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  		if (r)  			return r; -		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); +		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);  		if (r)  			return r;  	} @@ -1144,7 +1156,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  	if (r)  		return r; -	r = amdgpu_sync_fence(&job->sync, vm->last_update); +	r = amdgpu_sync_fence(&p->sync, vm->last_update);  	if (r)  		return r; @@ -1176,7 +1188,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)  {  	struct amdgpu_fpriv *fpriv = 
p->filp->driver_priv; -	struct amdgpu_job *leader = p->gang_leader;  	struct amdgpu_bo_list_entry *e;  	unsigned int i;  	int r; @@ -1188,22 +1199,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)  		sync_mode = amdgpu_bo_explicit_sync(bo) ?  			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; -		r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode, +		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,  				     &fpriv->vm);  		if (r)  			return r;  	} -	for (i = 0; i < p->gang_size - 1; ++i) { -		r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); +	for (i = 0; i < p->gang_size; ++i) { +		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);  		if (r)  			return r;  	} -	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); +	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);  	if (r && r != -ERESTARTSYS)  		DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); -  	return r;  } @@ -1237,11 +1247,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  	for (i = 0; i < p->gang_size; ++i)  		drm_sched_job_arm(&p->jobs[i]->base); -	for (i = 0; i < (p->gang_size - 1); ++i) { +	for (i = 0; i < p->gang_size; ++i) {  		struct dma_fence *fence; +		if (p->jobs[i] == leader) +			continue; +  		fence = &p->jobs[i]->base.s_fence->scheduled; -		r = amdgpu_sync_fence(&leader->sync, fence); +		r = drm_sched_job_add_dependency(&leader->base, fence);  		if (r)  			goto error_cleanup;  	} @@ -1264,7 +1277,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); -		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); +		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); +		e->range = NULL;  	}  	if (r) {  		r = -EAGAIN; @@ -1275,7 +1289,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  	list_for_each_entry(e, &p->validated, tv.head) {  		/* Everybody except for the gang 
leader uses READ */ -		for (i = 0; i < (p->gang_size - 1); ++i) { +		for (i = 0; i < p->gang_size; ++i) { +			if (p->jobs[i] == leader) +				continue; +  			dma_resv_add_fence(e->tv.bo->base.resv,  					   &p->jobs[i]->base.s_fence->finished,  					   DMA_RESV_USAGE_READ); @@ -1285,7 +1302,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  		e->tv.num_shared = 0;  	} -	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], +	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],  				   p->fence);  	amdgpu_cs_post_dependencies(p); |