Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c	302
1 file changed, 177 insertions, 125 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82312a7bc6ad..b6e9df11115d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,6 +31,7 @@
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
@@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_bo_list_in *data)
+{
+	int r;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+
+	r = amdgpu_bo_create_list_entry_array(data, &info);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+				  &p->bo_list);
+	if (r)
+		goto error_free;
+
+	kvfree(info);
+	return 0;
+
+error_free:
+	if (info)
+		kvfree(info);
+
+	return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
 	unsigned size, num_ibs = 0;
@@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 			break;
 
+		case AMDGPU_CHUNK_ID_BO_HANDLES:
+			size = sizeof(struct drm_amdgpu_bo_list_in);
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+				ret = -EINVAL;
+				goto free_partial_kdata;
+			}
+
+			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
+			break;
+
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (p->uf_entry.robj)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
+
+	/* Use this opportunity to fill in task info for the vm */
+	amdgpu_vm_set_task_info(vm);
+
 	return 0;
 
 free_all_kdata:
@@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 		return;
 	}
 
-	total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
 	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
@@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
 	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
 
 	/* Do the same for visible VRAM if half of it is free */
-	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
 		u64 total_vis_vram = adev->gmc.visible_vram_size;
 		u64 used_vis_vram =
 			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
 	if (p->bytes_moved < p->bytes_moved_threshold) {
-		if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
 			 * visible VRAM if we've depleted our allowance to do
@@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	}
 
 retry:
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_bo_placement_from_domain(bo, domain);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 
 	p->bytes_moved += ctx.bytes_moved;
-	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 	    amdgpu_bo_in_cpu_visible_vram(bo))
 		p->bytes_moved_vis += ctx.bytes_moved;
 
@@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		update_bytes_moved_vis =
-			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			amdgpu_bo_in_cpu_visible_vram(bo);
-		amdgpu_ttm_placement_from_domain(bo, other);
+				!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+				amdgpu_bo_in_cpu_visible_vram(bo);
+		amdgpu_bo_placement_from_domain(bo, other);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		p->bytes_moved += ctx.bytes_moved;
 		if (update_bytes_moved_vis)
@@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		/* Check if we have user pages and nobody bound the BO already */
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
 		    lobj->user_pages) {
-			amdgpu_ttm_placement_from_domain(bo,
-							 AMDGPU_GEM_DOMAIN_CPU);
+			amdgpu_bo_placement_from_domain(bo,
+							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
@@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
-	unsigned i, tries = 10;
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
+	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-	if (p->bo_list) {
-		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
-		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+	if (cs->in.bo_list_handle) {
+		if (p->bo_list)
+			return -EINVAL;
+
+		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+				       &p->bo_list);
+		if (r)
+			return r;
+	} else if (!p->bo_list) {
+		/* Create a empty bo_list when no handle is provided */
+		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+					  &p->bo_list);
+		if (r)
+			return r;
 	}
 
+	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+	if (p->bo_list->first_userptr != p->bo_list->num_entries)
+		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
@@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	while (1) {
 		struct list_head need_pages;
-		unsigned i;
 
 		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
 					   &duplicates);
@@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto error_free_pages;
 		}
 
-		/* Without a BO list we don't have userptr BOs */
-		if (!p->bo_list)
-			break;
-
 		INIT_LIST_HEAD(&need_pages);
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo;
-
-			e = &p->bo_list->array[i];
-			bo = e->robj;
+		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = e->robj;
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
 
-	if (p->bo_list) {
-		struct amdgpu_vm *vm = &fpriv->vm;
-		unsigned i;
-
-		gds = p->bo_list->gds_obj;
-		gws = p->bo_list->gws_obj;
-		oa = p->bo_list->oa_obj;
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+	gds = p->bo_list->gds_obj;
+	gws = p->bo_list->gws_obj;
+	oa = p->bo_list->oa_obj;
 
-			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
-		}
-	} else {
-		gds = p->adev->gds.gds_gfx_bo;
-		gws = p->adev->gds.gws_gfx_bo;
-		oa = p->adev->gds.oa_gfx_bo;
-	}
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
 
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -700,18 +737,13 @@ error_validate:
 
 error_free_pages:
 
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			e = &p->bo_list->array[i];
-
-			if (!e->user_pages)
-				continue;
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		if (!e->user_pages)
+			continue;
 
-			release_pages(e->user_pages,
-				      e->robj->tbo.ttm->num_pages);
-			kvfree(e->user_pages);
-		}
+		release_pages(e->user_pages,
+			      e->robj->tbo.ttm->num_pages);
+		kvfree(e->user_pages);
 	}
 
 	return r;
@@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 {
-	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_bo_va *bo_va;
 	struct amdgpu_bo *bo;
-	int i, r;
+	int r;
 
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
@@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 			return r;
 	}
 
-	if (p->bo_list) {
-		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct dma_fence *f;
-
-			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
-				continue;
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_fence *f;
 
-			bo_va = p->bo_list->array[i].bo_va;
-			if (bo_va == NULL)
-				continue;
+		/* ignore duplicates */
+		bo = e->robj;
+		if (!bo)
+			continue;
 
-			r = amdgpu_vm_bo_update(adev, bo_va, false);
-			if (r)
-				return r;
+		bo_va = e->bo_va;
+		if (bo_va == NULL)
+			continue;
 
-			f = bo_va->last_pt_update;
-			r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
-			if (r)
-				return r;
-		}
+		r = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (r)
+			return r;
+		f = bo_va->last_pt_update;
+		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+		if (r)
+			return r;
 	}
 
 	r = amdgpu_vm_handle_moved(adev, vm);
@@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	if (amdgpu_vm_debug && p->bo_list) {
+	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
-		for (i = 0; i < p->bo_list->num_entries; i++) {
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 			/* ignore duplicates */
-			bo = p->bo_list->array[i].robj;
-			if (!bo)
+			if (!e->robj)
 				continue;
 
-			amdgpu_vm_bo_invalidate(adev, bo, false);
+			amdgpu_vm_bo_invalidate(adev, e->robj, false);
 		}
 	}
 
@@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->job->ring;
+	struct amdgpu_ring *ring = p->ring;
 	int r;
 
 	/* Only for UVD/VCE VM emulation */
-	if (p->job->ring->funcs->parse_cs) {
+	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
 		unsigned i, j;
 
 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += va_start - offset;
 
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-
-			r = amdgpu_ring_parse_cs(ring, p, j);
-			if (r)
-				return r;
+			if (p->ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
 
 			j++;
@@ -927,6 +964,10 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 		r = amdgpu_bo_vm_update_pte(p);
 		if (r)
 			return r;
+
+		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+		if (r)
+			return r;
 	}
 
 	return amdgpu_cs_sync_rings(p);
@@ -971,18 +1012,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
-		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
-			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
-			if (!parser->ctx->preamble_presented) {
-				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
-				parser->ctx->preamble_presented = true;
-			}
-		}
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->job->preamble_status |=
+				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->job->ring && parser->job->ring != ring)
+		if (parser->ring && parser->ring != ring)
 			return -EINVAL;
 
-		parser->job->ring = ring;
+		parser->ring = ring;
 
 		r =  amdgpu_ib_get(adev, vm,
 					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
@@ -1001,11 +1038,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 	/* UVD & VCE fw doesn't support user fences */
 	if (parser->job->uf_addr && (
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1156,39 +1193,35 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
-	struct amdgpu_ring *ring = p->job->ring;
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_ring *ring = p->ring;
 	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	enum drm_sched_priority priority;
+	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
-	unsigned i;
 	uint64_t seq;
 	int r;
 
-	amdgpu_mn_lock(p->mn);
-	if (p->bo_list) {
-		for (i = p->bo_list->first_userptr;
-		     i < p->bo_list->num_entries; ++i) {
-			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-				amdgpu_mn_unlock(p->mn);
-				return -ERESTARTSYS;
-			}
-		}
-	}
-
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
-	if (r) {
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
+	r = drm_sched_job_init(&job->base, entity, p->filp);
+	if (r)
+		goto error_unlock;
+
+	/* No memory allocation is allowed while holding the mn lock */
+	amdgpu_mn_lock(p->mn);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = e->robj;
+
+		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+			r = -ERESTARTSYS;
+			goto error_abort;
+		}
 	}
 
 	job->owner = p->filp;
-	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
 	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
@@ -1202,19 +1235,38 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_post_dependencies(p);
 
+	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+	    !p->ctx->preamble_presented) {
+		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+		p->ctx->preamble_presented = true;
+	}
+
 	cs->out.handle = seq;
 	job->uf_sequence = seq;
 
 	amdgpu_job_free_resources(job);
-	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
 	trace_amdgpu_cs_ioctl(job);
+	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
+	ring = to_amdgpu_ring(entity->rq->sched);
+	amdgpu_ring_priority_get(ring, priority);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
+
+error_abort:
+	dma_fence_put(&job->base.s_fence->finished);
+	job->base.s_fence = NULL;
+
+error_unlock:
+	amdgpu_job_free(job);
+	amdgpu_mn_unlock(p->mn);
+	return r;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -1601,7 +1653,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 
 	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
 		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
 		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
 		if (r)
 			return r;
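For context, the sketch below is not part of the diff above: the helper name submit_with_inline_bo_list and its parameters are hypothetical, and it assumes libdrm's xf86drm.h and amdgpu_drm.h headers are on the include path. It shows how userspace could exercise the new AMDGPU_CHUNK_ID_BO_HANDLES path: instead of creating a buffer list up front and passing cs.in.bo_list_handle, the GEM handles are supplied inline as a CS chunk and the kernel builds the bo_list itself in amdgpu_cs_bo_handles_chunk().

/*
 * Hypothetical userspace sketch (not taken from the kernel tree or libdrm):
 * submit a CS whose buffer list is passed inline via AMDGPU_CHUNK_ID_BO_HANDLES.
 * Error handling and construction of the IB chunk are left to the caller.
 */
#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int submit_with_inline_bo_list(int fd, uint32_t ctx_id,
				      const struct drm_amdgpu_bo_list_entry *bos,
				      uint32_t bo_count,
				      const struct drm_amdgpu_cs_chunk *ib_chunk)
{
	struct drm_amdgpu_bo_list_in list_in;
	struct drm_amdgpu_cs_chunk chunks[2];
	uint64_t chunk_ptrs[2];
	union drm_amdgpu_cs cs;

	memset(&list_in, 0, sizeof(list_in));
	list_in.operation = ~0u;	/* assumed unused by the inline-chunk path above */
	list_in.list_handle = ~0u;	/* likewise assumed unused here */
	list_in.bo_number = bo_count;
	list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
	list_in.bo_info_ptr = (uintptr_t)bos;

	/* length_dw * 4 must cover sizeof(drm_amdgpu_bo_list_in); see the
	 * size check added to amdgpu_cs_parser_init() above. */
	chunks[0].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
	chunks[0].length_dw = sizeof(list_in) / 4;
	chunks[0].chunk_data = (uintptr_t)&list_in;
	chunks[1] = *ib_chunk;		/* an AMDGPU_CHUNK_ID_IB chunk prepared by the caller */

	chunk_ptrs[0] = (uintptr_t)&chunks[0];
	chunk_ptrs[1] = (uintptr_t)&chunks[1];

	memset(&cs, 0, sizeof(cs));
	cs.in.ctx_id = ctx_id;
	cs.in.bo_list_handle = 0;	/* 0 = none; the kernel creates the list from the chunk */
	cs.in.num_chunks = 2;
	cs.in.chunks = (uintptr_t)chunk_ptrs;

	return drmCommandWriteRead(fd, DRM_AMDGPU_CS, &cs, sizeof(cs));
}

Per the parser changes above, supplying both a non-zero bo_list_handle and a BO_HANDLES chunk makes amdgpu_cs_parser_bos() return -EINVAL, and when neither is given the kernel now creates an empty bo_list instead of special-casing a NULL list.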