Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 261 | 
1 file changed, 211 insertions, 50 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 716f2afeb6a9..887483b8b818 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -34,6 +34,7 @@
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_page_alloc.h>
+#include <ttm/ttm_memory.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/seq_file.h>
@@ -74,7 +75,7 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 	ttm_mem_global_release(ref->object);
 }
 
-static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
+int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 	struct drm_global_reference *global_ref;
 	struct amdgpu_ring *ring;
@@ -88,10 +89,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	global_ref->init = &amdgpu_ttm_mem_global_init;
 	global_ref->release = &amdgpu_ttm_mem_global_release;
 	r = drm_global_item_ref(global_ref);
-	if (r != 0) {
+	if (r) {
 		DRM_ERROR("Failed setting up TTM memory accounting "
 			  "subsystem.\n");
-		return r;
+		goto error_mem;
 	}
 
 	adev->mman.bo_global_ref.mem_glob =
@@ -102,26 +103,30 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	global_ref->init = &ttm_bo_global_init;
 	global_ref->release = &ttm_bo_global_release;
 	r = drm_global_item_ref(global_ref);
-	if (r != 0) {
+	if (r) {
 		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
-		drm_global_item_unref(&adev->mman.mem_global_ref);
-		return r;
+		goto error_bo;
 	}
 
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
				  rq, amdgpu_sched_jobs);
-	if (r != 0) {
+	if (r) {
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
-		drm_global_item_unref(&adev->mman.mem_global_ref);
-		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
-		return r;
+		goto error_entity;
 	}
 
 	adev->mman.mem_global_referenced = true;
 
 	return 0;
+
+error_entity:
+	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
+error_bo:
+	drm_global_item_unref(&adev->mman.mem_global_ref);
+error_mem:
+	return r;
 }
 
 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
@@ -155,7 +160,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		break;
 	case TTM_PL_TT:
-		man->func = &ttm_bo_manager_func;
+		man->func = &amdgpu_gtt_mgr_func;
 		man->gpu_offset = adev->mc.gtt_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->default_caching = TTM_PL_FLAG_CACHED;
@@ -190,12 +195,13 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
				struct ttm_placement *placement)
 {
-	struct amdgpu_bo *rbo;
+	struct amdgpu_bo *abo;
 	static struct ttm_place placements = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
+	unsigned i;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
@@ -204,28 +210,44 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		placement->num_busy_placement = 1;
 		return;
 	}
-	rbo = container_of(bo, struct amdgpu_bo, tbo);
+	abo = container_of(bo, struct amdgpu_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (rbo->adev->mman.buffer_funcs_ring->ready == false)
-			amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
-		else
-			amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_GTT);
+		if (abo->adev->mman.buffer_funcs_ring->ready == false) {
+			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+		} else {
+			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
+			for (i = 0; i < abo->placement.num_placement; ++i) {
+				if (!(abo->placements[i].flags &
+				      TTM_PL_FLAG_TT))
+					continue;
+
+				if (abo->placements[i].lpfn)
+					continue;
+
+				/* set an upper limit to force directly
+				 * allocating address space for the BO.
+				 */
+				abo->placements[i].lpfn =
+					abo->adev->mc.gtt_size >> PAGE_SHIFT;
+			}
+		}
 		break;
 	case TTM_PL_TT:
 	default:
-		amdgpu_ttm_placement_from_domain(rbo, AMDGPU_GEM_DOMAIN_CPU);
+		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 	}
-	*placement = rbo->placement;
+	*placement = abo->placement;
 }
 
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	struct amdgpu_bo *rbo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
 
 	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
 		return -EPERM;
-	return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
+	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
+					  filp->private_data);
 }
 
 static void amdgpu_move_null(struct ttm_buffer_object *bo,
@@ -251,26 +273,30 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
 	adev = amdgpu_get_adev(bo->bdev);
 	ring = adev->mman.buffer_funcs_ring;
-	old_start = (u64)old_mem->start << PAGE_SHIFT;
-	new_start = (u64)new_mem->start << PAGE_SHIFT;
 
 	switch (old_mem->mem_type) {
-	case TTM_PL_VRAM:
-		old_start += adev->mc.vram_start;
-		break;
 	case TTM_PL_TT:
-		old_start += adev->mc.gtt_start;
+		r = amdgpu_ttm_bind(bo, old_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
+		old_start = (u64)old_mem->start << PAGE_SHIFT;
+		old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
 		return -EINVAL;
 	}
 	switch (new_mem->mem_type) {
-	case TTM_PL_VRAM:
-		new_start += adev->mc.vram_start;
-		break;
 	case TTM_PL_TT:
-		new_start += adev->mc.gtt_start;
+		r = amdgpu_ttm_bind(bo, new_mem);
+		if (r)
+			return r;
+
+	case TTM_PL_VRAM:
+		new_start = (u64)new_mem->start << PAGE_SHIFT;
+		new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
@@ -285,7 +311,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
 	r = amdgpu_copy_buffer(ring, old_start, new_start,
			       new_mem->num_pages * PAGE_SIZE, /* bytes */
-			       bo->resv, &fence);
+			       bo->resv, &fence, false);
 	if (r)
 		return r;
 
@@ -314,7 +340,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = 0;
+	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
			     interruptible, no_wait_gpu);
@@ -335,7 +361,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -361,14 +387,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = 0;
+	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -435,8 +461,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, interruptible,
-				       no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
@@ -524,6 +549,7 @@ struct amdgpu_ttm_tt {
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
 	atomic_t		mmu_invalidations;
+	struct list_head        list;
 };
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
@@ -641,7 +667,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
				   struct ttm_mem_reg *bo_mem)
 {
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
-	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	int r;
 
 	if (gtt->userptr) {
@@ -651,7 +676,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 			return r;
 		}
 	}
-	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 	if (!ttm->num_pages) {
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
 		     ttm->num_pages, bo_mem, ttm);
@@ -662,14 +686,71 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 	    bo_mem->mem_type == AMDGPU_PL_OA)
 		return -EINVAL;
 
+	return 0;
+}
+
+bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+	return gtt && !list_empty(&gtt->list);
+}
+
+int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
+{
+	struct ttm_tt *ttm = bo->ttm;
+	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+	uint32_t flags;
+	int r;
+
+	if (!ttm || amdgpu_ttm_is_bound(ttm))
+		return 0;
+
+	r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
+				 NULL, bo_mem);
+	if (r) {
+		DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
+		return r;
+	}
+
+	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
 
 	if (r) {
-		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
-			  ttm->num_pages, (unsigned)gtt->offset);
+		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+			  ttm->num_pages, gtt->offset);
 		return r;
 	}
+	spin_lock(&gtt->adev->gtt_list_lock);
+	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+	spin_unlock(&gtt->adev->gtt_list_lock);
+	return 0;
+}
+
+int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
+{
+	struct amdgpu_ttm_tt *gtt, *tmp;
+	struct ttm_mem_reg bo_mem;
+	uint32_t flags;
+	int r;
+
+	bo_mem.mem_type = TTM_PL_TT;
+	spin_lock(&adev->gtt_list_lock);
+	list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) {
+		flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem);
+		r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
+				     gtt->ttm.ttm.pages, gtt->ttm.dma_address,
+				     flags);
+		if (r) {
+			spin_unlock(&adev->gtt_list_lock);
+			DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+				  gtt->ttm.ttm.num_pages, gtt->offset);
+			return r;
+		}
+	}
+	spin_unlock(&adev->gtt_list_lock);
 	return 0;
 }
 
@@ -677,12 +758,19 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
+	if (gtt->userptr)
+		amdgpu_ttm_tt_unpin_userptr(ttm);
+
+	if (!amdgpu_ttm_is_bound(ttm))
+		return 0;
+
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
 	if (gtt->adev->gart.ready)
 		amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
 
-	if (gtt->userptr)
-		amdgpu_ttm_tt_unpin_userptr(ttm);
+	spin_lock(&gtt->adev->gtt_list_lock);
+	list_del_init(&gtt->list);
+	spin_unlock(&gtt->adev->gtt_list_lock);
 
 	return 0;
 }
@@ -720,6 +808,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 		kfree(gtt);
 		return NULL;
 	}
+	INIT_LIST_HEAD(&gtt->list);
 	return &gtt->ttm.ttm;
 }
 
@@ -991,10 +1080,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	unsigned i, j;
 	int r;
 
-	r = amdgpu_ttm_global_init(adev);
-	if (r) {
-		return r;
-	}
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&adev->mman.bdev,
			       adev->mman.bo_global_ref.ref.object,
@@ -1159,7 +1244,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
		       uint64_t dst_offset,
		       uint32_t byte_count,
		       struct reservation_object *resv,
-		       struct fence **fence)
+		       struct fence **fence, bool direct_submit)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_job *job;
@@ -1203,8 +1288,79 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
+	if (direct_submit) {
+		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
+				       NULL, NULL, fence);
+		job->fence = fence_get(*fence);
+		if (r)
+			DRM_ERROR("Error scheduling IBs (%d)\n", r);
+		amdgpu_job_free(job);
+	} else {
+		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+		if (r)
+			goto error_free;
+	}
+
+	return r;
+
+error_free:
+	amdgpu_job_free(job);
+	return r;
+}
+
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+		uint32_t src_data,
+		struct reservation_object *resv,
+		struct fence **fence)
+{
+	struct amdgpu_device *adev = bo->adev;
+	struct amdgpu_job *job;
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+
+	uint32_t max_bytes, byte_count;
+	uint64_t dst_offset;
+	unsigned int num_loops, num_dw;
+	unsigned int i;
+	int r;
+
+	byte_count = bo->tbo.num_pages << PAGE_SHIFT;
+	max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
+	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+
+	/* for IB padding */
+	while (num_dw & 0x7)
+		num_dw++;
+
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+	if (r)
+		return r;
+
+	if (resv) {
+		r = amdgpu_sync_resv(adev, &job->sync, resv,
+				AMDGPU_FENCE_OWNER_UNDEFINED);
+		if (r) {
+			DRM_ERROR("sync failed (%d).\n", r);
+			goto error_free;
+		}
+	}
+
+	dst_offset = bo->tbo.mem.start << PAGE_SHIFT;
+	for (i = 0; i < num_loops; i++) {
+		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+
+		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+				dst_offset, cur_size_in_bytes);
+
+		dst_offset += cur_size_in_bytes;
+		byte_count -= cur_size_in_bytes;
+	}
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+			AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 	if (r)
 		goto error_free;
 
@@ -1395,3 +1551,8 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
 
 #endif
 }
+
+u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev)
+{
+	return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object);
+}
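
For reference, a minimal caller-side sketch (not part of the patch) of the reworked amdgpu_copy_buffer() interface, which now takes a direct_submit flag as its last parameter. The helper name example_copy() and its arguments are illustrative only; the call mirrors the amdgpu_move_blit() call site in the diff above, which passes false to route the copy through the mman scheduler entity.

/* Illustrative sketch only: exercising the new amdgpu_copy_buffer()
 * signature introduced above.  example_copy() is a made-up helper; src
 * and dst are GPU addresses already resolved by the caller, as in
 * amdgpu_move_blit().
 */
static int example_copy(struct amdgpu_device *adev, uint64_t src, uint64_t dst,
			uint32_t bytes, struct reservation_object *resv)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct fence *fence = NULL;
	int r;

	/* false = submit through the scheduler entity; true would take the
	 * new direct IB submission path added by this change. */
	r = amdgpu_copy_buffer(ring, src, dst, bytes, resv, &fence, false);
	if (r)
		return r;

	if (fence) {
		fence_wait(fence, false);
		fence_put(fence);
	}
	return 0;
}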