Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 531
1 file changed, 379 insertions(+), 152 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5db0230e45c6..bc746131987f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,11 +29,11 @@
  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  *    Dave Airlie
  */
-#include <ttm/ttm_bo_api.h>
-#include <ttm/ttm_bo_driver.h>
-#include <ttm/ttm_placement.h>
-#include <ttm/ttm_module.h>
-#include <ttm/ttm_page_alloc.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_module.h>
+#include <drm/ttm/ttm_page_alloc.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/seq_file.h>
@@ -43,14 +43,20 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
 
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+			     struct ttm_mem_reg *mem, unsigned num_pages,
+			     uint64_t offset, unsigned window,
+			     struct amdgpu_ring *ring,
+			     uint64_t *addr);
+
 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
 
-
 /*
  * Global memory.
  */
@@ -97,6 +103,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 		goto error_bo;
 	}
 
+	mutex_init(&adev->mman.gtt_window_lock);
+
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
@@ -123,6 +131,7 @@ static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
 	if (adev->mman.mem_global_referenced) {
 		amd_sched_entity_fini(adev->mman.entity.sched,
 				      &adev->mman.entity);
+		mutex_destroy(&adev->mman.gtt_window_lock);
 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 		drm_global_item_unref(&adev->mman.mem_global_ref);
 		adev->mman.mem_global_referenced = false;
@@ -150,7 +159,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		break;
 	case TTM_PL_TT:
 		man->func = &amdgpu_gtt_mgr_func;
-		man->gpu_offset = adev->mc.gtt_start;
+		man->gpu_offset = adev->mc.gart_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -186,12 +195,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
-	static struct ttm_place placements = {
+	static const struct ttm_place placements = {
 		.fpfn = 0,
 		.lpfn = 0,
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
-	unsigned i;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
@@ -207,22 +215,36 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		    adev->mman.buffer_funcs_ring &&
 		    adev->mman.buffer_funcs_ring->ready == false) {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+		} else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+			struct drm_mm_node *node = bo->mem.mm_node;
+			unsigned long pages_left;
+
+			for (pages_left = bo->mem.num_pages;
+			     pages_left;
+			     pages_left -= node->size, node++) {
+				if (node->start < fpfn)
+					break;
+			}
+
+			if (!pages_left)
+				goto gtt;
+
+			/* Try evicting to the CPU inaccessible part of VRAM
+			 * first, but only set GTT as busy placement, so this
+			 * BO will be evicted to GTT rather than causing other
+			 * BOs to be evicted from VRAM
+			 */
+			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+							 AMDGPU_GEM_DOMAIN_GTT);
+			abo->placements[0].fpfn = fpfn;
+			abo->placements[0].lpfn = 0;
+			abo->placement.busy_placement = &abo->placements[1];
+			abo->placement.num_busy_placement = 1;
 		} else {
+gtt:
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
-			for (i = 0; i < abo->placement.num_placement; ++i) {
-				if (!(abo->placements[i].flags &
-				      TTM_PL_FLAG_TT))
-					continue;
-
-				if (abo->placements[i].lpfn)
-					continue;
-
-				/* set an upper limit to force directly
-				 * allocating address space for the BO.
-				 */
-				abo->placements[i].lpfn =
-					adev->mc.gtt_size >> PAGE_SHIFT;
-			}
 		}
 		break;
 	case TTM_PL_TT:
@@ -252,29 +274,18 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
 	new_mem->mm_node = NULL;
 }
 
-static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
-			       struct drm_mm_node *mm_node,
-			       struct ttm_mem_reg *mem,
-			       uint64_t *addr)
+static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
+				    struct drm_mm_node *mm_node,
+				    struct ttm_mem_reg *mem)
 {
-	int r;
-
-	switch (mem->mem_type) {
-	case TTM_PL_TT:
-		r = amdgpu_ttm_bind(bo, mem);
-		if (r)
-			return r;
+	uint64_t addr = 0;
 
-	case TTM_PL_VRAM:
-		*addr = mm_node->start << PAGE_SHIFT;
-		*addr += bo->bdev->man[mem->mem_type].gpu_offset;
-		break;
-	default:
-		DRM_ERROR("Unknown placement %d\n", mem->mem_type);
-		return -EINVAL;
+	if (mem->mem_type != TTM_PL_TT ||
+	    amdgpu_gtt_mgr_is_allocated(mem)) {
+		addr = mm_node->start << PAGE_SHIFT;
+		addr += bo->bdev->man[mem->mem_type].gpu_offset;
 	}
-
-	return 0;
+	return addr;
 }
 
 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
@@ -299,26 +310,40 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	}
 
 	old_mm = old_mem->mm_node;
-	r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start);
-	if (r)
-		return r;
 	old_size = old_mm->size;
-
+	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
 
 	new_mm = new_mem->mm_node;
-	r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start);
-	if (r)
-		return r;
 	new_size = new_mm->size;
+	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
 
 	num_pages = new_mem->num_pages;
+	mutex_lock(&adev->mman.gtt_window_lock);
 	while (num_pages) {
-		unsigned long cur_pages = min(old_size, new_size);
+		unsigned long cur_pages = min(min(old_size, new_size),
					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
+		uint64_t from = old_start, to = new_start;
 		struct dma_fence *next;
 
-		r = amdgpu_copy_buffer(ring, old_start, new_start,
+		if (old_mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
+			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
+					      old_start, 0, ring, &from);
+			if (r)
+				goto error;
+		}
+
+		if (new_mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
+			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
+					      new_start, 1, ring, &to);
+			if (r)
+				goto error;
+		}
+
+		r = amdgpu_copy_buffer(ring, from, to,
 				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false);
+				       bo->resv, &next, false, true);
 		if (r)
 			goto error;
 
@@ -331,10 +356,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
 		old_size -= cur_pages;
 		if (!old_size) {
-			r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem,
-						&old_start);
-			if (r)
-				goto error;
+			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
 			old_size = old_mm->size;
 		} else {
 			old_start += cur_pages * PAGE_SIZE;
@@ -342,22 +364,21 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
 		new_size -= cur_pages;
 		if (!new_size) {
-			r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem,
-						&new_start);
-			if (r)
-				goto error;
-
+			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
 			new_size = new_mm->size;
 		} else {
 			new_start += cur_pages * PAGE_SIZE;
 		}
 	}
+	mutex_unlock(&adev->mman.gtt_window_lock);
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
@@ -384,7 +405,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
+	placements.lpfn = 0;
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
 			     interruptible, no_wait_gpu);
@@ -431,7 +452,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
 	placements.fpfn = 0;
-	placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT;
+	placements.lpfn = 0;
 	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
 			     interruptible, no_wait_gpu);
@@ -507,6 +528,15 @@ memcpy:
 		}
 	}
 
+	if (bo->type == ttm_bo_type_device &&
+	    new_mem->mem_type == TTM_PL_VRAM &&
+	    old_mem->mem_type != TTM_PL_VRAM) {
+		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
+		 * accesses the BO after it's moved.
+		 */
+		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+	}
+
 	/* update statistics */
 	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
 	return 0;
@@ -633,6 +663,38 @@ release_pages:
 	return r;
 }
 
+static void amdgpu_trace_dma_map(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned i;
+
+	if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) {
+		for (i = 0; i < ttm->num_pages; i++) {
+			trace_amdgpu_ttm_tt_populate(
+				adev,
+				gtt->ttm.dma_address[i],
+				page_to_phys(ttm->pages[i]));
+		}
+	}
+}
+
+static void amdgpu_trace_dma_unmap(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned i;
+
+	if (unlikely(trace_amdgpu_ttm_tt_unpopulate_enabled())) {
+		for (i = 0; i < ttm->num_pages; i++) {
+			trace_amdgpu_ttm_tt_unpopulate(
+				adev,
+				gtt->ttm.dma_address[i],
+				page_to_phys(ttm->pages[i]));
+		}
+	}
+}
+
 /* prepare the sg table with the user pages */
 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
@@ -659,6 +721,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 					 gtt->ttm.dma_address, ttm->num_pages);
 
+	amdgpu_trace_dma_map(ttm);
+
 	return 0;
 
 release_sg:
@@ -692,6 +756,8 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 		put_page(page);
 	}
 
+	amdgpu_trace_dma_unmap(ttm);
+
 	sg_free_table(ttm->sg);
 }
 
@@ -699,7 +765,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
 	struct amdgpu_ttm_tt *gtt = (void*)ttm;
-	int r;
+	uint64_t flags;
+	int r = 0;
 
 	if (gtt->userptr) {
 		r = amdgpu_ttm_tt_pin_userptr(ttm);
@@ -718,7 +785,25 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 	    bo_mem->mem_type == AMDGPU_PL_OA)
 		return -EINVAL;
 
-	return 0;
+	if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
+		return 0;
+
+	spin_lock(&gtt->adev->gtt_list_lock);
+	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
+	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+		ttm->pages, gtt->ttm.dma_address, flags);
+
+	if (r) {
+		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+			  ttm->num_pages, gtt->offset);
+		goto error_gart_bind;
+	}
+
+	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+error_gart_bind:
+	spin_unlock(&gtt->adev->gtt_list_lock);
+	return r;
 }
 
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
@@ -730,35 +815,39 @@
 
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct ttm_tt *ttm = bo->ttm;
-	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
-	uint64_t flags;
+	struct ttm_mem_reg tmp;
+
+	struct ttm_placement placement;
+	struct ttm_place placements;
 	int r;
 
 	if (!ttm || amdgpu_ttm_is_bound(ttm))
 		return 0;
 
-	r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo,
-				 NULL, bo_mem);
-	if (r) {
-		DRM_ERROR("Failed to allocate GTT address space (%d)\n", r);
+	tmp = bo->mem;
+	tmp.mm_node = NULL;
+	placement.num_placement = 1;
+	placement.placement = &placements;
+	placement.num_busy_placement = 1;
+	placement.busy_placement = &placements;
+	placements.fpfn = 0;
+	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+	placements.flags = bo->mem.placement | TTM_PL_FLAG_TT;
+
+	r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
+	if (unlikely(r))
 		return r;
-	}
 
-	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
-	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
-	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
-		ttm->pages, gtt->ttm.dma_address, flags);
+	r = ttm_bo_move_ttm(bo, true, false, &tmp);
+	if (unlikely(r))
+		ttm_bo_mem_put(bo, &tmp);
+	else
+		bo->offset = (bo->mem.start << PAGE_SHIFT) +
+			bo->bdev->man[bo->mem.mem_type].gpu_offset;
 
-	if (r) {
-		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-			  ttm->num_pages, gtt->offset);
-		return r;
-	}
-	spin_lock(&gtt->adev->gtt_list_lock);
-	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
-	spin_unlock(&gtt->adev->gtt_list_lock);
-	return 0;
+	return r;
 }
 
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@@ -789,6 +878,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
 
 	if (gtt->userptr)
 		amdgpu_ttm_tt_unpin_userptr(ttm);
@@ -797,14 +887,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 		return 0;
 
 	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-	if (gtt->adev->gart.ready)
-		amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
-
 	spin_lock(&gtt->adev->gtt_list_lock);
+	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+	if (r) {
+		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
+			  gtt->ttm.ttm.num_pages, gtt->offset);
+		goto error_unbind;
+	}
 	list_del_init(&gtt->list);
+error_unbind:
 	spin_unlock(&gtt->adev->gtt_list_lock);
-
-	return 0;
+	return r;
 }
 
 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
@@ -846,7 +939,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 
 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned i;
 	int r;
@@ -869,14 +962,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address, ttm->num_pages);
 		ttm->state = tt_unbound;
-		return 0;
+		r = 0;
+		goto trace_mappings;
 	}
 
-	adev = amdgpu_ttm_adev(ttm->bdev);
-
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
-		return ttm_dma_populate(&gtt->ttm, adev->dev);
+		r = ttm_dma_populate(&gtt->ttm, adev->dev);
+		goto trace_mappings;
 	}
 #endif
 
@@ -899,7 +992,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 			return -EFAULT;
 		}
 	}
-	return 0;
+
+	r = 0;
+trace_mappings:
+	if (likely(!r))
+		amdgpu_trace_dma_map(ttm);
+	return r;
 }
 
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
@@ -920,6 +1018,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 
 	adev = amdgpu_ttm_adev(ttm->bdev);
 
+	amdgpu_trace_dma_unmap(ttm);
+
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
 		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
@@ -1069,6 +1169,67 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }
 
+static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
+				    unsigned long offset,
+				    void *buf, int len, int write)
+{
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	uint32_t value = 0;
+	int ret = 0;
+	uint64_t pos;
+	unsigned long flags;
+
+	if (bo->mem.mem_type != TTM_PL_VRAM)
+		return -EIO;
+
+	while (offset >= (nodes->size << PAGE_SHIFT)) {
+		offset -= nodes->size << PAGE_SHIFT;
+		++nodes;
+	}
+	pos = (nodes->start << PAGE_SHIFT) + offset;
+
+	while (len && pos < adev->mc.mc_vram_size) {
+		uint64_t aligned_pos = pos & ~(uint64_t)3;
+		uint32_t bytes = 4 - (pos & 3);
+		uint32_t shift = (pos & 3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+
+		if (len < bytes) {
+			mask &= 0xffffffff >> (bytes - len) * 8;
+			bytes = len;
+		}
+
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+		if (!write || mask != 0xffffffff)
+			value = RREG32(mmMM_DATA);
+		if (write) {
+			value &= ~mask;
+			value |= (*(uint32_t *)buf << shift) & mask;
+			WREG32(mmMM_DATA, value);
+		}
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+		if (!write) {
+			value = (value & mask) >> shift;
+			memcpy(buf, &value, bytes);
+		}
+
+		ret += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
+		}
+	}
+
+	return ret;
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -1084,11 +1245,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+	.access_memory = &amdgpu_ttm_access_memory
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
+	uint64_t gtt_size;
 	int r;
+	u64 vis_vram_limit;
 
 	r = amdgpu_ttm_global_init(adev);
 	if (r) {
@@ -1112,36 +1276,37 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		DRM_ERROR("Failed initializing VRAM heap.\n");
 		return r;
 	}
+
+	/* Reduce size of CPU-visible VRAM if requested */
+	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
+	if (amdgpu_vis_vram_limit > 0 &&
+	    vis_vram_limit <= adev->mc.visible_vram_size)
+		adev->mc.visible_vram_size = vis_vram_limit;
+
 	/* Change the size here instead of the init above so only lpfn is affected */
 	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
 
-	r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
-			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-			     NULL, NULL, &adev->stollen_vga_memory);
-	if (r) {
-		return r;
-	}
-	r = amdgpu_bo_reserve(adev->stollen_vga_memory, false);
+	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM,
+				    &adev->stolen_vga_memory,
+				    NULL, NULL);
 	if (r)
 		return r;
-	r = amdgpu_bo_pin(adev->stollen_vga_memory, AMDGPU_GEM_DOMAIN_VRAM, NULL);
-	amdgpu_bo_unreserve(adev->stollen_vga_memory);
-	if (r) {
-		amdgpu_bo_unref(&adev->stollen_vga_memory);
-		return r;
-	}
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
-	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT,
-				adev->mc.gtt_size >> PAGE_SHIFT);
+
+	if (amdgpu_gtt_size == -1)
+		gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
			       adev->mc.mc_vram_size);
+	else
+		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
 	if (r) {
 		DRM_ERROR("Failed initializing GTT heap.\n");
 		return r;
 	}
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
-		 (unsigned)(adev->mc.gtt_size / (1024 * 1024)));
+		 (unsigned)(gtt_size / (1024 * 1024)));
 
 	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
 	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
@@ -1197,13 +1362,13 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	if (!adev->mman.initialized)
 		return;
 	amdgpu_ttm_debugfs_fini(adev);
-	if (adev->stollen_vga_memory) {
-		r = amdgpu_bo_reserve(adev->stollen_vga_memory, true);
+	if (adev->stolen_vga_memory) {
+		r = amdgpu_bo_reserve(adev->stolen_vga_memory, true);
 		if (r == 0) {
-			amdgpu_bo_unpin(adev->stollen_vga_memory);
-			amdgpu_bo_unreserve(adev->stollen_vga_memory);
+			amdgpu_bo_unpin(adev->stolen_vga_memory);
+			amdgpu_bo_unreserve(adev->stolen_vga_memory);
 		}
-		amdgpu_bo_unref(&adev->stollen_vga_memory);
+		amdgpu_bo_unref(&adev->stolen_vga_memory);
 	}
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
@@ -1250,12 +1415,77 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
 	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
 }
 
-int amdgpu_copy_buffer(struct amdgpu_ring *ring,
-		       uint64_t src_offset,
-		       uint64_t dst_offset,
-		       uint32_t byte_count,
+static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
+			     struct ttm_mem_reg *mem, unsigned num_pages,
+			     uint64_t offset, unsigned window,
+			     struct amdgpu_ring *ring,
+			     uint64_t *addr)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+	struct amdgpu_device *adev = ring->adev;
+	struct ttm_tt *ttm = bo->ttm;
+	struct amdgpu_job *job;
+	unsigned num_dw, num_bytes;
+	dma_addr_t *dma_address;
+	struct dma_fence *fence;
+	uint64_t src_addr, dst_addr;
+	uint64_t flags;
+	int r;
+
+	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+	*addr = adev->mc.gart_start;
+	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+		AMDGPU_GPU_PAGE_SIZE;
+
+	num_dw = adev->mman.buffer_funcs->copy_num_dw;
+	while (num_dw & 0x7)
+		num_dw++;
+
+	num_bytes = num_pages * 8;
+
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
+	if (r)
+		return r;
+
+	src_addr = num_dw * 4;
+	src_addr += job->ibs[0].gpu_addr;
+
+	dst_addr = adev->gart.table_addr;
+	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+				dst_addr, num_bytes);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
+
+	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
+	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
+	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
+			    &job->ibs[0].ptr[num_dw]);
+	if (r)
+		goto error_free;
+
+	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+	if (r)
+		goto error_free;
+
+	dma_fence_put(fence);
+
+	return r;
+
+error_free:
+	amdgpu_job_free(job);
+	return r;
+}
+
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+		       uint64_t dst_offset, uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct dma_fence **fence, bool direct_submit)
+		       struct dma_fence **fence, bool direct_submit,
+		       bool vm_needs_flush)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_job *job;
@@ -1277,6 +1507,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 	if (r)
 		return r;
 
+	job->vm_needs_flush = vm_needs_flush;
 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
@@ -1321,11 +1552,12 @@ error_free:
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-		       uint32_t src_data,
+		       uint64_t src_data,
 		       struct reservation_object *resv,
 		       struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	/* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/
 	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
@@ -1341,6 +1573,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		return -EINVAL;
 	}
 
+	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
+		r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
+		if (r)
+			return r;
+	}
+
 	num_pages = bo->tbo.num_pages;
 	mm_node = bo->tbo.mem.mm_node;
 	num_loops = 0;
@@ -1351,7 +1589,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		num_pages -= mm_node->size;
 		++mm_node;
 	}
-	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+
+	/* 10 double words for each SDMA_OP_PTEPDE cmd */
+	num_dw = num_loops * 10;
 
 	/* for IB padding */
 	num_dw += 64;
@@ -1376,16 +1616,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
 		uint64_t dst_addr;
 
-		r = amdgpu_mm_node_addr(&bo->tbo, mm_node,
-					&bo->tbo.mem, &dst_addr);
-		if (r)
-			return r;
+		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
+		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
 
 		while (byte_count) {
 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
-			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
-						dst_addr, cur_size_in_bytes);
+			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
+					dst_addr, 0,
+					cur_size_in_bytes >> 3, 0,
+					src_data);
 
 			dst_addr += cur_size_in_bytes;
 			byte_count -= cur_size_in_bytes;
@@ -1411,32 +1651,16 @@ error_free:
 
 #if defined(CONFIG_DEBUG_FS)
 
-extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
-				 *man);
 static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	unsigned ttm_pl = *(int *)node->info_ent->data;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv;
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	spin_lock(&glob->lru_lock);
-	drm_mm_print(mm, &p);
-	spin_unlock(&glob->lru_lock);
-	switch (ttm_pl) {
-	case TTM_PL_VRAM:
-		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
-			   adev->mman.bdev.man[ttm_pl].size,
-			   (u64)atomic64_read(&adev->vram_usage) >> 20,
-			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
-		break;
-	case TTM_PL_TT:
-		amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
-		break;
-	}
+	man->func->debug(man, &p);
 	return 0;
 }
 
@@ -1462,6 +1686,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (*pos >= adev->mc.mc_vram_size)
+		return -ENXIO;
+
 	while (size) {
 		unsigned long flags;
 		uint32_t value;
@@ -1565,7 +1792,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 				  adev, &amdgpu_ttm_gtt_fops);
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
-	i_size_write(ent->d_inode, adev->mc.gtt_size);
+	i_size_write(ent->d_inode, adev->mc.gart_size);
 	adev->mman.gtt = ent;
 #endif