diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1062 | 
1 files changed, 730 insertions, 332 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ecf82c5fe74..bd20ff018512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -77,8 +77,12 @@ struct amdgpu_pte_update_params {  	void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,  		     uint64_t addr, unsigned count, uint32_t incr,  		     uint64_t flags); -	/* indicate update pt or its shadow */ -	bool shadow; +	/* The next two are used during VM update by CPU +	 *  DMA addresses to use for mapping +	 *  Kernel pointer of PD/PT BO that needs to be updated +	 */ +	dma_addr_t *pages_addr; +	void *kptr;  };  /* Helper to disable partial resident texture feature from a fence callback */ @@ -155,11 +159,18 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,   */  static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,  				    int (*validate)(void *, struct amdgpu_bo *), -				    void *param) +				    void *param, bool use_cpu_for_update, +				    struct ttm_bo_global *glob)  {  	unsigned i;  	int r; +	if (use_cpu_for_update) { +		r = amdgpu_bo_kmap(parent->bo, NULL); +		if (r) +			return r; +	} +  	if (!parent->entries)  		return 0; @@ -173,11 +184,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,  		if (r)  			return r; +		spin_lock(&glob->lru_lock); +		ttm_bo_move_to_lru_tail(&entry->bo->tbo); +		if (entry->bo->shadow) +			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); +		spin_unlock(&glob->lru_lock); +  		/*  		 * Recurse into the sub directory. This is harmless because we  		 * have only a maximum of 5 layers.  		 
*/ -		r = amdgpu_vm_validate_level(entry, validate, param); +		r = amdgpu_vm_validate_level(entry, validate, param, +					     use_cpu_for_update, glob);  		if (r)  			return r;  	} @@ -208,54 +226,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,  	if (num_evictions == vm->last_eviction_counter)  		return 0; -	return amdgpu_vm_validate_level(&vm->root, validate, param); -} - -/** - * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) -{ -	unsigned i; - -	if (!parent->entries) -		return; - -	for (i = 0; i <= parent->last_entry_used; ++i) { -		struct amdgpu_vm_pt *entry = &parent->entries[i]; - -		if (!entry->bo) -			continue; - -		ttm_bo_move_to_lru_tail(&entry->bo->tbo); -		amdgpu_vm_move_level_in_lru(entry); -	} +	return amdgpu_vm_validate_level(&vm->root, validate, param, +					vm->use_cpu_for_update, +					adev->mman.bdev.glob);  }  /** - * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. 
- */ -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, -				  struct amdgpu_vm *vm) -{ -	struct ttm_bo_global *glob = adev->mman.bdev.glob; - -	spin_lock(&glob->lru_lock); -	amdgpu_vm_move_level_in_lru(&vm->root); -	spin_unlock(&glob->lru_lock); -} - - /**   * amdgpu_vm_alloc_levels - allocate the PD/PT levels   *   * @adev: amdgpu_device pointer @@ -275,12 +251,15 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,  		adev->vm_manager.block_size;  	unsigned pt_idx, from, to;  	int r; +	u64 flags; +	uint64_t init_value = 0;  	if (!parent->entries) {  		unsigned num_entries = amdgpu_vm_num_entries(adev, level); -		parent->entries = drm_calloc_large(num_entries, -						   sizeof(struct amdgpu_vm_pt)); +		parent->entries = kvmalloc_array(num_entries, +						   sizeof(struct amdgpu_vm_pt), +						   GFP_KERNEL | __GFP_ZERO);  		if (!parent->entries)  			return -ENOMEM;  		memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); @@ -299,6 +278,20 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,  	saddr = saddr & ((1 << shift) - 1);  	eaddr = eaddr & ((1 << shift) - 1); +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | +			AMDGPU_GEM_CREATE_VRAM_CLEARED; +	if (vm->use_cpu_for_update) +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; +	else +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | +				AMDGPU_GEM_CREATE_SHADOW); + +	if (vm->pte_support_ats) { +		init_value = AMDGPU_PTE_SYSTEM; +		if (level != adev->vm_manager.num_level - 1) +			init_value |= AMDGPU_PDE_PTE; +	} +  	/* walk over the address space and allocate the page tables */  	for (pt_idx = from; pt_idx <= to; ++pt_idx) {  		struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -310,14 +303,19 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,  					     amdgpu_vm_bo_size(adev, level),  					     AMDGPU_GPU_PAGE_SIZE, true,  					     AMDGPU_GEM_DOMAIN_VRAM, -					     AMDGPU_GEM_CREATE_NO_CPU_ACCESS | -					     AMDGPU_GEM_CREATE_SHADOW | -					     
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | -					     AMDGPU_GEM_CREATE_VRAM_CLEARED, -					     NULL, resv, &pt); +					     flags, +					     NULL, resv, init_value, &pt);  			if (r)  				return r; +			if (vm->use_cpu_for_update) { +				r = amdgpu_bo_kmap(pt, NULL); +				if (r) { +					amdgpu_bo_unref(&pt); +					return r; +				} +			} +  			/* Keep a reference to the root directory to avoid  			* freeing them up in the wrong order.  			*/ @@ -391,6 +389,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,  		atomic_read(&adev->gpu_reset_counter);  } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ +	return !!vm->reserved_vmid[vmhub]; +} + +/* idr_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, +					       struct amdgpu_ring *ring, +					       struct amdgpu_sync *sync, +					       struct dma_fence *fence, +					       struct amdgpu_job *job) +{ +	struct amdgpu_device *adev = ring->adev; +	unsigned vmhub = ring->funcs->vmhub; +	uint64_t fence_context = adev->fence_context + ring->idx; +	struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; +	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; +	struct dma_fence *updates = sync->last_vm_update; +	int r = 0; +	struct dma_fence *flushed, *tmp; +	bool needs_flush = vm->use_cpu_for_update; + +	flushed  = id->flushed_updates; +	if ((amdgpu_vm_had_gpu_reset(adev, id)) || +	    (atomic64_read(&id->owner) != vm->client_id) || +	    (job->vm_pd_addr != id->pd_gpu_addr) || +	    (updates && (!flushed || updates->context != flushed->context || +			dma_fence_is_later(updates, flushed))) || +	    (!id->last_flush || (id->last_flush->context != fence_context && +				 !dma_fence_is_signaled(id->last_flush)))) { +		needs_flush = true; +		/* to prevent one context starved by another context */ +		id->pd_gpu_addr = 0; +		tmp = amdgpu_sync_peek_fence(&id->active, ring); +		if (tmp) { +			r = amdgpu_sync_fence(adev, sync, 
tmp); +			return r; +		} +	} + +	/* Good we can use this VMID. Remember this submission as +	* user of the VMID. +	*/ +	r = amdgpu_sync_fence(ring->adev, &id->active, fence); +	if (r) +		goto out; + +	if (updates && (!flushed || updates->context != flushed->context || +			dma_fence_is_later(updates, flushed))) { +		dma_fence_put(id->flushed_updates); +		id->flushed_updates = dma_fence_get(updates); +	} +	id->pd_gpu_addr = job->vm_pd_addr; +	atomic64_set(&id->owner, vm->client_id); +	job->vm_needs_flush = needs_flush; +	if (needs_flush) { +		dma_fence_put(id->last_flush); +		id->last_flush = NULL; +	} +	job->vm_id = id - id_mgr->ids; +	trace_amdgpu_vm_grab_id(vm, ring, job); +out: +	return r; +} +  /**   * amdgpu_vm_grab_id - allocate the next free VMID   * @@ -415,12 +478,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,  	unsigned i;  	int r = 0; +	mutex_lock(&id_mgr->lock); +	if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { +		r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); +		mutex_unlock(&id_mgr->lock); +		return r; +	}  	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); -	if (!fences) +	if (!fences) { +		mutex_unlock(&id_mgr->lock);  		return -ENOMEM; - -	mutex_lock(&id_mgr->lock); - +	}  	/* Check if we have an idle VMID */  	i = 0;  	list_for_each_entry(idle, &id_mgr->ids_lru, list) { @@ -462,11 +530,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,  	}  	kfree(fences); -	job->vm_needs_flush = false; +	job->vm_needs_flush = vm->use_cpu_for_update;  	/* Check if we can use a VMID already assigned to this VM */  	list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {  		struct dma_fence *flushed; -		bool needs_flush = false; +		bool needs_flush = vm->use_cpu_for_update;  		/* Check all the prerequisites to using this VMID */  		if (amdgpu_vm_had_gpu_reset(adev, id)) @@ -521,7 +589,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,  	
id->pd_gpu_addr = job->vm_pd_addr;  	dma_fence_put(id->flushed_updates);  	id->flushed_updates = dma_fence_get(updates); -	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);  	atomic64_set(&id->owner, vm->client_id);  needs_flush: @@ -540,40 +607,118 @@ error:  	return r;  } -static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) +static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, +					  struct amdgpu_vm *vm, +					  unsigned vmhub) +{ +	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + +	mutex_lock(&id_mgr->lock); +	if (vm->reserved_vmid[vmhub]) { +		list_add(&vm->reserved_vmid[vmhub]->list, +			&id_mgr->ids_lru); +		vm->reserved_vmid[vmhub] = NULL; +		atomic_dec(&id_mgr->reserved_vmid_num); +	} +	mutex_unlock(&id_mgr->lock); +} + +static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, +					 struct amdgpu_vm *vm, +					 unsigned vmhub) +{ +	struct amdgpu_vm_id_manager *id_mgr; +	struct amdgpu_vm_id *idle; +	int r = 0; + +	id_mgr = &adev->vm_manager.id_mgr[vmhub]; +	mutex_lock(&id_mgr->lock); +	if (vm->reserved_vmid[vmhub]) +		goto unlock; +	if (atomic_inc_return(&id_mgr->reserved_vmid_num) > +	    AMDGPU_VM_MAX_RESERVED_VMID) { +		DRM_ERROR("Over limitation of reserved vmid\n"); +		atomic_dec(&id_mgr->reserved_vmid_num); +		r = -EINVAL; +		goto unlock; +	} +	/* Select the first entry VMID */ +	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); +	list_del_init(&idle->list); +	vm->reserved_vmid[vmhub] = idle; +	mutex_unlock(&id_mgr->lock); + +	return 0; +unlock: +	mutex_unlock(&id_mgr->lock); +	return r; +} + +/** + * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug + * + * @adev: amdgpu_device pointer + */ +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)  { -	struct amdgpu_device *adev = ring->adev;  	const struct amdgpu_ip_block *ip_block; +	bool has_compute_vm_bug; +	struct amdgpu_ring *ring; +	int i; -	if 
(ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) -		/* only compute rings */ -		return false; +	has_compute_vm_bug = false;  	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); -	if (!ip_block) -		return false; +	if (ip_block) { +		/* Compute has a VM bug for GFX version < 7. +		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ +		if (ip_block->version->major <= 7) +			has_compute_vm_bug = true; +		else if (ip_block->version->major == 8) +			if (adev->gfx.mec_fw_version < 673) +				has_compute_vm_bug = true; +	} -	if (ip_block->version->major <= 7) { -		/* gfx7 has no workaround */ -		return true; -	} else if (ip_block->version->major == 8) { -		if (adev->gfx.mec_fw_version >= 673) -			/* gfx8 is fixed in MEC firmware 673 */ -			return false; +	for (i = 0; i < adev->num_rings; i++) { +		ring = adev->rings[i]; +		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) +			/* only compute rings */ +			ring->has_compute_vm_bug = has_compute_vm_bug;  		else -			return true; +			ring->has_compute_vm_bug = false;  	} -	return false;  } -static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, +				  struct amdgpu_job *job)  { -	u64 addr = mc_addr; +	struct amdgpu_device *adev = ring->adev; +	unsigned vmhub = ring->funcs->vmhub; +	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; +	struct amdgpu_vm_id *id; +	bool gds_switch_needed; +	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; + +	if (job->vm_id == 0) +		return false; +	id = &id_mgr->ids[job->vm_id]; +	gds_switch_needed = ring->funcs->emit_gds_switch && ( +		id->gds_base != job->gds_base || +		id->gds_size != job->gds_size || +		id->gws_base != job->gws_base || +		id->gws_size != job->gws_size || +		id->oa_base != job->oa_base || +		id->oa_size != job->oa_size); + +	if (amdgpu_vm_had_gpu_reset(adev, id)) +		return true; -	if (adev->gart.gart_funcs->adjust_mc_addr) -		addr = 
adev->gart.gart_funcs->adjust_mc_addr(adev, addr); +	return vm_flush_needed || gds_switch_needed; +} -	return addr; +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) +{ +	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);  }  /** @@ -585,7 +730,7 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)   *   * Emit a VM flush when it is necessary.   */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)  {  	struct amdgpu_device *adev = ring->adev;  	unsigned vmhub = ring->funcs->vmhub; @@ -598,8 +743,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)  		id->gws_size != job->gws_size ||  		id->oa_base != job->oa_base ||  		id->oa_size != job->oa_size); -	bool vm_flush_needed = job->vm_needs_flush || -		amdgpu_vm_ring_has_compute_vm_bug(ring); +	bool vm_flush_needed = job->vm_needs_flush;  	unsigned patch_offset = 0;  	int r; @@ -608,21 +752,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)  		vm_flush_needed = true;  	} -	if (!vm_flush_needed && !gds_switch_needed) +	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)  		return 0;  	if (ring->funcs->init_cond_exec)  		patch_offset = amdgpu_ring_init_cond_exec(ring); -	if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) +	if (need_pipe_sync)  		amdgpu_ring_emit_pipeline_sync(ring);  	if (ring->funcs->emit_vm_flush && vm_flush_needed) { -		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);  		struct dma_fence *fence; -		trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); -		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); +		trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); +		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);  		r = amdgpu_fence_emit(ring, &fence);  		if (r) @@ -631,6 +774,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job)  		mutex_lock(&id_mgr->lock);  		dma_fence_put(id->last_flush);  		id->last_flush = fence; +		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);  		mutex_unlock(&id_mgr->lock);  	} @@ -718,8 +862,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,  {  	struct amdgpu_bo_va *bo_va; -	list_for_each_entry(bo_va, &bo->va, bo_list) { -		if (bo_va->vm == vm) { +	list_for_each_entry(bo_va, &bo->va, base.bo_list) { +		if (bo_va->base.vm == vm) {  			return bo_va;  		}  	} @@ -805,6 +949,52 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)  	return result;  } +/** + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU + * + * @params: see amdgpu_pte_update_params definition + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, +				   uint64_t pe, uint64_t addr, +				   unsigned count, uint32_t incr, +				   uint64_t flags) +{ +	unsigned int i; +	uint64_t value; + +	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + +	for (i = 0; i < count; i++) { +		value = params->pages_addr ? 
+			amdgpu_vm_map_gart(params->pages_addr, addr) : +			addr; +		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, +					i, value, flags); +		addr += incr; +	} +} + +static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, +			     void *owner) +{ +	struct amdgpu_sync sync; +	int r; + +	amdgpu_sync_create(&sync); +	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); +	r = amdgpu_sync_wait(&sync, true); +	amdgpu_sync_free(&sync); + +	return r; +} +  /*   * amdgpu_vm_update_level - update a single level in the hierarchy   * @@ -821,11 +1011,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,  				  unsigned level)  {  	struct amdgpu_bo *shadow; -	struct amdgpu_ring *ring; -	uint64_t pd_addr, shadow_addr; +	struct amdgpu_ring *ring = NULL; +	uint64_t pd_addr, shadow_addr = 0;  	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);  	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; -	unsigned count = 0, pt_idx, ndw; +	unsigned count = 0, pt_idx, ndw = 0;  	struct amdgpu_job *job;  	struct amdgpu_pte_update_params params;  	struct dma_fence *fence = NULL; @@ -834,34 +1024,45 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,  	if (!parent->entries)  		return 0; -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); -	/* padding, etc. */ -	ndw = 64; +	memset(¶ms, 0, sizeof(params)); +	params.adev = adev; +	shadow = parent->bo->shadow; -	/* assume the worst case */ -	ndw += parent->last_entry_used * 6; +	if (vm->use_cpu_for_update) { +		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); +		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); +		if (unlikely(r)) +			return r; -	pd_addr = amdgpu_bo_gpu_offset(parent->bo); +		params.func = amdgpu_vm_cpu_set_ptes; +	} else { +		ring = container_of(vm->entity.sched, struct amdgpu_ring, +				    sched); -	shadow = parent->bo->shadow; -	if (shadow) { -		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); +		/* padding, etc. 
*/ +		ndw = 64; + +		/* assume the worst case */ +		ndw += parent->last_entry_used * 6; + +		pd_addr = amdgpu_bo_gpu_offset(parent->bo); + +		if (shadow) { +			shadow_addr = amdgpu_bo_gpu_offset(shadow); +			ndw *= 2; +		} else { +			shadow_addr = 0; +		} + +		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);  		if (r)  			return r; -		shadow_addr = amdgpu_bo_gpu_offset(shadow); -		ndw *= 2; -	} else { -		shadow_addr = 0; -	} -	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); -	if (r) -		return r; +		params.ib = &job->ibs[0]; +		params.func = amdgpu_vm_do_set_ptes; +	} -	memset(¶ms, 0, sizeof(params)); -	params.adev = adev; -	params.ib = &job->ibs[0];  	/* walk over the address space and update the directory */  	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { @@ -871,20 +1072,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,  		if (bo == NULL)  			continue; -		if (bo->shadow) { -			struct amdgpu_bo *pt_shadow = bo->shadow; - -			r = amdgpu_ttm_bind(&pt_shadow->tbo, -					    &pt_shadow->tbo.mem); -			if (r) -				return r; -		} -  		pt = amdgpu_bo_gpu_offset(bo); -		if (parent->entries[pt_idx].addr == pt) +		pt = amdgpu_gart_get_vm_pde(adev, pt); +		/* Don't update huge pages here */ +		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || +		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))  			continue; -		parent->entries[pt_idx].addr = pt; +		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;  		pde = pd_addr + pt_idx * 8;  		if (((last_pde + 8 * count) != pde) || @@ -892,19 +1087,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,  		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {  			if (count) { -				uint64_t pt_addr = -					amdgpu_vm_adjust_mc_addr(adev, last_pt); -  				if (shadow) -					amdgpu_vm_do_set_ptes(¶ms, -							      last_shadow, -							      pt_addr, count, -							      incr, -							      AMDGPU_PTE_VALID); - -				amdgpu_vm_do_set_ptes(¶ms, last_pde, -						      pt_addr, count, 
incr, -						      AMDGPU_PTE_VALID); +					params.func(¶ms, +						    last_shadow, +						    last_pt, count, +						    incr, +						    AMDGPU_PTE_VALID); + +				params.func(¶ms, last_pde, +					    last_pt, count, incr, +					    AMDGPU_PTE_VALID);  			}  			count = 1; @@ -917,36 +1109,37 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,  	}  	if (count) { -		uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); -  		if (vm->root.bo->shadow) -			amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr, -					      count, incr, AMDGPU_PTE_VALID); +			params.func(¶ms, last_shadow, last_pt, +				    count, incr, AMDGPU_PTE_VALID); -		amdgpu_vm_do_set_ptes(¶ms, last_pde, pt_addr, -				      count, incr, AMDGPU_PTE_VALID); +		params.func(¶ms, last_pde, last_pt, +			    count, incr, AMDGPU_PTE_VALID);  	} -	if (params.ib->length_dw == 0) { -		amdgpu_job_free(job); -	} else { -		amdgpu_ring_pad_ib(ring, params.ib); -		amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, -				 AMDGPU_FENCE_OWNER_VM); -		if (shadow) -			amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, +	if (!vm->use_cpu_for_update) { +		if (params.ib->length_dw == 0) { +			amdgpu_job_free(job); +		} else { +			amdgpu_ring_pad_ib(ring, params.ib); +			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,  					 AMDGPU_FENCE_OWNER_VM); +			if (shadow) +				amdgpu_sync_resv(adev, &job->sync, +						 shadow->tbo.resv, +						 AMDGPU_FENCE_OWNER_VM); + +			WARN_ON(params.ib->length_dw > ndw); +			r = amdgpu_job_submit(job, ring, &vm->entity, +					AMDGPU_FENCE_OWNER_VM, &fence); +			if (r) +				goto error_free; -		WARN_ON(params.ib->length_dw > ndw); -		r = amdgpu_job_submit(job, ring, &vm->entity, -				AMDGPU_FENCE_OWNER_VM, &fence); -		if (r) -			goto error_free; - -		amdgpu_bo_fence(parent->bo, fence, true); -		dma_fence_put(vm->last_dir_update); -		vm->last_dir_update = dma_fence_get(fence); -		dma_fence_put(fence); +			amdgpu_bo_fence(parent->bo, fence, true); +			
dma_fence_put(vm->last_dir_update); +			vm->last_dir_update = dma_fence_get(fence); +			dma_fence_put(fence); +		}  	}  	/*  	 * Recurse into the subdirectories. This recursion is harmless because @@ -971,6 +1164,32 @@ error_free:  }  /* + * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * + * @parent: parent PD + * + * Mark all PD level as invalid after an error. + */ +static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) +{ +	unsigned pt_idx; + +	/* +	 * Recurse into the subdirectories. This recursion is harmless because +	 * we only have a maximum of 5 layers. +	 */ +	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { +		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + +		if (!entry->bo) +			continue; + +		entry->addr = ~0ULL; +		amdgpu_vm_invalidate_level(entry); +	} +} + +/*   * amdgpu_vm_update_directories - make sure that all directories are valid   *   * @adev: amdgpu_device pointer @@ -982,33 +1201,118 @@ error_free:  int amdgpu_vm_update_directories(struct amdgpu_device *adev,  				 struct amdgpu_vm *vm)  { -	return amdgpu_vm_update_level(adev, vm, &vm->root, 0); +	int r; + +	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); +	if (r) +		amdgpu_vm_invalidate_level(&vm->root); + +	if (vm->use_cpu_for_update) { +		/* Flush HDP */ +		mb(); +		amdgpu_gart_flush_gpu_tlb(adev, 0); +	} + +	return r;  }  /** - * amdgpu_vm_find_pt - find the page table for an address + * amdgpu_vm_find_entry - find the entry for an address   *   * @p: see amdgpu_pte_update_params definition   * @addr: virtual address in question + * @entry: resulting entry or NULL + * @parent: parent entry   * - * Find the page table BO for a virtual address, return NULL when none found. + * Find the vm_pt entry and it's parent for the given address.   
*/ -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, -					  uint64_t addr) +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, +			 struct amdgpu_vm_pt **entry, +			 struct amdgpu_vm_pt **parent)  { -	struct amdgpu_vm_pt *entry = &p->vm->root;  	unsigned idx, level = p->adev->vm_manager.num_level; -	while (entry->entries) { +	*parent = NULL; +	*entry = &p->vm->root; +	while ((*entry)->entries) {  		idx = addr >> (p->adev->vm_manager.block_size * level--); -		idx %= amdgpu_bo_size(entry->bo) / 8; -		entry = &entry->entries[idx]; +		idx %= amdgpu_bo_size((*entry)->bo) / 8; +		*parent = *entry; +		*entry = &(*entry)->entries[idx];  	}  	if (level) -		return NULL; +		*entry = NULL; +} + +/** + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * + * @p: see amdgpu_pte_update_params definition + * @entry: vm_pt entry to check + * @parent: parent entry + * @nptes: number of PTEs updated with this operation + * @dst: destination address where the PTEs should point to + * @flags: access flags fro the PTEs + * + * Check if we can update the PD with a huge page. 
+ */ +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, +					struct amdgpu_vm_pt *entry, +					struct amdgpu_vm_pt *parent, +					unsigned nptes, uint64_t dst, +					uint64_t flags) +{ +	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); +	uint64_t pd_addr, pde; + +	/* In the case of a mixed PT the PDE must point to it*/ +	if (p->adev->asic_type < CHIP_VEGA10 || +	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) || +	    p->src || +	    !(flags & AMDGPU_PTE_VALID)) { + +		dst = amdgpu_bo_gpu_offset(entry->bo); +		dst = amdgpu_gart_get_vm_pde(p->adev, dst); +		flags = AMDGPU_PTE_VALID; +	} else { +		/* Set the huge page flag to stop scanning at this PDE */ +		flags |= AMDGPU_PDE_PTE; +	} + +	if (entry->addr == (dst | flags)) +		return; + +	entry->addr = (dst | flags); + +	if (use_cpu_update) { +		/* In case a huge page is replaced with a system +		 * memory mapping, p->pages_addr != NULL and +		 * amdgpu_vm_cpu_set_ptes would try to translate dst +		 * through amdgpu_vm_map_gart. But dst is already a +		 * GPU address (of the page table). Disable +		 * amdgpu_vm_map_gart temporarily. +		 */ +		dma_addr_t *tmp; -	return entry->bo; +		tmp = p->pages_addr; +		p->pages_addr = NULL; + +		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); +		pde = pd_addr + (entry - parent->entries) * 8; +		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); + +		p->pages_addr = tmp; +	} else { +		if (parent->bo->shadow) { +			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); +			pde = pd_addr + (entry - parent->entries) * 8; +			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); +		} +		pd_addr = amdgpu_bo_gpu_offset(parent->bo); +		pde = pd_addr + (entry - parent->entries) * 8; +		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); +	}  }  /** @@ -1022,92 +1326,59 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,   * @flags: mapping flags   *   * Update the page tables in the range @start - @end. 
+ * Returns 0 for success, -EINVAL for failure.   */ -static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,  				  uint64_t start, uint64_t end,  				  uint64_t dst, uint64_t flags)  {  	struct amdgpu_device *adev = params->adev;  	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; -	uint64_t cur_pe_start, cur_nptes, cur_dst; -	uint64_t addr; /* next GPU address to be updated */ +	uint64_t addr, pe_start;  	struct amdgpu_bo *pt; -	unsigned nptes; /* next number of ptes to be updated */ -	uint64_t next_pe_start; - -	/* initialize the variables */ -	addr = start; -	pt = amdgpu_vm_get_pt(params, addr); -	if (!pt) { -		pr_err("PT not found, aborting update_ptes\n"); -		return; -	} - -	if (params->shadow) { -		if (!pt->shadow) -			return; -		pt = pt->shadow; -	} -	if ((addr & ~mask) == (end & ~mask)) -		nptes = end - addr; -	else -		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - -	cur_pe_start = amdgpu_bo_gpu_offset(pt); -	cur_pe_start += (addr & mask) * 8; -	cur_nptes = nptes; -	cur_dst = dst; - -	/* for next ptb*/ -	addr += nptes; -	dst += nptes * AMDGPU_GPU_PAGE_SIZE; +	unsigned nptes; +	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);  	/* walk over the address space and update the page tables */ -	while (addr < end) { -		pt = amdgpu_vm_get_pt(params, addr); -		if (!pt) { -			pr_err("PT not found, aborting update_ptes\n"); -			return; -		} +	for (addr = start; addr < end; addr += nptes, +	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) { +		struct amdgpu_vm_pt *entry, *parent; -		if (params->shadow) { -			if (!pt->shadow) -				return; -			pt = pt->shadow; -		} +		amdgpu_vm_get_entry(params, addr, &entry, &parent); +		if (!entry) +			return -ENOENT;  		if ((addr & ~mask) == (end & ~mask))  			nptes = end - addr;  		else  			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); -		next_pe_start = amdgpu_bo_gpu_offset(pt); -		next_pe_start += (addr & mask) * 8; 
+		amdgpu_vm_handle_huge_pages(params, entry, parent, +					    nptes, dst, flags); +		/* We don't need to update PTEs for huge pages */ +		if (entry->addr & AMDGPU_PDE_PTE) +			continue; -		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && -		    ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { -			/* The next ptb is consecutive to current ptb. -			 * Don't call the update function now. -			 * Will update two ptbs together in future. -			*/ -			cur_nptes += nptes; +		pt = entry->bo; +		if (use_cpu_update) { +			pe_start = (unsigned long)amdgpu_bo_kptr(pt);  		} else { -			params->func(params, cur_pe_start, cur_dst, cur_nptes, -				     AMDGPU_GPU_PAGE_SIZE, flags); - -			cur_pe_start = next_pe_start; -			cur_nptes = nptes; -			cur_dst = dst; +			if (pt->shadow) { +				pe_start = amdgpu_bo_gpu_offset(pt->shadow); +				pe_start += (addr & mask) * 8; +				params->func(params, pe_start, dst, nptes, +					     AMDGPU_GPU_PAGE_SIZE, flags); +			} +			pe_start = amdgpu_bo_gpu_offset(pt);  		} -		/* for next ptb*/ -		addr += nptes; -		dst += nptes * AMDGPU_GPU_PAGE_SIZE; +		pe_start += (addr & mask) * 8; +		params->func(params, pe_start, dst, nptes, +			     AMDGPU_GPU_PAGE_SIZE, flags);  	} -	params->func(params, cur_pe_start, cur_dst, cur_nptes, -		     AMDGPU_GPU_PAGE_SIZE, flags); +	return 0;  }  /* @@ -1119,11 +1390,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,   * @end: last PTE to handle   * @dst: addr those PTEs should point to   * @flags: hw mapping flags + * Returns 0 for success, -EINVAL for failure.   */ -static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params, +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,  				uint64_t start, uint64_t end,  				uint64_t dst, uint64_t flags)  { +	int r; +  	/**  	 * The MC L1 TLB supports variable sized pages, based on a fragment  	 * field in the PTE. 
When this field is set to a non-zero value, page @@ -1142,38 +1416,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,  	 * Userspace can support this by aligning virtual base address and  	 * allocation size to the fragment size.  	 */ - -	/* SI and newer are optimized for 64KB */ -	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); -	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; +	unsigned pages_per_frag = params->adev->vm_manager.fragment_size; +	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); +	uint64_t frag_align = 1 << pages_per_frag;  	uint64_t frag_start = ALIGN(start, frag_align);  	uint64_t frag_end = end & ~(frag_align - 1);  	/* system pages are non continuously */  	if (params->src || !(flags & AMDGPU_PTE_VALID) || -	    (frag_start >= frag_end)) { - -		amdgpu_vm_update_ptes(params, start, end, dst, flags); -		return; -	} +	    (frag_start >= frag_end)) +		return amdgpu_vm_update_ptes(params, start, end, dst, flags);  	/* handle the 4K area at the beginning */  	if (start != frag_start) { -		amdgpu_vm_update_ptes(params, start, frag_start, -				      dst, flags); +		r = amdgpu_vm_update_ptes(params, start, frag_start, +					  dst, flags); +		if (r) +			return r;  		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;  	}  	/* handle the area in the middle */ -	amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, -			      flags | frag_flags); +	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, +				  flags | frag_flags); +	if (r) +		return r;  	/* handle the 4K area at the end */  	if (frag_end != end) {  		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; -		amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); +		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);  	} +	return r;  }  /** @@ -1215,12 +1490,30 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	params.vm = vm;  	params.src = src; -	ring = container_of(vm->entity.sched, struct 
amdgpu_ring, sched); -  	/* sync to everything on unmapping */  	if (!(flags & AMDGPU_PTE_VALID))  		owner = AMDGPU_FENCE_OWNER_UNDEFINED; +	if (vm->use_cpu_for_update) { +		/* params.src is used as flag to indicate system Memory */ +		if (pages_addr) +			params.src = ~0; + +		/* Wait for PT BOs to be free. PTs share the same resv. object +		 * as the root PD BO +		 */ +		r = amdgpu_vm_wait_pd(adev, vm, owner); +		if (unlikely(r)) +			return r; + +		params.func = amdgpu_vm_cpu_set_ptes; +		params.pages_addr = pages_addr; +		return amdgpu_vm_frag_ptes(&params, start, last + 1, +					   addr, flags); +	} + +	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); +  	nptes = last - start + 1;  	/* @@ -1232,6 +1525,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	/* padding, etc. */  	ndw = 64; +	/* one PDE write for each huge page */ +	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; +  	if (src) {  		/* only copy commands needed */  		ndw += ncmds * 7; @@ -1293,10 +1589,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	if (r)  		goto error_free; -	params.shadow = true; -	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); -	params.shadow = false; -	amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); +	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); +	if (r) +		goto error_free;  	amdgpu_ring_pad_ib(ring, params.ib);  	WARN_ON(params.ib->length_dw > ndw); @@ -1312,6 +1607,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  error_free:  	amdgpu_job_free(job); +	amdgpu_vm_invalidate_level(&vm->root);  	return r;  } @@ -1320,7 +1616,6 @@ error_free:   *   * @adev: amdgpu_device pointer   * @exclusive: fence we need to sync to - * @gtt_flags: flags as they are used for GTT   * @pages_addr: DMA addresses to use for mapping   * @vm: requested vm   * @mapping: mapped range and flags to use for the update @@ -1334,7 +1629,6 @@ error_free:   */  static int 
amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  				      struct dma_fence *exclusive, -				      uint64_t gtt_flags,  				      dma_addr_t *pages_addr,  				      struct amdgpu_vm *vm,  				      struct amdgpu_bo_va_mapping *mapping, @@ -1389,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  		}  		if (pages_addr) { -			if (flags == gtt_flags) -				src = adev->gart.table_addr + -					(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8; -			else -				max_entries = min(max_entries, 16ull * 1024ull); +			max_entries = min(max_entries, 16ull * 1024ull);  			addr = 0;  		} else if (flags & AMDGPU_PTE_VALID) {  			addr += adev->vm_manager.vram_base_offset; @@ -1434,50 +1724,45 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  			struct amdgpu_bo_va *bo_va,  			bool clear)  { -	struct amdgpu_vm *vm = bo_va->vm; +	struct amdgpu_bo *bo = bo_va->base.bo; +	struct amdgpu_vm *vm = bo_va->base.vm;  	struct amdgpu_bo_va_mapping *mapping;  	dma_addr_t *pages_addr = NULL; -	uint64_t gtt_flags, flags;  	struct ttm_mem_reg *mem;  	struct drm_mm_node *nodes;  	struct dma_fence *exclusive; +	uint64_t flags;  	int r; -	if (clear || !bo_va->bo) { +	if (clear || !bo_va->base.bo) {  		mem = NULL;  		nodes = NULL;  		exclusive = NULL;  	} else {  		struct ttm_dma_tt *ttm; -		mem = &bo_va->bo->tbo.mem; +		mem = &bo_va->base.bo->tbo.mem;  		nodes = mem->mm_node;  		if (mem->mem_type == TTM_PL_TT) { -			ttm = container_of(bo_va->bo->tbo.ttm, struct -					   ttm_dma_tt, ttm); +			ttm = container_of(bo_va->base.bo->tbo.ttm, +					   struct ttm_dma_tt, ttm);  			pages_addr = ttm->dma_address;  		} -		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); +		exclusive = reservation_object_get_excl(bo->tbo.resv);  	} -	if (bo_va->bo) { -		flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); -		gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && -			adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? 
-			flags : 0; -	} else { +	if (bo) +		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); +	else  		flags = 0x0; -		gtt_flags = ~0x0; -	}  	spin_lock(&vm->status_lock); -	if (!list_empty(&bo_va->vm_status)) +	if (!list_empty(&bo_va->base.vm_status))  		list_splice_init(&bo_va->valids, &bo_va->invalids);  	spin_unlock(&vm->status_lock);  	list_for_each_entry(mapping, &bo_va->invalids, list) { -		r = amdgpu_vm_bo_split_mapping(adev, exclusive, -					       gtt_flags, pages_addr, vm, +		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,  					       mapping, flags, nodes,  					       &bo_va->last_pt_update);  		if (r) @@ -1494,11 +1779,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  	spin_lock(&vm->status_lock);  	list_splice_init(&bo_va->invalids, &bo_va->valids); -	list_del_init(&bo_va->vm_status); +	list_del_init(&bo_va->base.vm_status);  	if (clear) -		list_add(&bo_va->vm_status, &vm->cleared); +		list_add(&bo_va->base.vm_status, &vm->cleared);  	spin_unlock(&vm->status_lock); +	if (vm->use_cpu_for_update) { +		/* Flush HDP */ +		mb(); +		amdgpu_gart_flush_gpu_tlb(adev, 0); +	} +  	return 0;  } @@ -1652,15 +1943,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,  	struct amdgpu_bo_va_mapping *mapping;  	struct dma_fence *f = NULL;  	int r; +	uint64_t init_pte_value = 0;  	while (!list_empty(&vm->freed)) {  		mapping = list_first_entry(&vm->freed,  			struct amdgpu_bo_va_mapping, list);  		list_del(&mapping->list); +		if (vm->pte_support_ats) +			init_pte_value = AMDGPU_PTE_SYSTEM; +  		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,  						mapping->start, mapping->last, -						0, 0, &f); +						init_pte_value, 0, &f);  		amdgpu_vm_free_mapping(adev, vm, mapping, f);  		if (r) {  			dma_fence_put(f); @@ -1680,26 +1975,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,  }  /** - * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT + * amdgpu_vm_clear_moved - clear moved BOs in the PT   *   * @adev: 
amdgpu_device pointer   * @vm: requested vm   * - * Make sure all invalidated BOs are cleared in the PT. + * Make sure all moved BOs are cleared in the PT.   * Returns 0 for success.   *   * PTs have to be reserved and mutex must be locked!   */ -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, -			     struct amdgpu_vm *vm, struct amdgpu_sync *sync) +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, +			    struct amdgpu_sync *sync)  {  	struct amdgpu_bo_va *bo_va = NULL;  	int r = 0;  	spin_lock(&vm->status_lock); -	while (!list_empty(&vm->invalidated)) { -		bo_va = list_first_entry(&vm->invalidated, -			struct amdgpu_bo_va, vm_status); +	while (!list_empty(&vm->moved)) { +		bo_va = list_first_entry(&vm->moved, +			struct amdgpu_bo_va, base.vm_status);  		spin_unlock(&vm->status_lock);  		r = amdgpu_vm_bo_update(adev, bo_va, true); @@ -1739,16 +2034,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,  	if (bo_va == NULL) {  		return NULL;  	} -	bo_va->vm = vm; -	bo_va->bo = bo; +	bo_va->base.vm = vm; +	bo_va->base.bo = bo; +	INIT_LIST_HEAD(&bo_va->base.bo_list); +	INIT_LIST_HEAD(&bo_va->base.vm_status); +  	bo_va->ref_count = 1; -	INIT_LIST_HEAD(&bo_va->bo_list);  	INIT_LIST_HEAD(&bo_va->valids);  	INIT_LIST_HEAD(&bo_va->invalids); -	INIT_LIST_HEAD(&bo_va->vm_status);  	if (bo) -		list_add_tail(&bo_va->bo_list, &bo->va); +		list_add_tail(&bo_va->base.bo_list, &bo->va);  	return bo_va;  } @@ -1773,7 +2069,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,  		     uint64_t size, uint64_t flags)  {  	struct amdgpu_bo_va_mapping *mapping, *tmp; -	struct amdgpu_vm *vm = bo_va->vm; +	struct amdgpu_bo *bo = bo_va->base.bo; +	struct amdgpu_vm *vm = bo_va->base.vm;  	uint64_t eaddr;  	/* validate the parameters */ @@ -1784,7 +2081,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,  	/* make sure object fit at this offset */  	eaddr = saddr + size - 1;  	if (saddr >= eaddr || -	    (bo_va->bo && offset + size > 
amdgpu_bo_size(bo_va->bo))) +	    (bo && offset + size > amdgpu_bo_size(bo)))  		return -EINVAL;  	saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -1794,7 +2091,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,  	if (tmp) {  		/* bo and tmp overlap, invalid addr */  		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " -			"0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, +			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,  			tmp->start, tmp->last + 1);  		return -EINVAL;  	} @@ -1839,7 +2136,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,  			     uint64_t size, uint64_t flags)  {  	struct amdgpu_bo_va_mapping *mapping; -	struct amdgpu_vm *vm = bo_va->vm; +	struct amdgpu_bo *bo = bo_va->base.bo; +	struct amdgpu_vm *vm = bo_va->base.vm;  	uint64_t eaddr;  	int r; @@ -1851,7 +2149,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,  	/* make sure object fit at this offset */  	eaddr = saddr + size - 1;  	if (saddr >= eaddr || -	    (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) +	    (bo && offset + size > amdgpu_bo_size(bo)))  		return -EINVAL;  	/* Allocate all the needed memory */ @@ -1859,7 +2157,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,  	if (!mapping)  		return -ENOMEM; -	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); +	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);  	if (r) {  		kfree(mapping);  		return r; @@ -1899,7 +2197,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,  		       uint64_t saddr)  {  	struct amdgpu_bo_va_mapping *mapping; -	struct amdgpu_vm *vm = bo_va->vm; +	struct amdgpu_vm *vm = bo_va->base.vm;  	bool valid = true;  	saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2047,12 +2345,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,  		      struct amdgpu_bo_va *bo_va)  {  	struct amdgpu_bo_va_mapping *mapping, *next; -	struct amdgpu_vm *vm = bo_va->vm; +	struct amdgpu_vm *vm = bo_va->base.vm; -	list_del(&bo_va->bo_list); +	list_del(&bo_va->base.bo_list);  	
spin_lock(&vm->status_lock); -	list_del(&bo_va->vm_status); +	list_del(&bo_va->base.vm_status);  	spin_unlock(&vm->status_lock);  	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { @@ -2084,13 +2382,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,  void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,  			     struct amdgpu_bo *bo)  { -	struct amdgpu_bo_va *bo_va; - -	list_for_each_entry(bo_va, &bo->va, bo_list) { -		spin_lock(&bo_va->vm->status_lock); -		if (list_empty(&bo_va->vm_status)) -			list_add(&bo_va->vm_status, &bo_va->vm->invalidated); -		spin_unlock(&bo_va->vm->status_lock); +	struct amdgpu_vm_bo_base *bo_base; + +	list_for_each_entry(bo_base, &bo->va, bo_list) { +		spin_lock(&bo_base->vm->status_lock); +		if (list_empty(&bo_base->vm_status)) +			list_add(&bo_base->vm_status, +				 &bo_base->vm->moved); +		spin_unlock(&bo_base->vm->status_lock);  	}  } @@ -2108,12 +2407,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)  }  /** - * amdgpu_vm_adjust_size - adjust vm size and block size + * amdgpu_vm_set_fragment_size - adjust fragment size in PTE + * + * @adev: amdgpu_device pointer + * @fragment_size_default: the default fragment size if it's set auto + */ +void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default) +{ +	if (amdgpu_vm_fragment_size == -1) +		adev->vm_manager.fragment_size = fragment_size_default; +	else +		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; +} + +/** + * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size   *   * @adev: amdgpu_device pointer   * @vm_size: the default vm size if it's set auto   */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)  {  	/* adjust vm size firstly */  	if (amdgpu_vm_size == -1) @@ -2128,8 +2441,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) 
 	else  		adev->vm_manager.block_size = amdgpu_vm_block_size; -	DRM_INFO("vm size is %llu GB, block size is %u-bit\n", -		adev->vm_manager.vm_size, adev->vm_manager.block_size); +	amdgpu_vm_set_fragment_size(adev, fragment_size_default); + +	DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", +		adev->vm_manager.vm_size, adev->vm_manager.block_size, +		adev->vm_manager.fragment_size);  }  /** @@ -2137,22 +2453,28 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)   *   * @adev: amdgpu_device pointer   * @vm: requested vm + * @vm_context: Indicates if it GFX or Compute context   *   * Init @vm fields.   */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, +		   int vm_context)  {  	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,  		AMDGPU_VM_PTE_COUNT(adev) * 8);  	unsigned ring_instance;  	struct amdgpu_ring *ring;  	struct amd_sched_rq *rq; -	int r; +	int r, i; +	u64 flags; +	uint64_t init_pde_value = 0; -	vm->va = RB_ROOT; +	vm->va = RB_ROOT_CACHED;  	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); +	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) +		vm->reserved_vmid[i] = NULL;  	spin_lock_init(&vm->status_lock); -	INIT_LIST_HEAD(&vm->invalidated); +	INIT_LIST_HEAD(&vm->moved);  	INIT_LIST_HEAD(&vm->cleared);  	INIT_LIST_HEAD(&vm->freed); @@ -2167,15 +2489,37 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)  	if (r)  		return r; +	vm->pte_support_ats = false; + +	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { +		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & +						AMDGPU_VM_USE_CPU_FOR_COMPUTE); + +		if (adev->asic_type == CHIP_RAVEN) { +			vm->pte_support_ats = true; +			init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; +		} +	} else +		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & +						AMDGPU_VM_USE_CPU_FOR_GFX); +	DRM_DEBUG_DRIVER("VM 
update mode is %s\n", +			 vm->use_cpu_for_update ? "CPU" : "SDMA"); +	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), +		  "CPU update of VM recommended only for large BAR system\n");  	vm->last_dir_update = NULL; +	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | +			AMDGPU_GEM_CREATE_VRAM_CLEARED; +	if (vm->use_cpu_for_update) +		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; +	else +		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | +				AMDGPU_GEM_CREATE_SHADOW); +  	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,  			     AMDGPU_GEM_DOMAIN_VRAM, -			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS | -			     AMDGPU_GEM_CREATE_SHADOW | -			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | -			     AMDGPU_GEM_CREATE_VRAM_CLEARED, -			     NULL, NULL, &vm->root.bo); +			     flags, +			     NULL, NULL, init_pde_value, &vm->root.bo);  	if (r)  		goto error_free_sched_entity; @@ -2184,6 +2528,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)  		goto error_free_root;  	vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + +	if (vm->use_cpu_for_update) { +		r = amdgpu_bo_kmap(vm->root.bo, NULL); +		if (r) +			goto error_free_root; +	} +  	amdgpu_bo_unreserve(vm->root.bo);  	return 0; @@ -2219,7 +2570,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)  		for (i = 0; i <= level->last_entry_used; i++)  			amdgpu_vm_free_levels(&level->entries[i]); -	drm_free_large(level->entries); +	kvfree(level->entries);  }  /** @@ -2235,13 +2586,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)  {  	struct amdgpu_bo_va_mapping *mapping, *tmp;  	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; +	int i;  	amd_sched_entity_fini(vm->entity.sched, &vm->entity); -	if (!RB_EMPTY_ROOT(&vm->va)) { +	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {  		dev_err(adev->dev, "still active bo inside vm\n");  	} -	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { +	
rbtree_postorder_for_each_entry_safe(mapping, tmp, +					     &vm->va.rb_root, rb) {  		list_del(&mapping->list);  		amdgpu_vm_it_remove(mapping, &vm->va);  		kfree(mapping); @@ -2258,6 +2611,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)  	amdgpu_vm_free_levels(&vm->root);  	dma_fence_put(vm->last_dir_update); +	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) +		amdgpu_vm_free_reserved_vmid(adev, vm, i);  }  /** @@ -2277,6 +2632,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)  		mutex_init(&id_mgr->lock);  		INIT_LIST_HEAD(&id_mgr->ids_lru); +		atomic_set(&id_mgr->reserved_vmid_num, 0);  		/* skip over VMID 0, since it is the system VM */  		for (j = 1; j < id_mgr->num_ids; ++j) { @@ -2295,6 +2651,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)  	atomic64_set(&adev->vm_manager.client_counter, 0);  	spin_lock_init(&adev->vm_manager.prt_lock);  	atomic_set(&adev->vm_manager.num_prt_users, 0); + +	/* If not overridden by the user, by default, only in large BAR systems +	 * Compute VM tables will be updated by CPU +	 */ +#ifdef CONFIG_X86_64 +	if (amdgpu_vm_update_mode == -1) { +		if (amdgpu_vm_is_large_bar(adev)) +			adev->vm_manager.vm_update_mode = +				AMDGPU_VM_USE_CPU_FOR_COMPUTE; +		else +			adev->vm_manager.vm_update_mode = 0; +	} else +		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; +#else +	adev->vm_manager.vm_update_mode = 0; +#endif +  }  /** @@ -2322,3 +2695,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)  		}  	}  } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ +	union drm_amdgpu_vm *args = data; +	struct amdgpu_device *adev = dev->dev_private; +	struct amdgpu_fpriv *fpriv = filp->driver_priv; +	int r; + +	switch (args->in.op) { +	case AMDGPU_VM_OP_RESERVE_VMID: +		/* current, we only have requirement to reserve vmid from gfxhub */ +		r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, +						  AMDGPU_GFXHUB); +		if (r) +			return r; +		break; +	
case AMDGPU_VM_OP_UNRESERVE_VMID: +		amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); +		break; +	default: +		return -EINVAL; +	} + +	return 0; +} |