Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c	| 154
1 file changed, 62 insertions, 92 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 83b0c5d86e48..dc379dc22c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -45,22 +45,43 @@
 /**
  * DOC: GPUVM
  *
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time.  The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time.  The GPUVM page tables can contain a mix of
+ * VRAM pages and system pages (both memory and MMIO) and system pages
  * can be mapped as snooped (cached system pages) or unsnooped
  * (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID.  When executing a command buffer,
- * the kernel tells the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID.  When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
  * buffer.  VMIDs are allocated dynamically as commands are submitted.
  * The userspace drivers maintain their own address space and the kernel
  * sets up their page tables accordingly when they submit their
  * command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family.  GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special.  It is the GPUVM used for the kernel driver.  In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures.  There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present).  These apertures provide direct access to these memories without
+ * incurring the overhead of a page table.  VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have its own unique GPUVM
+ * address space.  The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process.  If a GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
  */
 
 #define START(node) ((node)->start)
@@ -143,32 +164,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	return 0;
 }
 
-/*
- * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
- * happens while holding this lock anywhere to prevent deadlocks when
- * an MMU notifier runs in reclaim-FS context.
- */
-static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
-{
-	mutex_lock(&vm->eviction_lock);
-	vm->saved_flags = memalloc_noreclaim_save();
-}
-
-static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
-{
-	if (mutex_trylock(&vm->eviction_lock)) {
-		vm->saved_flags = memalloc_noreclaim_save();
-		return 1;
-	}
-	return 0;
-}
-
-static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
-{
-	memalloc_noreclaim_restore(vm->saved_flags);
-	mutex_unlock(&vm->eviction_lock);
-}
-
 /**
  * amdgpu_vm_bo_evicted - vm_bo is evicted
  *
@@ -489,25 +484,20 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vmid *id;
-	bool gds_switch_needed;
-	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 
 	if (job->vmid == 0)
 		return false;
-	id = &id_mgr->ids[job->vmid];
-	gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != job->gds_base ||
-		id->gds_size != job->gds_size ||
-		id->gws_base != job->gws_base ||
-		id->gws_size != job->gws_size ||
-		id->oa_base != job->oa_base ||
-		id->oa_size != job->oa_size);
-
-	if (amdgpu_vmid_had_gpu_reset(adev, id))
+
+	if (job->vm_needs_flush || ring->has_compute_vm_bug)
+		return true;
+
+	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+		return true;
+
+	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
 		return true;
 
-	return vm_flush_needed || gds_switch_needed;
+	return false;
 }
 
 /**
@@ -529,27 +519,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != job->gds_base ||
-		id->gds_size != job->gds_size ||
-		id->gws_base != job->gws_base ||
-		id->gws_size != job->gws_size ||
-		id->oa_base != job->oa_base ||
-		id->oa_size != job->oa_size);
+	bool spm_update_needed = job->spm_update_needed;
+	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+		job->gds_switch_needed;
 	bool vm_flush_needed = job->vm_needs_flush;
 	struct dma_fence *fence = NULL;
 	bool pasid_mapping_needed = false;
 	unsigned patch_offset = 0;
-	bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
 	int r;
 
-	if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
-		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
-
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
 		pasid_mapping_needed = true;
+		spm_update_needed = true;
 	}
 
 	mutex_lock(&id_mgr->lock);
@@ -567,6 +550,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 		return 0;
 
+	amdgpu_ring_ib_begin(ring);
 	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -581,6 +565,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	if (pasid_mapping_needed)
 		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+
+	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+	    gds_switch_needed) {
+		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+					    job->gds_size, job->gws_base,
+					    job->gws_size, job->oa_base,
+					    job->oa_size);
+	}
+
 	if (vm_flush_needed || pasid_mapping_needed) {
 		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
 		if (r)
@@ -605,20 +600,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	}
 	dma_fence_put(fence);
 
-	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
-	    gds_switch_needed) {
-		id->gds_base = job->gds_base;
-		id->gds_size = job->gds_size;
-		id->gws_base = job->gws_base;
-		id->gws_size = job->gws_size;
-		id->oa_base = job->oa_base;
-		id->oa_size = job->oa_size;
-		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
-					    job->gds_size, job->gws_base,
-					    job->gws_size, job->oa_base,
-					    job->oa_size);
-	}
-
 	if (ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -627,6 +608,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		amdgpu_ring_emit_switch_buffer(ring);
 		amdgpu_ring_emit_switch_buffer(ring);
 	}
+	amdgpu_ring_ib_end(ring);
 	return 0;
 }
 
@@ -2338,7 +2320,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	 */
 #ifdef CONFIG_X86_64
 	if (amdgpu_vm_update_mode == -1) {
-		if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+		/* For ASICs with VF MMIO access protection,
+		 * avoid using the CPU for VM table updates
+		 */
+		if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+		    !amdgpu_sriov_vf_mmio_access_protection(adev))
 			adev->vm_manager.vm_update_mode =
 				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
 		else
@@ -2382,7 +2368,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	union drm_amdgpu_vm *args = data;
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	long timeout = msecs_to_jiffies(2000);
 	int r;
 
 	switch (args->in.op) {
@@ -2394,21 +2379,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			return r;
 		break;
 	case AMDGPU_VM_OP_UNRESERVE_VMID:
-		if (amdgpu_sriov_runtime(adev))
-			timeout = 8 * timeout;
-
-		/* Wait vm idle to make sure the vmid set in SPM_VMID is
-		 * not referenced anymore.
-		 */
-		r = amdgpu_bo_reserve(fpriv->vm.root.bo, true);
-		if (r)
-			return r;
-
-		r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
-		if (r < 0)
-			return r;
-
-		amdgpu_bo_unreserve(fpriv->vm.root.bo);
 		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
 		break;
 	default:
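Note on the GDS/GWS/OA rework above: the six-field comparison that amdgpu_vm_need_pipeline_sync() and amdgpu_vm_flush() used to open-code against the VMID state is replaced by a single per-job flag, job->gds_switch_needed, which this file now only consumes. As a rough sketch of what that flag boils down to (the helper below is hypothetical and not part of this diff; the real flag is computed outside amdgpu_vm.c when the job and VMID state are set up):

	/* Illustrative only: the same test the removed code performed inline. */
	static bool gds_switch_needed(const struct amdgpu_vmid *id,
				      const struct amdgpu_job *job)
	{
		/* Does the job need different GDS/GWS/OA ranges than the
		 * ones this VMID last used?
		 */
		return id->gds_base != job->gds_base ||
		       id->gds_size != job->gds_size ||
		       id->gws_base != job->gws_base ||
		       id->gws_size != job->gws_size ||
		       id->oa_base != job->oa_base ||
		       id->oa_size != job->oa_size;
	}

With the flag in hand, amdgpu_vm_flush() now emits amdgpu_ring_emit_gds_switch() before the VM flush fence rather than after it, and the SPM VMID update is likewise driven by job->spm_update_needed instead of checking the reserved VMID at flush time.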