Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 82
1 files changed, 57 insertions, 25 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a162d87ca0c8..51b5e977ca88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
+	unsigned fence_flags = 0;
 	unsigned i;
 	int r = 0;
 
@@ -138,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	/* ring tests don't use a job */
 	if (job) {
 		vm = job->vm;
-		fence_ctx = job->fence_ctx;
+		fence_ctx = job->base.s_fence->scheduled.context;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
@@ -163,8 +164,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return r;
 	}
 
+	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
 	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
+	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
 	     amdgpu_vm_need_pipeline_sync(ring, job))) {
 		need_pipe_sync = true;
 		dma_fence_put(tmp);
@@ -181,18 +184,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
-	if (ring->funcs->init_cond_exec)
+	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-	if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
-	   )
-		amdgpu_ring_emit_hdp_flush(ring);
+	{
+		if (ring->funcs->emit_hdp_flush)
+			amdgpu_ring_emit_hdp_flush(ring);
+		else
+			amdgpu_asic_flush_hdp(adev, ring);
+	}
 
 	skip_preamble = ring->current_ctx == fence_ctx;
-	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (job && ring->funcs->emit_cntxcntl) {
 		if (need_ctx_switch)
 			status |= AMDGPU_HAVE_CTX_SWITCH;
@@ -219,14 +224,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_tmz)
 		amdgpu_ring_emit_tmz(ring, false);
 
-	if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
-	   )
-		amdgpu_ring_emit_hdp_invalidate(ring);
+		amdgpu_asic_invalidate_hdp(adev, ring);
 
-	r = amdgpu_fence_emit(ring, f);
+	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+	/* wrap the last IB with fence */
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
+				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
+	}
+
+	r = amdgpu_fence_emit(ring, f, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
@@ -238,12 +250,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
 
-	/* wrap the last IB with fence */
-	if (job && job->uf_addr) {
-		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-				       AMDGPU_FENCE_FLAG_64BIT);
-	}
-
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -278,11 +284,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
-	if (r) {
-		return r;
-	}
-
 	adev->ib_pool_ready = true;
 	if (amdgpu_debugfs_sa_init(adev)) {
 		dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +302,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
 	if (adev->ib_pool_ready) {
-		amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		adev->ib_pool_ready = false;
 	}
@@ -321,14 +321,46 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
 	unsigned i;
 	int r, ret = 0;
+	long tmo_gfx, tmo_mm;
+
+	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+	if (amdgpu_sriov_vf(adev)) {
+		/* for MM engines in hypervisor side they are not scheduled together
+		 * with CP and SDMA engines, so even in exclusive mode MM engine could
+		 * still running on other VF thus the IB TEST TIMEOUT for MM engines
+		 * under SR-IOV should be set to a long time. 8 sec should be enough
+		 * for the MM comes back to this VF.
+		 */
+		tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
+
+	if (amdgpu_sriov_runtime(adev)) {
+		/* for CP & SDMA engines since they are scheduled together so
+		 * need to make the timeout width enough to cover the time
+		 * cost waiting for it coming back under RUNTIME only
+		 */
+		tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+		long tmo;
 
 		if (!ring || !ring->ready)
 			continue;
 
-		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+		/* MM engine need more time */
+		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+			tmo = tmo_mm;
+		else
+			tmo = tmo_gfx;
+
+		r = amdgpu_ring_test_ib(ring, tmo);
 		if (r) {
 			ring->ready = false;
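
The timeout policy the last hunk adds to amdgpu_ib_ring_tests() can be read as a
standalone helper: multimedia rings get the stretched window whenever the device
is an SR-IOV VF, while CP/SDMA rings get it only in SR-IOV runtime mode. The
sketch below is illustrative only, assuming the usual amdgpu.h definitions;
pick_ib_test_timeout() is a hypothetical name, not a function introduced by this
patch:

	/* Illustrative sketch only -- not part of the patch above.
	 * pick_ib_test_timeout() is a hypothetical helper restating the
	 * per-ring IB-test timeout selection from amdgpu_ib_ring_tests().
	 */
	static long pick_ib_test_timeout(struct amdgpu_device *adev,
					 struct amdgpu_ring *ring)
	{
		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_UVD:
		case AMDGPU_RING_TYPE_VCE:
		case AMDGPU_RING_TYPE_UVD_ENC:
		case AMDGPU_RING_TYPE_VCN_DEC:
		case AMDGPU_RING_TYPE_VCN_ENC:
		case AMDGPU_RING_TYPE_VCN_JPEG:
			/* MM engines are scheduled independently of CP/SDMA on
			 * the hypervisor side, so another VF may still hold them
			 * even in exclusive mode; allow 8x the default timeout
			 * on any SR-IOV VF.
			 */
			if (amdgpu_sriov_vf(adev))
				return 8 * AMDGPU_IB_TEST_TIMEOUT;
			break;
		default:
			/* CP and SDMA are scheduled together, so they only need
			 * the wider window under SR-IOV runtime mode.
			 */
			if (amdgpu_sriov_runtime(adev))
				return 8 * AMDGPU_IB_TEST_TIMEOUT;
			break;
		}
		return AMDGPU_IB_TEST_TIMEOUT;
	}

With such a helper the loop body would reduce to
r = amdgpu_ring_test_ib(ring, pick_ib_test_timeout(adev, ring)); the patch
instead precomputes tmo_mm and tmo_gfx once before the loop, which avoids
re-evaluating the SR-IOV checks per ring.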