Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 82
1 files changed, 57 insertions, 25 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a162d87ca0c8..51b5e977ca88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
+	unsigned fence_flags = 0;
 	unsigned i;
 	int r = 0;
 
@@ -138,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	/* ring tests don't use a job */
 	if (job) {
 		vm = job->vm;
-		fence_ctx = job->fence_ctx;
+		fence_ctx = job->base.s_fence->scheduled.context;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
@@ -163,8 +164,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return r;
 	}
 
+	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
 	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
+	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
 	     amdgpu_vm_need_pipeline_sync(ring, job))) {
 		need_pipe_sync = true;
 		dma_fence_put(tmp);
@@ -181,18 +184,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
-	if (ring->funcs->init_cond_exec)
+	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-	if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
-	   )
-		amdgpu_ring_emit_hdp_flush(ring);
+	{
+		if (ring->funcs->emit_hdp_flush)
+			amdgpu_ring_emit_hdp_flush(ring);
+		else
+			amdgpu_asic_flush_hdp(adev, ring);
+	}
 
 	skip_preamble = ring->current_ctx == fence_ctx;
-	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (job && ring->funcs->emit_cntxcntl) {
 		if (need_ctx_switch)
 			status |= AMDGPU_HAVE_CTX_SWITCH;
@@ -219,14 +224,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_tmz)
 		amdgpu_ring_emit_tmz(ring, false);
 
-	if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
-	    && !(adev->flags & AMD_IS_APU)
+	if (!(adev->flags & AMD_IS_APU))
 #endif
-	   )
-		amdgpu_ring_emit_hdp_invalidate(ring);
+		amdgpu_asic_invalidate_hdp(adev, ring);
 
-	r = amdgpu_fence_emit(ring, f);
+	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+	/* wrap the last IB with fence */
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
+				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
+	}
+
+	r = amdgpu_fence_emit(ring, f, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
@@ -238,12 +250,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
 
-	/* wrap the last IB with fence */
-	if (job && job->uf_addr) {
-		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-				       AMDGPU_FENCE_FLAG_64BIT);
-	}
-
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -278,11 +284,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
-	if (r) {
-		return r;
-	}
-
 	adev->ib_pool_ready = true;
 	if (amdgpu_debugfs_sa_init(adev)) {
 		dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +302,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
 	if (adev->ib_pool_ready) {
-		amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
 		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
 		adev->ib_pool_ready = false;
 	}
@@ -321,14 +321,46 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
 	unsigned i;
 	int r, ret = 0;
+	long tmo_gfx, tmo_mm;
+
+	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+	if (amdgpu_sriov_vf(adev)) {
+		/* for MM engines in hypervisor side they are not scheduled together
+		 * with CP and SDMA engines, so even in exclusive mode MM engine could
+		 * still running on other VF thus the IB TEST TIMEOUT for MM engines
+		 * under SR-IOV should be set to a long time. 8 sec should be enough
+		 * for the MM comes back to this VF.
+		 */
+		tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
+
+	if (amdgpu_sriov_runtime(adev)) {
+		/* for CP & SDMA engines since they are scheduled together so
+		 * need to make the timeout width enough to cover the time
+		 * cost waiting for it coming back under RUNTIME only
+		 */
+		tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	}
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+		long tmo;
 
 		if (!ring || !ring->ready)
 			continue;
 
-		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+		/* MM engine need more time */
+		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+		    ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+			tmo = tmo_mm;
+		else
+			tmo = tmo_gfx;
+
+		r = amdgpu_ring_test_ib(ring, tmo);
 		if (r) {
 			ring->ready = false;
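
The timeout policy the last hunk adds to amdgpu_ib_ring_tests() can be read as a
standalone helper: multimedia rings get the stretched window whenever the device
is an SR-IOV VF, while CP/SDMA rings get it only in SR-IOV runtime mode. The
sketch below is illustrative only, assuming the usual amdgpu.h definitions;
pick_ib_test_timeout() is a hypothetical name, not a function introduced by this
patch:

	/* Illustrative sketch only -- not part of the patch above.
	 * pick_ib_test_timeout() is a hypothetical helper restating the
	 * per-ring IB-test timeout selection from amdgpu_ib_ring_tests().
	 */
	static long pick_ib_test_timeout(struct amdgpu_device *adev,
					 struct amdgpu_ring *ring)
	{
		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_UVD:
		case AMDGPU_RING_TYPE_VCE:
		case AMDGPU_RING_TYPE_UVD_ENC:
		case AMDGPU_RING_TYPE_VCN_DEC:
		case AMDGPU_RING_TYPE_VCN_ENC:
		case AMDGPU_RING_TYPE_VCN_JPEG:
			/* MM engines are scheduled independently of CP/SDMA on
			 * the hypervisor side, so another VF may still hold them
			 * even in exclusive mode; allow 8x the default timeout
			 * on any SR-IOV VF.
			 */
			if (amdgpu_sriov_vf(adev))
				return 8 * AMDGPU_IB_TEST_TIMEOUT;
			break;
		default:
			/* CP and SDMA are scheduled together, so they only need
			 * the wider window under SR-IOV runtime mode.
			 */
			if (amdgpu_sriov_runtime(adev))
				return 8 * AMDGPU_IB_TEST_TIMEOUT;
			break;
		}
		return AMDGPU_IB_TEST_TIMEOUT;
	}

With such a helper the loop body would reduce to
r = amdgpu_ring_test_ib(ring, pick_ib_test_timeout(adev, ring)); the patch
instead precomputes tmo_mm and tmo_gfx once before the loop, which avoids
re-evaluating the SR-IOV checks per ring.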