From 3b4d68e993d941ec993660f10bacdfd556731dc7 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 1 May 2017 18:09:22 +0800 Subject: drm/amdgpu:use FRAME_CNTL for new GFX ucode (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AI affected: CP/HW team requires KMD insert FRAME_CONTROL(end) after the last IB and before the fence of this DMAframe. this is to make sure the cache are flushed, and it's a must change no matter MCBP/SR-IOV or bare-metal case because new CP hw won't do the cache flush for each IB anymore, it just leaves it to KMD now. with this patch, certain MCBP hang issue when rendering vulkan/chained-ib are resolved. v2: drop gfx8 changes. gfx8 is not affected (Alex) Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6e4ae0d983c2..72ded77f3ee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -208,6 +208,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = false; } + if (ring->funcs->emit_tmz) + amdgpu_ring_emit_tmz(ring, false); + if (ring->funcs->emit_hdp_invalidate #ifdef CONFIG_X86_64 && !(adev->flags & AMD_IS_APU) -- cgit From 503bb31be42e3fea15c5e81bb2d84a0123438c85 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 3 May 2017 14:55:07 +0800 Subject: drm/amdgpu:cleanup flag not used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70cebbe7f8d9..8e828cda9baa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1117,7 +1117,6 @@ struct amdgpu_cs_parser { #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ -#define AMDGPU_VM_DOMAIN (1 << 3) /* bit set means in virtual memory context */ struct amdgpu_job { struct amd_sched_job base; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 72ded77f3ee7..53d0d21ee26d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -188,8 +188,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, status |= AMDGPU_HAVE_CTX_SWITCH; status |= job->preamble_status; - if (vm) - status |= AMDGPU_VM_DOMAIN; amdgpu_ring_emit_cntxcntl(ring, status); } -- cgit From df83d1ebc9e304fa3ba4bf79dba76418789a77cf Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 9 May 2017 15:50:22 +0800 Subject: drm/amdgpu: add sched sync for amdgpu job v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this is an improvement for previous patch, the sched_sync is to store fence that could be skipped as scheduled, when job is executed, we didn't need pipeline_sync if all fences in sched_sync are signalled, otherwise insert pipeline_sync still. v2: handle error when adding fence to sync failed. Signed-off-by: Chunming Zhou Reviewed-by: Junwei Zhang (v1) Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 +++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 13 +++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ea8ad69fa65d..37bd00345ae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1124,6 +1124,7 @@ struct amdgpu_job { struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; + struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 53d0d21ee26d..631a9f77b973 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; + struct dma_fence *tmp; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; @@ -160,8 +161,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } - if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) + + if (ring->funcs->emit_pipeline_sync && job && + (tmp = amdgpu_sync_get_fence(&job->sched_sync))) { + job->need_pipeline_sync = true; amdgpu_ring_emit_pipeline_sync(ring); + dma_fence_put(tmp); + } if (vm) { r = amdgpu_vm_flush(ring, job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 7570f2439a11..4af92649c4a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); + amdgpu_sync_create(&(*job)->sched_sync); return 0; } @@ -98,6 +99,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -107,6 +109,7 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -139,10 +142,10 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct amdgpu_vm *vm = job->vm; struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); + int r; while (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; - int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -153,9 +156,11 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) fence = amdgpu_sync_get_fence(&job->sync); } - if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) - job->need_pipeline_sync = true; - + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { + r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence to sync (%d)\n", r); + } return fence; } -- cgit From b9bf33d5ac55aa9f23b60b4d03017b2e59d02f02 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 11 May 2017 14:52:48 -0400 Subject: drm/amdgpu: make pipeline sync be in same place v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: directly return for 'if' case. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 32 +++++++++++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 5 files changed, 34 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 37bd00345ae0..e2cafbd690c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1132,7 +1132,6 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; - bool need_pipeline_sync; unsigned vm_id; uint64_t vm_pd_addr; uint32_t gds_base, gds_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 631a9f77b973..cb9472f52e8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,7 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - struct dma_fence *tmp; + struct dma_fence *tmp = NULL; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; @@ -163,8 +163,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } if (ring->funcs->emit_pipeline_sync && job && - (tmp = amdgpu_sync_get_fence(&job->sched_sync))) { - job->need_pipeline_sync = true; + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + amdgpu_vm_need_pipeline_sync(ring, job))) { amdgpu_ring_emit_pipeline_sync(ring); dma_fence_put(tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4af92649c4a4..874979cc3207 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -57,7 +57,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; - (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sched_sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 11f49b81f653..017258d6b9d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -694,6 +694,35 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) return addr; } +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id; + bool gds_switch_needed; + bool vm_flush_needed = job->vm_needs_flush || + amdgpu_vm_ring_has_compute_vm_bug(ring); + + if (job->vm_id == 0) + return false; + id = &id_mgr->ids[job->vm_id]; + gds_switch_needed = ring->funcs->emit_gds_switch && ( + id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size); + + if (amdgpu_vm_had_gpu_reset(adev, id)) + return true; + if (!vm_flush_needed && !gds_switch_needed) + return false; + return true; +} + /** * amdgpu_vm_flush - hardware flush the vm * @@ -732,9 +761,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) - amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); struct dma_fence *fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fb25bb747730..9c7b15925aa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -245,5 +245,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job); #endif -- cgit From ef44f8541e8e99f1040c0d1b147956e2c2f25d79 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Thu, 11 May 2017 16:29:08 -0400 Subject: drm/amdgpu: add a ring func for vcn start command Needed for the proper command sequence for VCN. Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index cb9472f52e8c..f774b3f497d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -169,6 +169,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dma_fence_put(tmp); } + if (ring->funcs->insert_start) + ring->funcs->insert_start(ring); + if (vm) { r = amdgpu_vm_flush(ring, job); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 731f0941927a..9bf3100f273e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -132,6 +132,7 @@ struct amdgpu_ring_funcs { int (*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); + void (*insert_start)(struct amdgpu_ring *ring); void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); -- cgit