From ced5443502b682decd886ccda10f10862e418ae9 Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Thu, 29 Mar 2018 22:36:31 +0530 Subject: drm/scheduler: fix param documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no @kernel parameter anymore and document the @guilty parameter Signed-off-by: Nayan Deshmukh Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 0d95888ccc3e..1d368bc66ac2 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -117,8 +117,9 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) * @sched The pointer to the scheduler * @entity The pointer to a valid drm_sched_entity * @rq The run queue this entity belongs - * @kernel If this is an entity for the kernel * @jobs The max number of jobs in the job queue + * @guilty atomic_t set to 1 when a job on this queue + * is found to be guilty causing a timeout * * return 0 if succeed. negative error code on failure */ -- cgit From a70cdb9eddcfd4ba20d69b84149b4a38648455ac Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Thu, 29 Mar 2018 22:36:33 +0530 Subject: drm/scheduler: move the tracepoints file from the include directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move it with the scheduler code. This is mostly a straight forward rename with no code change except for updating the TRACE_INCLUDE_PATH Signed-off-by: Nayan Deshmukh Suggested-by: Christian König Reviewed-by: Christian König Acked-by: Lucas Stach Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 2 +- drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 82 +++++++++++++++++++++++++ include/drm/gpu_scheduler_trace.h | 82 ------------------------- 3 files changed, 83 insertions(+), 83 deletions(-) create mode 100644 drivers/gpu/drm/scheduler/gpu_scheduler_trace.h delete mode 100644 include/drm/gpu_scheduler_trace.h (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 1d368bc66ac2..310275eaf128 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -30,7 +30,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include "gpu_scheduler_trace.h" #define to_drm_sched_job(sched_job) \ container_of((sched_job), struct drm_sched_job, queue_node) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h new file mode 100644 index 000000000000..4998ad950a48 --- /dev/null +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -0,0 +1,82 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _GPU_SCHED_TRACE_H_ + +#include +#include +#include + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_scheduler +#define TRACE_INCLUDE_FILE gpu_scheduler_trace + +TRACE_EVENT(drm_sched_job, + TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), + TP_ARGS(sched_job, entity), + TP_STRUCT__entry( + __field(struct drm_sched_entity *, entity) + __field(struct dma_fence *, fence) + __field(const char *, name) + __field(uint64_t, id) + __field(u32, job_count) + __field(int, hw_job_count) + ), + + TP_fast_assign( + __entry->entity = entity; + __entry->id = sched_job->id; + __entry->fence = &sched_job->s_fence->finished; + __entry->name = sched_job->sched->name; + __entry->job_count = spsc_queue_count(&entity->job_queue); + __entry->hw_job_count = atomic_read( + &sched_job->sched->hw_rq_count); + ), + TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->id, + __entry->fence, __entry->name, + __entry->job_count, __entry->hw_job_count) +); + +TRACE_EVENT(drm_sched_process_job, + TP_PROTO(struct drm_sched_fence *fence), + TP_ARGS(fence), + TP_STRUCT__entry( + __field(struct dma_fence *, fence) + ), + + TP_fast_assign( + __entry->fence = &fence->finished; + ), + TP_printk("fence=%p signaled", __entry->fence) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/scheduler +#include diff --git a/include/drm/gpu_scheduler_trace.h b/include/drm/gpu_scheduler_trace.h deleted file mode 100644 index 0789e8d0a0e1..000000000000 --- a/include/drm/gpu_scheduler_trace.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _GPU_SCHED_TRACE_H_ - -#include -#include -#include - -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM gpu_scheduler -#define TRACE_INCLUDE_FILE gpu_scheduler_trace - -TRACE_EVENT(drm_sched_job, - TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), - TP_ARGS(sched_job, entity), - TP_STRUCT__entry( - __field(struct drm_sched_entity *, entity) - __field(struct dma_fence *, fence) - __field(const char *, name) - __field(uint64_t, id) - __field(u32, job_count) - __field(int, hw_job_count) - ), - - TP_fast_assign( - __entry->entity = entity; - __entry->id = sched_job->id; - __entry->fence = &sched_job->s_fence->finished; - __entry->name = sched_job->sched->name; - __entry->job_count = spsc_queue_count(&entity->job_queue); - __entry->hw_job_count = atomic_read( - &sched_job->sched->hw_rq_count); - ), - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->id, - __entry->fence, __entry->name, - __entry->job_count, __entry->hw_job_count) -); - -TRACE_EVENT(drm_sched_process_job, - TP_PROTO(struct drm_sched_fence *fence), - TP_ARGS(fence), - TP_STRUCT__entry( - __field(struct dma_fence *, fence) - ), - - TP_fast_assign( - __entry->fence = &fence->finished; - ), - TP_printk("fence=%p signaled", __entry->fence) -); - -#endif - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#include -- cgit From 8ee3a52e3f35e064a3bf82f21dc74ddaf9843648 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Mon, 16 Apr 2018 10:07:02 +0800 Subject: drm/gpu-sched: fix force APP kill hang(v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit issue: there are VMC page fault occurred if force APP kill during 3dmark test, the cause is in entity_fini we manually signal all those jobs in entity's queue which confuse the sync/dep mechanism: 1)page fault occurred in sdma's clear job which operate on shadow buffer, and shadow buffer's Gart table is cleaned by ttm_bo_release since the fence in its reservation was fake signaled by entity_fini() under the case of SIGKILL received. 2)page fault occurred in gfx' job because during the lifetime of gfx job we manually fake signal all jobs from its entity in entity_fini(), thus the unmapping/clear PTE job depend on those result fence is satisfied and sdma start clearing the PTE and lead to GFX page fault. fix: 1)should at least wait all jobs already scheduled complete in entity_fini() if SIGKILL is the case. 2)if a fence signaled and try to clear some entity's dependency, should set this entity guilty to prevent its job really run since the dependency is fake signaled. v2: splitting drm_sched_entity_fini() into two functions: 1)The first one is does the waiting, removes the entity from the runqueue and returns an error when the process was killed. 2)The second one then goes over the entity, install it as completion signal for the remaining jobs and signals all jobs with an error code. v3: 1)Replace the fini1 and fini2 with better name 2)Call the first part before the VM teardown in amdgpu_driver_postclose_kms() and the second part after the VM teardown 3)Keep the original function drm_sched_entity_fini to refine the code. v4: 1)Rename entity->finished to entity->last_scheduled; 2)Rename drm_sched_entity_fini_job_cb() to drm_sched_entity_kill_jobs_cb(); 3)Pass NULL to drm_sched_entity_fini_job_cb() if -ENOENT; 4)Replace the type of entity->fini_status with "int"; 5)Remove the check about entity->finished. Signed-off-by: Monk Liu Signed-off-by: Emily Deng Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 64 ++++++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 ++- drivers/gpu/drm/scheduler/gpu_scheduler.c | 71 ++++++++++++++++++++++++++----- include/drm/gpu_scheduler.h | 7 +++ 5 files changed, 128 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c25ee750c362..ea1b28536bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -681,6 +681,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 09d35051fdd6..eb80edfb1b0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -111,8 +111,9 @@ failed: return r; } -static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) +static void amdgpu_ctx_fini(struct kref *ref) { + struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) kfree(ctx->fences); ctx->fences = NULL; - for (i = 0; i < adev->num_rings; i++) - drm_sched_entity_fini(&adev->rings[i]->sched, - &ctx->rings[i].entity); - amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); mutex_destroy(&ctx->lock); + + kfree(ctx); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -170,12 +169,15 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + u32 i; ctx = container_of(ref, struct amdgpu_ctx, refcount); - amdgpu_ctx_fini(ctx); + for (i = 0; i < ctx->adev->num_rings; i++) + drm_sched_entity_fini(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); - kfree(ctx); + amdgpu_ctx_fini(ref); } static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) @@ -435,16 +437,62 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } +} + +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } +} + void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) { struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id; + amdgpu_ctx_mgr_entity_cleanup(mgr); + idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) { - if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) + if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) DRM_ERROR("ctx %p is still alive\n", ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bd9e723dbb2b..1ed379524117 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -913,8 +913,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, return; pm_runtime_get_sync(dev->dev); - - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr); if (adev->asic_type != CHIP_RAVEN) { amdgpu_uvd_free_handles(adev, file_priv); @@ -935,6 +934,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); amdgpu_vm_fini(adev, &fpriv->vm); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + if (pasid) amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); amdgpu_bo_unref(&pd); diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 310275eaf128..44d21981bf3b 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -136,6 +136,8 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; entity->guilty = guilty; + entity->fini_status = 0; + entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); spin_lock_init(&entity->queue_lock); @@ -197,19 +199,30 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity) return true; } +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, + finish_cb); + drm_sched_fence_finished(job->s_fence); + WARN_ON(job->s_fence->parent); + dma_fence_put(&job->s_fence->finished); + job->sched->ops->free_job(job); +} + + /** * Destroy a context entity * * @sched Pointer to scheduler instance * @entity The pointer to a valid scheduler entity * - * Cleanup and free the allocated resources. + * Splitting drm_sched_entity_fini() into two functions, The first one is does the waiting, + * removes the entity from the runqueue and returns an error when the process was killed. */ -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - int r; - if (!drm_sched_entity_is_initialized(sched, entity)) return; /** @@ -217,13 +230,28 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, * queued IBs or discard them on SIGKILL */ if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) - r = -ERESTARTSYS; + entity->fini_status = -ERESTARTSYS; else - r = wait_event_killable(sched->job_scheduled, + entity->fini_status = wait_event_killable(sched->job_scheduled, drm_sched_entity_is_idle(entity)); drm_sched_entity_set_rq(entity, NULL); - if (r) { +} +EXPORT_SYMBOL(drm_sched_entity_do_release); + +/** + * Destroy a context entity + * + * @sched Pointer to scheduler instance + * @entity The pointer to a valid scheduler entity + * + * The second one then goes over the entity and signals all jobs with an error code. + */ +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + if (entity->fini_status) { struct drm_sched_job *job; + int r; /* Park the kernel for a moment to make sure it isn't processing * our enity. @@ -241,13 +269,26 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_fence *s_fence = job->s_fence; drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH); - drm_sched_fence_finished(s_fence); - WARN_ON(s_fence->parent); - dma_fence_put(&s_fence->finished); - sched->ops->free_job(job); + r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, + drm_sched_entity_kill_jobs_cb); + if (r == -ENOENT) + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", r); } + + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = NULL; } } +EXPORT_SYMBOL(drm_sched_entity_cleanup); + +void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + drm_sched_entity_do_release(sched, entity); + drm_sched_entity_cleanup(sched, entity); +} EXPORT_SYMBOL(drm_sched_entity_fini); static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb) @@ -530,6 +571,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) spin_unlock(&sched->job_list_lock); fence = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); + + dma_fence_put(s_job->entity->last_scheduled); + s_job->entity->last_scheduled = dma_fence_get(&s_fence->finished); + if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, @@ -556,6 +601,7 @@ int drm_sched_job_init(struct drm_sched_job *job, void *owner) { job->sched = sched; + job->entity = entity; job->s_priority = entity->rq - sched->sched_rq; job->s_fence = drm_sched_fence_create(entity, owner); if (!job->s_fence) @@ -669,6 +715,9 @@ static int drm_sched_main(void *param) fence = sched->ops->run_job(sched_job); drm_sched_fence_scheduled(s_fence); + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = dma_fence_get(&s_fence->finished); + if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index c053a32341bf..350a62c26b29 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -65,6 +65,8 @@ struct drm_sched_entity { struct dma_fence *dependency; struct dma_fence_cb cb; atomic_t *guilty; /* points to ctx's guilty */ + int fini_status; + struct dma_fence *last_scheduled; }; /** @@ -119,6 +121,7 @@ struct drm_sched_job { uint64_t id; atomic_t karma; enum drm_sched_priority s_priority; + struct drm_sched_entity *entity; }; static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, @@ -186,6 +189,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, uint32_t jobs, atomic_t *guilty); +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_push_job(struct drm_sched_job *sched_job, -- cgit From a4b3996aeebbaafd2682f4db06bb5659e1653da7 Mon Sep 17 00:00:00 2001 From: Pixel Ding Date: Wed, 18 Apr 2018 04:33:26 -0400 Subject: drm/scheduler: always put last_sched fence in entity_fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the potential memleak since scheduler main thread always hold one last_sched fence. Signed-off-by: Pixel Ding Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 44d21981bf3b..4968867da7a6 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -276,10 +276,10 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); } - - dma_fence_put(entity->last_scheduled); - entity->last_scheduled = NULL; } + + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = NULL; } EXPORT_SYMBOL(drm_sched_entity_cleanup); -- cgit From b5b4ea4d98b42f94442e5d46d5942f392ed8af56 Mon Sep 17 00:00:00 2001 From: Pixel Ding Date: Wed, 18 Apr 2018 04:37:40 -0400 Subject: drm/scheduler: move last_sched fence updating prior to job popping (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure main thread won't update last_sched fence when entity is cleanup. Fix a racing issue which is caused by putting last_sched fence twice. Running vulkaninfo in tight loop can produce this issue as seeing wild fence pointer. v2: squash in build fix (Christian) Signed-off-by: Pixel Ding Reviewed-by: Christian König Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 4968867da7a6..088ff2b4e8fb 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -402,6 +402,9 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (entity->guilty && atomic_read(entity->guilty)) dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED); + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished); + spsc_queue_pop(&entity->job_queue); return sched_job; } @@ -715,9 +718,6 @@ static int drm_sched_main(void *param) fence = sched->ops->run_job(sched_job); drm_sched_fence_scheduled(s_fence); - dma_fence_put(entity->last_scheduled); - entity->last_scheduled = dma_fence_get(&s_fence->finished); - if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, -- cgit From 5dd3f9efd4199f0d9e8244322934494ebd140dfd Mon Sep 17 00:00:00 2001 From: Pixel Ding Date: Tue, 24 Apr 2018 22:52:45 -0400 Subject: drm/scheduler: don't update last scheduled fence in TDR The current sequence in scheduler thread is: 1. update last sched fence 2. job begin (adding to mirror list) 3. job finish (remove from mirror list) 4. back to 1 Since we update last sched prior to joining mirror list, the jobs in mirror list already pass the last sched fence. TDR just run the jobs in mirror list, so we should not update the last sched fences in TDR. Signed-off-by: Pixel Ding Reviewed-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 088ff2b4e8fb..1f1dd70125a7 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -575,9 +575,6 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) fence = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); - dma_fence_put(s_job->entity->last_scheduled); - s_job->entity->last_scheduled = dma_fence_get(&s_fence->finished); - if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, -- cgit From 8344c53f57057b42a5da87e9557c40fcda18fb7a Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Thu, 29 Mar 2018 22:36:32 +0530 Subject: drm/scheduler: remove unused parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this patch also effect the amdgpu and etnaviv drivers which use the function drm_sched_entity_init Signed-off-by: Nayan Deshmukh Suggested-by: Christian König Acked-by: Lucas Stach Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 2 +- drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +-- include/drm/gpu_scheduler.h | 2 +- 11 files changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6741a62a7d15..a8e531d604fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, continue; r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, - rq, amdgpu_sched_jobs, &ctx->guilty); + rq, &ctx->guilty); if (r) goto failed; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index cc3b067e1ec6..5e9fd256faad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -111,7 +111,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) ring = adev->mman.buffer_funcs_ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move run queue.\n"); goto error_entity; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index d8dd4028c2bb..de4d77af02ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -242,7 +242,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) ring = &adev->uvd.ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up UVD run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index e2186eda3271..a86322f5164f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -186,7 +186,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 58e495330b38..e5d234cf804f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) ring = &adev->vcn.ring_dec; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN dec run queue.\n"); return r; @@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) ring = &adev->vcn.ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN enc run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8e71d3984016..1a8f4e0dd023 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2404,7 +2404,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring = adev->vm_manager.vm_pte_rings[ring_instance]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; r = drm_sched_entity_init(&ring->sched, &vm->entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 8041b26a7a21..ca6ab56357b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -429,7 +429,7 @@ static int uvd_v6_0_sw_init(void *handle) ring = &adev->uvd.ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b0de1e04093b..0ca63d588670 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -418,7 +418,7 @@ static int uvd_v7_0_sw_init(void *handle) ring = &adev->uvd.ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index ab50090d066c..23e73c2a19f4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -116,7 +116,7 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file) drm_sched_entity_init(&gpu->sched, &ctx->sched_entity[i], &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], - 32, NULL); + NULL); } } diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 1f1dd70125a7..a364fc0b38c3 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -117,7 +117,6 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) * @sched The pointer to the scheduler * @entity The pointer to a valid drm_sched_entity * @rq The run queue this entity belongs - * @jobs The max number of jobs in the job queue * @guilty atomic_t set to 1 when a job on this queue * is found to be guilty causing a timeout * @@ -126,7 +125,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, - uint32_t jobs, atomic_t *guilty) + atomic_t *guilty) { if (!(sched && entity && rq)) return -EINVAL; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 350a62c26b29..52380067a43f 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -188,7 +188,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched); int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, - uint32_t jobs, atomic_t *guilty); + atomic_t *guilty); void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, -- cgit From 563e1e664d27292a3b55ca08366dc8c32db52450 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 15 May 2018 14:42:20 -0400 Subject: drm/scheduler: Remove obsolete spinlock. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This spinlock is superfluous, any call to drm_sched_entity_push_job should already be under a lock together with matching drm_sched_job_init to match the order of insertion into queue with job's fence seqence number. v2: Improve patch description. Add functions documentation describing the locking considerations Signed-off-by: Andrey Grodzovsky Acked-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 15 ++++++++++----- include/drm/gpu_scheduler.h | 1 - 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index a364fc0b38c3..df1578d6f42e 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -139,7 +139,6 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); - spin_lock_init(&entity->queue_lock); spsc_queue_init(&entity->job_queue); atomic_set(&entity->fence_seq, 0); @@ -413,6 +412,10 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) * * @sched_job The pointer to job required to submit * + * Note: To guarantee that the order of insertion to queue matches + * the job's fence sequence number this function should be + * called with drm_sched_job_init under common lock. + * * Returns 0 for success, negative error code otherwise. */ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, @@ -423,11 +426,8 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, trace_drm_sched_job(sched_job, entity); - spin_lock(&entity->queue_lock); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); - spin_unlock(&entity->queue_lock); - /* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ @@ -593,7 +593,12 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) } EXPORT_SYMBOL(drm_sched_job_recovery); -/* init a sched_job with basic field */ +/** + * Init a sched_job with basic field + * + * Note: Refer to drm_sched_entity_push_job documentation + * for locking considerations. + */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 52380067a43f..dec655894d08 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -56,7 +56,6 @@ struct drm_sched_entity { spinlock_t rq_lock; struct drm_gpu_scheduler *sched; - spinlock_t queue_lock; struct spsc_queue job_queue; atomic_t fence_seq; -- cgit From 6201e033d77fae5ed6798d3d122643c2fe3c24dd Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Fri, 25 May 2018 10:15:46 +0530 Subject: drm/scheduler: fix a corner case in dependency optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When checking for a dependency fence for belonging to the same entity compare it with scheduled as well finished fence. Earlier we were only comparing it with the scheduled fence. Signed-off-by: Nayan Deshmukh Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index df1578d6f42e..44d480768dfe 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -349,8 +349,13 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) struct dma_fence * fence = entity->dependency; struct drm_sched_fence *s_fence; - if (fence->context == entity->fence_context) { - /* We can ignore fences from ourself */ + if (fence->context == entity->fence_context || + fence->context == entity->fence_context + 1) { + /* + * Fence is a scheduled/finished fence from a job + * which belongs to the same entity, we can ignore + * fences from ourself + */ dma_fence_put(entity->dependency); return false; } -- cgit From 2d33948e4e00b501b91367fed21243a948426591 Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Tue, 29 May 2018 11:23:07 +0530 Subject: drm/scheduler: add documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit convert existing raw comments into kernel-doc format as well as add new documentation v2: reword the overview Signed-off-by: Alex Deucher Signed-off-by: Nayan Deshmukh Reviewed-by: Alex Deucher Reviewed-by: Christian König Acked-by: Daniel Vetter --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 214 ++++++++++++++++++++++++------ include/drm/gpu_scheduler.h | 153 +++++++++++++++++---- 2 files changed, 296 insertions(+), 71 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 44d480768dfe..8c1e80c9b674 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -21,6 +21,29 @@ * */ +/** + * DOC: Overview + * + * The GPU scheduler provides entities which allow userspace to push jobs + * into software queues which are then scheduled on a hardware run queue. + * The software queues have a priority among them. The scheduler selects the entities + * from the run queue using a FIFO. The scheduler provides dependency handling + * features among jobs. The driver is supposed to provide callback functions for + * backend operations to the scheduler like submitting a job to hardware run queue, + * returning the dependencies of a job etc. + * + * The organisation of the scheduler is the following: + * + * 1. Each hw run queue has one scheduler + * 2. Each scheduler has multiple run queues with different priorities + * (e.g., HIGH_HW,HIGH_SW, KERNEL, NORMAL) + * 3. Each scheduler run queue has a queue of entities to schedule + * 4. Entities themselves maintain a queue of jobs that will be scheduled on + * the hardware. + * + * The jobs in a entity are always scheduled in the order that they were pushed. + */ + #include #include #include @@ -39,7 +62,13 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); static void drm_sched_wakeup(struct drm_gpu_scheduler *sched); static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); -/* Initialize a given run queue struct */ +/** + * drm_sched_rq_init - initialize a given run queue struct + * + * @rq: scheduler run queue + * + * Initializes a scheduler runqueue. + */ static void drm_sched_rq_init(struct drm_sched_rq *rq) { spin_lock_init(&rq->lock); @@ -47,6 +76,14 @@ static void drm_sched_rq_init(struct drm_sched_rq *rq) rq->current_entity = NULL; } +/** + * drm_sched_rq_add_entity - add an entity + * + * @rq: scheduler run queue + * @entity: scheduler entity + * + * Adds a scheduler entity to the run queue. + */ static void drm_sched_rq_add_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity) { @@ -57,6 +94,14 @@ static void drm_sched_rq_add_entity(struct drm_sched_rq *rq, spin_unlock(&rq->lock); } +/** + * drm_sched_rq_remove_entity - remove an entity + * + * @rq: scheduler run queue + * @entity: scheduler entity + * + * Removes a scheduler entity from the run queue. + */ static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity) { @@ -70,9 +115,9 @@ static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, } /** - * Select an entity which could provide a job to run + * drm_sched_rq_select_entity - Select an entity which could provide a job to run * - * @rq The run queue to check. + * @rq: scheduler run queue to check. * * Try to find a ready entity, returns NULL if none found. */ @@ -112,15 +157,16 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) } /** - * Init a context entity used by scheduler when submit to HW ring. + * drm_sched_entity_init - Init a context entity used by scheduler when + * submit to HW ring. * - * @sched The pointer to the scheduler - * @entity The pointer to a valid drm_sched_entity - * @rq The run queue this entity belongs - * @guilty atomic_t set to 1 when a job on this queue - * is found to be guilty causing a timeout + * @sched: scheduler instance + * @entity: scheduler entity to init + * @rq: the run queue this entity belongs + * @guilty: atomic_t set to 1 when a job on this queue + * is found to be guilty causing a timeout * - * return 0 if succeed. negative error code on failure + * Returns 0 on success or a negative error code on failure. */ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, @@ -149,10 +195,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, EXPORT_SYMBOL(drm_sched_entity_init); /** - * Query if entity is initialized + * drm_sched_entity_is_initialized - Query if entity is initialized * - * @sched Pointer to scheduler instance - * @entity The pointer to a valid scheduler entity + * @sched: Pointer to scheduler instance + * @entity: The pointer to a valid scheduler entity * * return true if entity is initialized, false otherwise */ @@ -164,11 +210,11 @@ static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched, } /** - * Check if entity is idle + * drm_sched_entity_is_idle - Check if entity is idle * - * @entity The pointer to a valid scheduler entity + * @entity: scheduler entity * - * Return true if entity don't has any unscheduled jobs. + * Returns true if the entity does not have any unscheduled jobs. */ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) { @@ -180,9 +226,9 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) } /** - * Check if entity is ready + * drm_sched_entity_is_ready - Check if entity is ready * - * @entity The pointer to a valid scheduler entity + * @entity: scheduler entity * * Return true if entity could provide a job. */ @@ -210,12 +256,12 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, /** - * Destroy a context entity + * drm_sched_entity_do_release - Destroy a context entity * - * @sched Pointer to scheduler instance - * @entity The pointer to a valid scheduler entity + * @sched: scheduler instance + * @entity: scheduler entity * - * Splitting drm_sched_entity_fini() into two functions, The first one is does the waiting, + * Splitting drm_sched_entity_fini() into two functions, The first one does the waiting, * removes the entity from the runqueue and returns an error when the process was killed. */ void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, @@ -237,12 +283,13 @@ void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, EXPORT_SYMBOL(drm_sched_entity_do_release); /** - * Destroy a context entity + * drm_sched_entity_cleanup - Destroy a context entity * - * @sched Pointer to scheduler instance - * @entity The pointer to a valid scheduler entity + * @sched: scheduler instance + * @entity: scheduler entity * - * The second one then goes over the entity and signals all jobs with an error code. + * This should be called after @drm_sched_entity_do_release. It goes over the + * entity and signals all jobs with an error code if the process was killed. */ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) @@ -281,6 +328,14 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, } EXPORT_SYMBOL(drm_sched_entity_cleanup); +/** + * drm_sched_entity_fini - Destroy a context entity + * + * @sched: scheduler instance + * @entity: scheduler entity + * + * Calls drm_sched_entity_do_release() and drm_sched_entity_cleanup() + */ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { @@ -306,6 +361,15 @@ static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +/** + * drm_sched_entity_set_rq - Sets the run queue for an entity + * + * @entity: scheduler entity + * @rq: scheduler run queue + * + * Sets the run queue for an entity and removes the entity from the previous + * run queue in which was present. + */ void drm_sched_entity_set_rq(struct drm_sched_entity *entity, struct drm_sched_rq *rq) { @@ -325,6 +389,14 @@ void drm_sched_entity_set_rq(struct drm_sched_entity *entity, } EXPORT_SYMBOL(drm_sched_entity_set_rq); +/** + * drm_sched_dependency_optimized + * + * @fence: the dependency fence + * @entity: the entity which depends on the above fence + * + * Returns true if the dependency can be optimized and false otherwise + */ bool drm_sched_dependency_optimized(struct dma_fence* fence, struct drm_sched_entity *entity) { @@ -413,9 +485,10 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) } /** - * Submit a job to the job queue + * drm_sched_entity_push_job - Submit a job to the entity's job queue * - * @sched_job The pointer to job required to submit + * @sched_job: job to submit + * @entity: scheduler entity * * Note: To guarantee that the order of insertion to queue matches * the job's fence sequence number this function should be @@ -506,6 +579,13 @@ static void drm_sched_job_timedout(struct work_struct *work) job->sched->ops->timedout_job(job); } +/** + * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job + * + * @sched: scheduler instance + * @bad: bad scheduler job + * + */ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) { struct drm_sched_job *s_job; @@ -550,6 +630,12 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo } EXPORT_SYMBOL(drm_sched_hw_job_reset); +/** + * drm_sched_job_recovery - recover jobs after a reset + * + * @sched: scheduler instance + * + */ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) { struct drm_sched_job *s_job, *tmp; @@ -599,10 +685,17 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) EXPORT_SYMBOL(drm_sched_job_recovery); /** - * Init a sched_job with basic field + * drm_sched_job_init - init a scheduler job * - * Note: Refer to drm_sched_entity_push_job documentation + * @job: scheduler job to init + * @sched: scheduler instance + * @entity: scheduler entity to use + * @owner: job owner for debugging + * + * Refer to drm_sched_entity_push_job() documentation * for locking considerations. + * + * Returns 0 for success, negative error code otherwise. */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_gpu_scheduler *sched, @@ -626,7 +719,11 @@ int drm_sched_job_init(struct drm_sched_job *job, EXPORT_SYMBOL(drm_sched_job_init); /** - * Return ture if we can push more jobs to the hw. + * drm_sched_ready - is the scheduler ready + * + * @sched: scheduler instance + * + * Return true if we can push more jobs to the hw, otherwise false. */ static bool drm_sched_ready(struct drm_gpu_scheduler *sched) { @@ -635,7 +732,10 @@ static bool drm_sched_ready(struct drm_gpu_scheduler *sched) } /** - * Wake up the scheduler when it is ready + * drm_sched_wakeup - Wake up the scheduler when it is ready + * + * @sched: scheduler instance + * */ static void drm_sched_wakeup(struct drm_gpu_scheduler *sched) { @@ -644,8 +744,12 @@ static void drm_sched_wakeup(struct drm_gpu_scheduler *sched) } /** - * Select next entity to process -*/ + * drm_sched_select_entity - Select next entity to process + * + * @sched: scheduler instance + * + * Returns the entity to process or NULL if none are found. + */ static struct drm_sched_entity * drm_sched_select_entity(struct drm_gpu_scheduler *sched) { @@ -665,6 +769,14 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) return entity; } +/** + * drm_sched_process_job - process a job + * + * @f: fence + * @cb: fence callbacks + * + * Called after job has finished execution. + */ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) { struct drm_sched_fence *s_fence = @@ -680,6 +792,13 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) wake_up_interruptible(&sched->wake_up_worker); } +/** + * drm_sched_blocked - check if the scheduler is blocked + * + * @sched: scheduler instance + * + * Returns true if blocked, otherwise false. + */ static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) { if (kthread_should_park()) { @@ -690,6 +809,13 @@ static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) return false; } +/** + * drm_sched_main - main scheduler thread + * + * @param: scheduler instance + * + * Returns 0. + */ static int drm_sched_main(void *param) { struct sched_param sparam = {.sched_priority = 1}; @@ -744,15 +870,17 @@ static int drm_sched_main(void *param) } /** - * Init a gpu scheduler instance + * drm_sched_init - Init a gpu scheduler instance * - * @sched The pointer to the scheduler - * @ops The backend operations for this scheduler. - * @hw_submissions Number of hw submissions to do. - * @name Name used for debugging + * @sched: scheduler instance + * @ops: backend operations for this scheduler + * @hw_submission: number of hw submissions that can be in flight + * @hang_limit: number of times to allow a job to hang before dropping it + * @timeout: timeout value in jiffies for the scheduler + * @name: name used for debugging * * Return 0 on success, otherwise error code. -*/ + */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, unsigned hw_submission, @@ -788,9 +916,11 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, EXPORT_SYMBOL(drm_sched_init); /** - * Destroy a gpu scheduler + * drm_sched_fini - Destroy a gpu scheduler + * + * @sched: scheduler instance * - * @sched The pointer to the scheduler + * Tears down and cleans up the scheduler. */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index dec655894d08..496442f12bff 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -43,13 +43,33 @@ enum drm_sched_priority { }; /** - * drm_sched_entity - A wrapper around a job queue (typically attached - * to the DRM file_priv). + * struct drm_sched_entity - A wrapper around a job queue (typically + * attached to the DRM file_priv). + * + * @list: used to append this struct to the list of entities in the + * runqueue. + * @rq: runqueue to which this entity belongs. + * @rq_lock: lock to modify the runqueue to which this entity belongs. + * @sched: the scheduler instance to which this entity is enqueued. + * @job_queue: the list of jobs of this entity. + * @fence_seq: a linearly increasing seqno incremented with each + * new &drm_sched_fence which is part of the entity. + * @fence_context: a unique context for all the fences which belong + * to this entity. + * The &drm_sched_fence.scheduled uses the + * fence_context but &drm_sched_fence.finished uses + * fence_context + 1. + * @dependency: the dependency fence of the job which is on the top + * of the job queue. + * @cb: callback for the dependency fence above. + * @guilty: points to ctx's guilty. + * @fini_status: contains the exit status in case the process was signalled. + * @last_scheduled: points to the finished fence of the last scheduled job. * * Entities will emit jobs in order to their corresponding hardware * ring, and the scheduler will alternate between entities based on * scheduling policy. -*/ + */ struct drm_sched_entity { struct list_head list; struct drm_sched_rq *rq; @@ -63,47 +83,96 @@ struct drm_sched_entity { struct dma_fence *dependency; struct dma_fence_cb cb; - atomic_t *guilty; /* points to ctx's guilty */ - int fini_status; - struct dma_fence *last_scheduled; + atomic_t *guilty; + int fini_status; + struct dma_fence *last_scheduled; }; /** + * struct drm_sched_rq - queue of entities to be scheduled. + * + * @lock: to modify the entities list. + * @entities: list of the entities to be scheduled. + * @current_entity: the entity which is to be scheduled. + * * Run queue is a set of entities scheduling command submissions for * one specific ring. It implements the scheduling policy that selects * the next entity to emit commands from. -*/ + */ struct drm_sched_rq { spinlock_t lock; struct list_head entities; struct drm_sched_entity *current_entity; }; +/** + * struct drm_sched_fence - fences corresponding to the scheduling of a job. + */ struct drm_sched_fence { + /** + * @scheduled: this fence is what will be signaled by the scheduler + * when the job is scheduled. + */ struct dma_fence scheduled; - /* This fence is what will be signaled by the scheduler when - * the job is completed. - * - * When setting up an out fence for the job, you should use - * this, since it's available immediately upon - * drm_sched_job_init(), and the fence returned by the driver - * from run_job() won't be created until the dependencies have - * resolved. - */ + /** + * @finished: this fence is what will be signaled by the scheduler + * when the job is completed. + * + * When setting up an out fence for the job, you should use + * this, since it's available immediately upon + * drm_sched_job_init(), and the fence returned by the driver + * from run_job() won't be created until the dependencies have + * resolved. + */ struct dma_fence finished; + /** + * @cb: the callback for the parent fence below. + */ struct dma_fence_cb cb; + /** + * @parent: the fence returned by &drm_sched_backend_ops.run_job + * when scheduling the job on hardware. We signal the + * &drm_sched_fence.finished fence once parent is signalled. + */ struct dma_fence *parent; + /** + * @sched: the scheduler instance to which the job having this struct + * belongs to. + */ struct drm_gpu_scheduler *sched; + /** + * @lock: the lock used by the scheduled and the finished fences. + */ spinlock_t lock; + /** + * @owner: job owner for debugging + */ void *owner; }; struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); /** - * drm_sched_job - A job to be run by an entity. + * struct drm_sched_job - A job to be run by an entity. + * + * @queue_node: used to append this struct to the queue of jobs in an entity. + * @sched: the scheduler instance on which this job is scheduled. + * @s_fence: contains the fences for the scheduling of job. + * @finish_cb: the callback for the finished fence. + * @finish_work: schedules the function @drm_sched_job_finish once the job has + * finished to remove the job from the + * @drm_gpu_scheduler.ring_mirror_list. + * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout + * interval is over. + * @id: a unique id assigned to each job scheduled on the scheduler. + * @karma: increment on every hang caused by this job. If this exceeds the hang + * limit of the scheduler then the job is marked guilty and will not + * be scheduled further. + * @s_priority: the priority of the job. + * @entity: the entity to which this job belongs. * * A job is created by the driver using drm_sched_job_init(), and * should call drm_sched_entity_push_job() once it wants the scheduler @@ -130,38 +199,64 @@ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, } /** + * struct drm_sched_backend_ops + * * Define the backend operations called by the scheduler, - * these functions should be implemented in driver side -*/ + * these functions should be implemented in driver side. + */ struct drm_sched_backend_ops { - /* Called when the scheduler is considering scheduling this - * job next, to get another struct dma_fence for this job to + /** + * @dependency: Called when the scheduler is considering scheduling + * this job next, to get another struct dma_fence for this job to * block on. Once it returns NULL, run_job() may be called. */ struct dma_fence *(*dependency)(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity); - /* Called to execute the job once all of the dependencies have - * been resolved. This may be called multiple times, if + /** + * @run_job: Called to execute the job once all of the dependencies + * have been resolved. This may be called multiple times, if * timedout_job() has happened and drm_sched_job_recovery() * decides to try it again. */ struct dma_fence *(*run_job)(struct drm_sched_job *sched_job); - /* Called when a job has taken too long to execute, to trigger - * GPU recovery. + /** + * @timedout_job: Called when a job has taken too long to execute, + * to trigger GPU recovery. */ void (*timedout_job)(struct drm_sched_job *sched_job); - /* Called once the job's finished fence has been signaled and - * it's time to clean it up. + /** + * @free_job: Called once the job's finished fence has been signaled + * and it's time to clean it up. */ void (*free_job)(struct drm_sched_job *sched_job); }; /** - * One scheduler is implemented for each hardware ring -*/ + * struct drm_gpu_scheduler + * + * @ops: backend operations provided by the driver. + * @hw_submission_limit: the max size of the hardware queue. + * @timeout: the time after which a job is removed from the scheduler. + * @name: name of the ring for which this scheduler is being used. + * @sched_rq: priority wise array of run queues. + * @wake_up_worker: the wait queue on which the scheduler sleeps until a job + * is ready to be scheduled. + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler + * waits on this wait queue until all the scheduled jobs are + * finished. + * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. + * @thread: the kthread on which the scheduler which run. + * @ring_mirror_list: the list of jobs which are currently in the job queue. + * @job_list_lock: lock to protect the ring_mirror_list. + * @hang_limit: once the hangs by a job crosses this limit then it is marked + * guilty and it will be considered for scheduling further. + * + * One scheduler is implemented for each hardware ring. + */ struct drm_gpu_scheduler { const struct drm_sched_backend_ops *ops; uint32_t hw_submission_limit; -- cgit From 741f01e636b72ff3f81204fd595ac1078907671b Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 30 May 2018 15:11:01 -0400 Subject: drm/scheduler: Avoid using wait_event_killable for dying process (V4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dying process might be blocked from receiving any more signals so avoid using it. Also retire enity->fini_status and just check the SW queue, if it's not empty do the fallback cleanup. Also handle entity->last_scheduled == NULL use case which happens when HW ring is already hangged whem a new entity tried to enqeue jobs. v2: Return the remaining timeout and use that as parameter for the next call. This way when we need to cleanup multiple queues we don't wait for the entire TO period for each queue but rather in total. Styling comments. Rebase. v3: Update types from unsigned to long. Work with jiffies instead of ms. Return 0 when TO expires. Rebase. v4: Remove unnecessary timeout calculation. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 67 ++++++++++++++++++++++--------- include/drm/gpu_scheduler.h | 7 ++-- 2 files changed, 53 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 8c1e80c9b674..6a316701da73 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -181,7 +181,6 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; entity->guilty = guilty; - entity->fini_status = 0; entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); @@ -219,7 +218,8 @@ static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched, static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) { rmb(); - if (spsc_queue_peek(&entity->job_queue) == NULL) + + if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL) return true; return false; @@ -260,25 +260,39 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, * * @sched: scheduler instance * @entity: scheduler entity + * @timeout: time to wait in for Q to become empty in jiffies. * * Splitting drm_sched_entity_fini() into two functions, The first one does the waiting, * removes the entity from the runqueue and returns an error when the process was killed. + * + * Returns the remaining time in jiffies left from the input timeout */ -void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) +long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity, long timeout) { + long ret = timeout; + if (!drm_sched_entity_is_initialized(sched, entity)) - return; + return ret; /** * The client will not queue more IBs during this fini, consume existing * queued IBs or discard them on SIGKILL */ - if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) - entity->fini_status = -ERESTARTSYS; - else - entity->fini_status = wait_event_killable(sched->job_scheduled, - drm_sched_entity_is_idle(entity)); - drm_sched_entity_set_rq(entity, NULL); + if (current->flags & PF_EXITING) { + if (timeout) + ret = wait_event_timeout( + sched->job_scheduled, + drm_sched_entity_is_idle(entity), + timeout); + } else + wait_event_killable(sched->job_scheduled, drm_sched_entity_is_idle(entity)); + + + /* For killed process disable any more IBs enqueue right now */ + if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) + drm_sched_entity_set_rq(entity, NULL); + + return ret; } EXPORT_SYMBOL(drm_sched_entity_do_release); @@ -290,11 +304,18 @@ EXPORT_SYMBOL(drm_sched_entity_do_release); * * This should be called after @drm_sched_entity_do_release. It goes over the * entity and signals all jobs with an error code if the process was killed. + * */ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - if (entity->fini_status) { + + drm_sched_entity_set_rq(entity, NULL); + + /* Consumption of existing IBs wasn't completed. Forcefully + * remove them here. + */ + if (spsc_queue_peek(&entity->job_queue)) { struct drm_sched_job *job; int r; @@ -314,12 +335,22 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_fence *s_fence = job->s_fence; drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH); - r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, - drm_sched_entity_kill_jobs_cb); - if (r == -ENOENT) + + /* + * When pipe is hanged by older entity, new entity might + * not even have chance to submit it's first job to HW + * and so entity->last_scheduled will remain NULL + */ + if (!entity->last_scheduled) { drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); - else if (r) - DRM_ERROR("fence add callback failed (%d)\n", r); + } else { + r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, + drm_sched_entity_kill_jobs_cb); + if (r == -ENOENT) + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", r); + } } } @@ -339,7 +370,7 @@ EXPORT_SYMBOL(drm_sched_entity_cleanup); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - drm_sched_entity_do_release(sched, entity); + drm_sched_entity_do_release(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY); drm_sched_entity_cleanup(sched, entity); } EXPORT_SYMBOL(drm_sched_entity_fini); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 496442f12bff..7c2dfd6cc1af 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -27,6 +27,8 @@ #include #include +#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) + struct drm_gpu_scheduler; struct drm_sched_rq; @@ -84,7 +86,6 @@ struct drm_sched_entity { struct dma_fence *dependency; struct dma_fence_cb cb; atomic_t *guilty; - int fini_status; struct dma_fence *last_scheduled; }; @@ -283,8 +284,8 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, atomic_t *guilty); -void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity); +long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity, long timeout); void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, -- cgit From 180fc134d712a93a2bbc3d11ed657b5208e6f90f Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 5 Jun 2018 12:43:23 -0400 Subject: drm/scheduler: Rename cleanup functions v2. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Everything in the flush code path (i.e. waiting for SW queue to become empty) names with *_flush() and everything in the release code path names *_fini() This patch also effect the amdgpu and etnaviv drivers which use those functions. v2: Also pplay the change to vd3. Signed-off-by: Andrey Grodzovsky Suggested-by: Christian König Acked-by: Lucas Stach Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 4 ++-- drivers/gpu/drm/scheduler/gpu_scheduler.c | 18 +++++++++--------- drivers/gpu/drm/v3d/v3d_drv.c | 2 +- include/drm/gpu_scheduler.h | 6 +++--- 11 files changed, 26 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 64b3a1ed04dc..c0f06c02f2de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -104,7 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, failed: for (j = 0; j < i; j++) - drm_sched_entity_fini(&adev->rings[j]->sched, + drm_sched_entity_destroy(&adev->rings[j]->sched, &ctx->rings[j].entity); kfree(ctx->fences); ctx->fences = NULL; @@ -178,7 +178,7 @@ static void amdgpu_ctx_do_release(struct kref *ref) if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) continue; - drm_sched_entity_fini(&ctx->adev->rings[i]->sched, + drm_sched_entity_destroy(&ctx->adev->rings[i]->sched, &ctx->rings[i].entity); } @@ -466,7 +466,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) continue; - max_wait = drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, + max_wait = drm_sched_entity_flush(&ctx->adev->rings[i]->sched, &ctx->rings[i].entity, max_wait); } } @@ -492,7 +492,7 @@ void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) continue; if (kref_read(&ctx->refcount) == 1) - drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched, + drm_sched_entity_fini(&ctx->adev->rings[i]->sched, &ctx->rings[i].entity); else DRM_ERROR("ctx %p is still alive\n", ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0c084d3d0865..0246cb87d9e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -162,7 +162,7 @@ error_mem: static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { - drm_sched_entity_fini(adev->mman.entity.sched, + drm_sched_entity_destroy(adev->mman.entity.sched, &adev->mman.entity); mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index cc15d3230402..0b46ea1c6290 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -309,7 +309,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { kfree(adev->uvd.inst[j].saved_bo); - drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); + drm_sched_entity_destroy(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, &adev->uvd.inst[j].gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 23d960ec1cf2..b0dcdfd85f5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -222,7 +222,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; - drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); + drm_sched_entity_destroy(&adev->vce.ring[0].sched, &adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, (void **)&adev->vce.cpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 590db78b8c72..837066076ccf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2643,7 +2643,7 @@ error_free_root: vm->root.base.bo = NULL; error_free_sched_entity: - drm_sched_entity_fini(&ring->sched, &vm->entity); + drm_sched_entity_destroy(&ring->sched, &vm->entity); return r; } @@ -2780,7 +2780,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - drm_sched_entity_fini(vm->entity.sched, &vm->entity); + drm_sched_entity_destroy(vm->entity.sched, &vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index bfddf97dd13e..1df1c6115341 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -470,7 +470,7 @@ static int uvd_v6_0_sw_fini(void *handle) return r; if (uvd_v6_0_enc_support(adev)) { - drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); + drm_sched_entity_destroy(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 57d32f21b3a6..ba244d3b74db 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -491,7 +491,7 @@ static int uvd_v7_0_sw_fini(void *handle) return r; for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { - drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); + drm_sched_entity_destroy(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index e5013a999147..45bfdf4cc107 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -78,8 +78,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file) gpu->lastctx = NULL; mutex_unlock(&gpu->lock); - drm_sched_entity_fini(&gpu->sched, - &ctx->sched_entity[i]); + drm_sched_entity_destroy(&gpu->sched, + &ctx->sched_entity[i]); } } diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 6a316701da73..7d2560699b84 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -256,7 +256,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, /** - * drm_sched_entity_do_release - Destroy a context entity + * drm_sched_entity_flush - Flush a context entity * * @sched: scheduler instance * @entity: scheduler entity @@ -267,7 +267,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, * * Returns the remaining time in jiffies left from the input timeout */ -long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, +long drm_sched_entity_flush(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, long timeout) { long ret = timeout; @@ -294,7 +294,7 @@ long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, return ret; } -EXPORT_SYMBOL(drm_sched_entity_do_release); +EXPORT_SYMBOL(drm_sched_entity_flush); /** * drm_sched_entity_cleanup - Destroy a context entity @@ -306,7 +306,7 @@ EXPORT_SYMBOL(drm_sched_entity_do_release); * entity and signals all jobs with an error code if the process was killed. * */ -void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, +void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { @@ -357,7 +357,7 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, dma_fence_put(entity->last_scheduled); entity->last_scheduled = NULL; } -EXPORT_SYMBOL(drm_sched_entity_cleanup); +EXPORT_SYMBOL(drm_sched_entity_fini); /** * drm_sched_entity_fini - Destroy a context entity @@ -367,13 +367,13 @@ EXPORT_SYMBOL(drm_sched_entity_cleanup); * * Calls drm_sched_entity_do_release() and drm_sched_entity_cleanup() */ -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, +void drm_sched_entity_destroy(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - drm_sched_entity_do_release(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY); - drm_sched_entity_cleanup(sched, entity); + drm_sched_entity_flush(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY); + drm_sched_entity_fini(sched, entity); } -EXPORT_SYMBOL(drm_sched_entity_fini); +EXPORT_SYMBOL(drm_sched_entity_destroy); static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb) { diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index cdb582043b4f..567f7d46d912 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -151,7 +151,7 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file) enum v3d_queue q; for (q = 0; q < V3D_MAX_QUEUES; q++) { - drm_sched_entity_fini(&v3d->queue[q].sched, + drm_sched_entity_destroy(&v3d->queue[q].sched, &v3d_priv->sched_entity[q]); } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 7c2dfd6cc1af..4214ceb71c05 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -284,12 +284,12 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, atomic_t *guilty); -long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, +long drm_sched_entity_flush(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, long timeout); -void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); +void drm_sched_entity_destroy(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); void drm_sched_entity_push_job(struct drm_sched_job *sched_job, struct drm_sched_entity *entity); void drm_sched_entity_set_rq(struct drm_sched_entity *entity, -- cgit From 8dc9fbbf274b7b2a647e06141aee70ffabf6dbc0 Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Fri, 13 Jul 2018 15:21:13 +0530 Subject: drm/scheduler: add a pointer to scheduler in the rq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is in preparation for a better load balancing in scheduler. It allows us to associate entities with the run queues instead of binding them to a scheduler. Signed-off-by: Nayan Deshmukh Reviewed-by: Christian König Acked-by: Eric Anholt Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 ++++-- include/drm/gpu_scheduler.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 7d2560699b84..429b1328653a 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -69,11 +69,13 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); * * Initializes a scheduler runqueue. */ -static void drm_sched_rq_init(struct drm_sched_rq *rq) +static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) { spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->entities); rq->current_entity = NULL; + rq->sched = sched; } /** @@ -926,7 +928,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, sched->timeout = timeout; sched->hang_limit = hang_limit; for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++) - drm_sched_rq_init(&sched->sched_rq[i]); + drm_sched_rq_init(sched, &sched->sched_rq[i]); init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->job_scheduled); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 4214ceb71c05..43e93d6077cf 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -93,6 +93,7 @@ struct drm_sched_entity { * struct drm_sched_rq - queue of entities to be scheduled. * * @lock: to modify the entities list. + * @sched: the scheduler to which this rq belongs to. * @entities: list of the entities to be scheduled. * @current_entity: the entity which is to be scheduled. * @@ -102,6 +103,7 @@ struct drm_sched_entity { */ struct drm_sched_rq { spinlock_t lock; + struct drm_gpu_scheduler *sched; struct list_head entities; struct drm_sched_entity *current_entity; }; -- cgit From aa16b6c6b4d979234f830a48add47d02c12bb569 Mon Sep 17 00:00:00 2001 From: Nayan Deshmukh Date: Fri, 13 Jul 2018 15:21:14 +0530 Subject: drm/scheduler: modify args of drm_sched_entity_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit replace run queue by a list of run queues and remove the sched arg as that is part of run queue itself Signed-off-by: Nayan Deshmukh Reviewed-by: Christian König Acked-by: Eric Anholt Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +-- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 ++-- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 8 ++++---- drivers/gpu/drm/scheduler/gpu_scheduler.c | 20 ++++++++++++-------- drivers/gpu/drm/v3d/v3d_drv.c | 7 +++---- include/drm/gpu_scheduler.h | 6 +++--- 11 files changed, 33 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0120b24fae1b..83e3b320a793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -90,8 +90,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, if (ring == &adev->gfx.kiq.ring) continue; - r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, - rq, &ctx->guilty); + r = drm_sched_entity_init(&ctx->rings[i].entity, + &rq, 1, &ctx->guilty); if (r) goto failed; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6a3fead5c1f0..11a12483c995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1918,8 +1918,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ring = adev->mman.buffer_funcs_ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, NULL); + r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3e70eb61a960..a6c2cace4b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -266,8 +266,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) ring = &adev->uvd.inst[j].ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity, - rq, NULL); + r = drm_sched_entity_init(&adev->uvd.inst[j].entity, &rq, + 1, NULL); if (r != 0) { DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 6ae1ad7e83b3..ffb0fcc9707e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -190,8 +190,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, NULL); + r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0fd0a718763b..484e2c19c027 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2564,8 +2564,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring_instance %= adev->vm_manager.vm_pte_num_rings; ring = adev->vm_manager.vm_pte_rings[ring_instance]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&ring->sched, &vm->entity, - rq, NULL); + r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 2623f249cb7a..1c118c02e8cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -430,8 +430,8 @@ static int uvd_v6_0_sw_init(void *handle) struct drm_sched_rq *rq; ring = &adev->uvd.inst->ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc, - rq, NULL); + r = drm_sched_entity_init(&adev->uvd.inst->entity_enc, + &rq, 1, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index ce360ad16856..d48bc3393545 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -432,8 +432,8 @@ static int uvd_v7_0_sw_init(void *handle) for (j = 0; j < adev->uvd.num_uvd_inst; j++) { ring = &adev->uvd.inst[j].ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc, - rq, NULL); + r = drm_sched_entity_init(&adev->uvd.inst[j].entity_enc, + &rq, 1, NULL); if (r) { DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j); return r; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 45bfdf4cc107..36414ba56b22 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -49,12 +49,12 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file) for (i = 0; i < ETNA_MAX_PIPES; i++) { struct etnaviv_gpu *gpu = priv->gpu[i]; + struct drm_sched_rq *rq; if (gpu) { - drm_sched_entity_init(&gpu->sched, - &ctx->sched_entity[i], - &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], - NULL); + rq = &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + drm_sched_entity_init(&ctx->sched_entity[i], + &rq, 1, NULL); } } diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 429b1328653a..16bf446aa6b3 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -162,26 +162,30 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) * drm_sched_entity_init - Init a context entity used by scheduler when * submit to HW ring. * - * @sched: scheduler instance * @entity: scheduler entity to init - * @rq: the run queue this entity belongs + * @rq_list: the list of run queue on which jobs from this + * entity can be submitted + * @num_rq_list: number of run queue in rq_list * @guilty: atomic_t set to 1 when a job on this queue * is found to be guilty causing a timeout * + * Note: the rq_list should have atleast one element to schedule + * the entity + * * Returns 0 on success or a negative error code on failure. */ -int drm_sched_entity_init(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity, - struct drm_sched_rq *rq, +int drm_sched_entity_init(struct drm_sched_entity *entity, + struct drm_sched_rq **rq_list, + unsigned int num_rq_list, atomic_t *guilty) { - if (!(sched && entity && rq)) + if (!(entity && rq_list && num_rq_list > 0 && rq_list[0])) return -EINVAL; memset(entity, 0, sizeof(struct drm_sched_entity)); INIT_LIST_HEAD(&entity->list); - entity->rq = rq; - entity->sched = sched; + entity->rq = rq_list[0]; + entity->sched = rq_list[0]->sched; entity->guilty = guilty; entity->last_scheduled = NULL; diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 567f7d46d912..1dceba2b42fd 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -123,6 +123,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file) { struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_file_priv *v3d_priv; + struct drm_sched_rq *rq; int i; v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL); @@ -132,10 +133,8 @@ v3d_open(struct drm_device *dev, struct drm_file *file) v3d_priv->v3d = v3d; for (i = 0; i < V3D_MAX_QUEUES; i++) { - drm_sched_entity_init(&v3d->queue[i].sched, - &v3d_priv->sched_entity[i], - &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], - NULL); + rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL); } file->driver_priv = v3d_priv; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 43e93d6077cf..2205e89722f6 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -282,9 +282,9 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const char *name); void drm_sched_fini(struct drm_gpu_scheduler *sched); -int drm_sched_entity_init(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity, - struct drm_sched_rq *rq, +int drm_sched_entity_init(struct drm_sched_entity *entity, + struct drm_sched_rq **rq_list, + unsigned int num_rq_list, atomic_t *guilty); long drm_sched_entity_flush(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, long timeout); -- cgit From a6da48caf92b4b6119cae146259528bec0e48545 Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Mon, 16 Jul 2018 10:53:43 +0800 Subject: drm/scheduler: add NULL pointer check for run queue (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To check rq pointer before adding entity into it. That avoids NULL pointer access in some case. v2: move the check to caller Suggested-by: Christian König Signed-off-by: Junwei Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/gpu_scheduler.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/scheduler/gpu_scheduler.c') diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 16bf446aa6b3..dac71e3b4514 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -547,6 +547,11 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, if (first) { /* Add the entity to the run queue */ spin_lock(&entity->rq_lock); + if (!entity->rq) { + DRM_ERROR("Trying to push to a killed entity\n"); + spin_unlock(&entity->rq_lock); + return; + } drm_sched_rq_add_entity(entity->rq, entity); spin_unlock(&entity->rq_lock); drm_sched_wakeup(sched); -- cgit