From 0b492a4c92050862a9780b941d52c05923fcd669 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 16 Aug 2015 22:48:26 -0400 Subject: drm/amdgpu: cleanup context structure v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment is misleading and incorrect, remove it. Printing the id is completely meaningless and this practice can cause a race conditions on command submission. The flags and hangs fields are completely unused. Give all fields a common indentation. v2: remove fpriv reference and unused flags as well, fix debug message. Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6c66ac8a1891..e63cfb7fa390 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,16 +28,13 @@ static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; - struct amdgpu_ctx_mgr *mgr; ctx = container_of(ref, struct amdgpu_ctx, refcount); - mgr = &ctx->fpriv->ctx_mgr; - - idr_remove(&mgr->ctx_handles, ctx->id); kfree(ctx); } -int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id, uint32_t flags) +int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + uint32_t *id) { int r; struct amdgpu_ctx *ctx; @@ -57,8 +54,6 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uin *id = (uint32_t)r; memset(ctx, 0, sizeof(*ctx)); - ctx->id = *id; - ctx->fpriv = fpriv; kref_init(&ctx->refcount); mutex_unlock(&mgr->lock); @@ -73,6 +68,7 @@ int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint mutex_lock(&mgr->lock); ctx = idr_find(&mgr->ctx_handles, id); if (ctx) { + idr_remove(&mgr->ctx_handles, id); kref_put(&ctx->refcount, amdgpu_ctx_do_release); mutex_unlock(&mgr->lock); return 0; @@ -97,8 +93,8 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, } /* TODO: these two are always zero */ - out->state.flags = ctx->state.flags; - out->state.hangs = ctx->state.hangs; + out->state.flags = 0x0; + out->state.hangs = 0x0; /* determine if a GPU reset has occured since the last call */ reset_counter = atomic_read(&adev->gpu_reset_counter); @@ -123,7 +119,7 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv) idr_for_each_entry(idp,ctx,id) { if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) - DRM_ERROR("ctx (id=%ul) is still alive\n",ctx->id); + DRM_ERROR("ctx %p is still alive\n", ctx); } mutex_destroy(&mgr->lock); @@ -134,7 +130,6 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, { int r; uint32_t id; - uint32_t flags; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; @@ -142,11 +137,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, r = 0; id = args->in.ctx_id; - flags = args->in.flags; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: - r = amdgpu_ctx_alloc(adev, fpriv, &id, flags); + r = amdgpu_ctx_alloc(adev, fpriv, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: @@ -177,17 +171,9 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id) int amdgpu_ctx_put(struct amdgpu_ctx *ctx) { - struct amdgpu_fpriv *fpriv; - struct amdgpu_ctx_mgr *mgr; - if (ctx == NULL) return -EINVAL; - 
fpriv = ctx->fpriv; - mgr = &fpriv->ctx_mgr; - mutex_lock(&mgr->lock); kref_put(&ctx->refcount, amdgpu_ctx_do_release); - mutex_unlock(&mgr->lock); - return 0; } -- cgit From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 7 Jul 2015 17:24:49 +0200 Subject: drm/amdgpu: add user fence context map v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a prerequisite for the GPU scheduler to make the order of submission independent from the order of execution. v2: properly implement the locking Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 ++++++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++- 4 files changed, 110 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70e783a849ed..0220d98ba8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -415,6 +415,8 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; + /* resulting sequence number */ + uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -985,9 +987,18 @@ struct amdgpu_vm_manager { * context related structures */ +#define AMDGPU_CTX_MAX_CS_PENDING 16 + +struct amdgpu_ctx_ring { + uint64_t sequence; + struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; +}; + struct amdgpu_ctx { struct kref refcount; unsigned reset_counter; + spinlock_t ring_lock; + struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; }; struct amdgpu_ctx_mgr { @@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence); +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq); + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 53e6a10fe9f9..cef8360698be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, sizeof(struct drm_amdgpu_cs_chunk_dep); for (j = 0; j < num_deps; ++j) { - struct amdgpu_fence *fence; struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; + struct fence *fence; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, if (ctx == NULL) return -EINVAL; - r = amdgpu_fence_recreate(ring, p->filp, - deps[j].handle, - &fence); - if (r) { + fence = amdgpu_ctx_get_fence(ctx, ring, + deps[j].handle); + if (IS_ERR(fence)) { + r = PTR_ERR(fence); amdgpu_ctx_put(ctx); return r; - } - - r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); - amdgpu_fence_unref(&fence); - amdgpu_ctx_put(ctx); - if (r) - return r; + } else if (fence) { + r = amdgpu_sync_fence(adev, &ib->sync, fence); + fence_put(fence); + amdgpu_ctx_put(ctx); + if (r) + return r; + } } } @@ -773,8 
+773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_ib_fill(adev, &parser); } - if (!r) + if (!r) { r = amdgpu_cs_dependencies(adev, &parser); + if (r) + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + } if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); @@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } - cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; + cs->out.handle = parser.uf.sequence; out: amdgpu_cs_parser_fini(&parser, r, true); up_read(&adev->exclusive_lock); @@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); - struct amdgpu_fence *fence = NULL; struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; + struct fence *fence; long r; + r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) + return r; + ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); - r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + + } else + r = 1; - r = fence_wait_timeout(&fence->base, true, timeout); - amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e63cfb7fa390..c23bfd8fe414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,17 +28,22 @@ static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + unsigned i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) + fence_put(ctx->rings[i].fences[j]); kfree(ctx); } int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id) { - int r; struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + int i, r; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, memset(ctx, 0, sizeof(*ctx)); kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + ctx->rings[i].sequence = 1; mutex_unlock(&mgr->lock); return 0; @@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) kref_put(&ctx->refcount, amdgpu_ctx_do_release); return 0; } + +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + uint64_t seq = cring->sequence; + unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; + struct fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + } + + fence_get(fence); + + spin_lock(&ctx->ring_lock); + cring->fences[idx] = fence; + cring->sequence++; + 
spin_unlock(&ctx->ring_lock); + + fence_put(other); + + return seq; +} + +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + struct fence *fence; + + spin_lock(&ctx->ring_lock); + if (seq >= cring->sequence) { + spin_unlock(&ctx->ring_lock); + return ERR_PTR(-EINVAL); + } + + if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { + spin_unlock(&ctx->ring_lock); + return NULL; + } + + fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]); + spin_unlock(&ctx->ring_lock); + + return fence; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2722815eddbb..95d533422a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); + ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, + &ib->fence->base); addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, + amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, AMDGPU_FENCE_FLAG_64BIT); } -- cgit From cdecb65b4eaba1d45abbfe34b724664f65623531 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 16 Jul 2015 12:01:06 +0200 Subject: drm/amdgpu: fix context memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c23bfd8fe414..859a4841075e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -130,6 +130,7 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv) DRM_ERROR("ctx %p is still alive\n", ctx); } + idr_destroy(&mgr->ctx_handles); mutex_destroy(&mgr->lock); } -- cgit From cf6f1d39496e9b5dd62953f8dca9f995d80ab4ff Mon Sep 17 00:00:00 2001 From: Christian König Date: Sat, 18 Jul 2015 19:20:05 +0200 Subject: drm/amdgpu: fix signed overrun in amdgpu_ctx_get_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the first 16 fences of a context will always signal immediately. 
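The arithmetic is easiest to see in isolation. The following minimal sketch is not part of the patch; the starting values are assumptions taken from the surrounding code (each ring's sequence starts at 1 and at most 16 submissions are kept pending):

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_PENDING 16ULL	/* stands in for AMDGPU_CTX_MAX_CS_PENDING */

	int main(void)
	{
		uint64_t sequence = 5;	/* next sequence number on the context ring */
		uint64_t seq = 3;	/* a handle that is really still pending */

		/* old check: 5 - 16 wraps around in unsigned arithmetic, so the
		 * pending fence looks ancient and NULL ("already signaled") is returned */
		printf("old check says expired: %d\n", seq < sequence - MAX_PENDING);

		/* form used by the hunk below: the addition cannot wrap here */
		printf("new check says expired: %d\n", seq + MAX_PENDING < sequence);
		return 0;
	}

With the old form every context whose sequence counter is still below 16 reports its fences as expired, which is why the first submissions appeared to signal immediately.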
Signed-off-by: Christian König Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 859a4841075e..144edc97c6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -226,7 +226,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return ERR_PTR(-EINVAL); } - if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { + if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) { spin_unlock(&ctx->ring_lock); return NULL; } -- cgit From 9cb7e5a91f6cd4dc018cca7120d2da067f816d3a Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 13:17:19 +0800 Subject: drm/amdgpu: add context entity init Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 36 ++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 815d40f5e6e1..776339c2a95e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -994,10 +994,12 @@ struct amdgpu_vm_manager { struct amdgpu_ctx_ring { uint64_t sequence; struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; + struct amd_context_entity c_entity; }; struct amdgpu_ctx { struct kref refcount; + struct amdgpu_device *adev; unsigned reset_counter; spinlock_t ring_lock; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 144edc97c6fe..557fb60f416b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,13 +28,23 @@ static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + struct amdgpu_device *adev; unsigned i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + adev = ctx->adev; + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) fence_put(ctx->rings[i].fences[j]); + + if (amdgpu_enable_scheduler) { + for (i = 0; i < adev->num_rings; i++) + amd_context_entity_fini(adev->rings[i]->scheduler, + &ctx->rings[i].c_entity); + } + kfree(ctx); } @@ -43,7 +53,7 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, { struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; - int i, r; + int i, j, r; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -59,11 +69,35 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, *id = (uint32_t)r; memset(ctx, 0, sizeof(*ctx)); + ctx->adev = adev; kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) ctx->rings[i].sequence = 1; mutex_unlock(&mgr->lock); + if (amdgpu_enable_scheduler) { + /* create context entity for each ring */ + for (i = 0; i < adev->num_rings; i++) { + struct amd_run_queue *rq; + if (fpriv) + rq = &adev->rings[i]->scheduler->sched_rq; + else + rq = &adev->rings[i]->scheduler->kernel_rq; + r = amd_context_entity_init(adev->rings[i]->scheduler, + &ctx->rings[i].c_entity, + NULL, rq, *id); + if (r) + break; + } + + if (i < adev->num_rings) { + for (j = 0; j < i; j++) + amd_context_entity_fini(adev->rings[j]->scheduler, 
+ &ctx->rings[j].c_entity); + kfree(ctx); + return -EINVAL; + } + } return 0; } -- cgit From b43a9a7e87d2bbb8d0c6ae4ff06dcc604f00e31a Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 15:13:53 +0800 Subject: drm/amdgpu: use scheduler user seq instead of previous user seq Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 557fb60f416b..b9be250cb206 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -225,10 +225,16 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct fence *fence) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; - uint64_t seq = cring->sequence; - unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; - struct fence *other = cring->fences[idx]; + uint64_t seq = 0; + unsigned idx = 0; + struct fence *other = NULL; + if (amdgpu_enable_scheduler) + seq = atomic64_read(&cring->c_entity.last_queued_v_seq); + else + seq = cring->sequence; + idx = seq % AMDGPU_CTX_MAX_CS_PENDING; + other = cring->fences[idx]; if (other) { signed long r; r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); @@ -240,7 +246,8 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, spin_lock(&ctx->ring_lock); cring->fences[idx] = fence; - cring->sequence++; + if (!amdgpu_enable_scheduler) + cring->sequence++; spin_unlock(&ctx->ring_lock); fence_put(other); @@ -253,14 +260,21 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; + uint64_t queued_seq; spin_lock(&ctx->ring_lock); - if (seq >= cring->sequence) { + if (amdgpu_enable_scheduler) + queued_seq = atomic64_read(&cring->c_entity.last_queued_v_seq) + 1; + else + queued_seq = cring->sequence; + + if (seq >= queued_seq) { spin_unlock(&ctx->ring_lock); return ERR_PTR(-EINVAL); } - if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) { + + if (seq + AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { spin_unlock(&ctx->ring_lock); return NULL; } -- cgit From 4b559c90bc1870313f02cceef680884519af6b2b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 15:53:04 +0800 Subject: drm/amdgpu: make sure the fence is emitted before ring to get it. 
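Getting a user fence back therefore has to wait until the scheduler has actually written the job to the ring. The sketch below is not part of the patch; plain variables stand in for the per-entity atomics, and the ordering is an assumption drawn from how the diffs in this series update them:

	#include <assert.h>
	#include <stdint.h>

	static uint64_t last_queued_v_seq;	/* advanced when the CS ioctl pushes a job */
	static uint64_t last_emitted_v_seq;	/* advanced when the job is written to the ring */
	static uint64_t last_signaled_v_seq;	/* advanced when the hardware fence signals */

	int main(void)
	{
		last_queued_v_seq = 3;		/* three jobs pushed by userspace */
		last_emitted_v_seq = 2;		/* two of them emitted by the scheduler */
		last_signaled_v_seq = 1;	/* one already completed */

		/* the ordering relied on below: a fence can only be looked up once
		 * its sequence number has actually been emitted */
		assert(last_signaled_v_seq <= last_emitted_v_seq);
		assert(last_emitted_v_seq <= last_queued_v_seq);
		return 0;
	}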
Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 +++++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 5 ++++- 4 files changed, 25 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index becb26317467..127867c2fc37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -81,6 +81,7 @@ extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; extern int amdgpu_enable_scheduler; +#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) /* AMDGPU_IB_POOL_SIZE must be a power of 2 */ @@ -1239,6 +1240,7 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_user_fence uf; + struct amdgpu_ring *ring; struct mutex job_lock; struct work_struct job_work; int (*prepare_job)(struct amdgpu_cs_parser *sched_job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f9d4fe985668..5f2403898b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -915,7 +915,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } else parser->prepare_job = amdgpu_cs_parser_prepare_job; - + parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; amd_sched_push_job(ring->scheduler, @@ -965,24 +965,16 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - if (amdgpu_enable_scheduler) { - r = amd_sched_wait_ts(&ctx->rings[ring->idx].c_entity, - wait->in.handle, true, timeout); - if (r) - return r; - r = 1; - } else { - fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); - if (IS_ERR(fence)) - r = PTR_ERR(fence); - else if (fence) { - r = fence_wait_timeout(fence, true, timeout); - fence_put(fence); + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + } else + r = 1; - } else - r = 1; - } amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index b9be250cb206..41bc7fc0ebf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -261,6 +261,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; uint64_t queued_seq; + int r; + + if (amdgpu_enable_scheduler) { + r = amd_sched_wait_emit(&cring->c_entity, + seq, + true, + AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS); + if (r) + return NULL; + } spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 1f7bf31da7fc..46ec915c9344 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -56,12 +56,15 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, sched_job->filp); if (r) goto err; - if (sched_job->run_job) { r = sched_job->run_job(sched_job); if 
(r) goto err; } + atomic64_set(&c_entity->last_emitted_v_seq, + sched_job->uf.sequence); + wake_up_all(&c_entity->wait_emit); + mutex_unlock(&sched_job->job_lock); return; err: -- cgit From 23ca0e4e478836dcb93a54aa68cb48fbc66fb0ed Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 6 Jul 2015 13:42:58 +0800 Subject: drm/amdgpu: add kernel ctx support (v2) v2: rebase against kfd changes Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 83 +++++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++ 3 files changed, 71 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 79e81f397e60..47e4809c6e71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2065,6 +2065,9 @@ struct amdgpu_device { /* amdkfd interface */ struct kfd_dev *kfd; + + /* kernel conext for IB submission */ + struct amdgpu_ctx *kernel_ctx; }; bool amdgpu_device_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 41bc7fc0ebf6..a5d8242ace95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -48,33 +48,53 @@ static void amdgpu_ctx_do_release(struct kref *ref) kfree(ctx); } +static void amdgpu_ctx_init(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, + struct amdgpu_ctx *ctx, + uint32_t id) +{ + int i; + memset(ctx, 0, sizeof(*ctx)); + ctx->adev = adev; + kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + ctx->rings[i].sequence = 1; +} + int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id) { struct amdgpu_ctx *ctx; - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; int i, j, r; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - - mutex_lock(&mgr->lock); - r = idr_alloc(&mgr->ctx_handles, ctx, 0, 0, GFP_KERNEL); - if (r < 0) { + if (fpriv) { + struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + if (r < 0) { + mutex_unlock(&mgr->lock); + kfree(ctx); + return r; + } + *id = (uint32_t)r; + amdgpu_ctx_init(adev, fpriv, ctx, *id); mutex_unlock(&mgr->lock); - kfree(ctx); - return r; + } else { + if (adev->kernel_ctx) { + DRM_ERROR("kernel cnotext has been created.\n"); + kfree(ctx); + return 0; + } + *id = AMD_KERNEL_CONTEXT_ID; + amdgpu_ctx_init(adev, fpriv, ctx, *id); + + adev->kernel_ctx = ctx; } - *id = (uint32_t)r; - memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - kref_init(&ctx->refcount); - spin_lock_init(&ctx->ring_lock); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - ctx->rings[i].sequence = 1; - mutex_unlock(&mgr->lock); if (amdgpu_enable_scheduler) { /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { @@ -105,17 +125,23 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id) { struct amdgpu_ctx *ctx; - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; - mutex_lock(&mgr->lock); - ctx = idr_find(&mgr->ctx_handles, id); - if (ctx) { - idr_remove(&mgr->ctx_handles, id); - kref_put(&ctx->refcount, amdgpu_ctx_do_release); + if (fpriv) { + struct amdgpu_ctx_mgr *mgr = 
&fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (ctx) { + idr_remove(&mgr->ctx_handles, id); + kref_put(&ctx->refcount, amdgpu_ctx_do_release); + mutex_unlock(&mgr->lock); + return 0; + } mutex_unlock(&mgr->lock); + } else { + ctx = adev->kernel_ctx; + kref_put(&ctx->refcount, amdgpu_ctx_do_release); return 0; } - mutex_unlock(&mgr->lock); return -EINVAL; } @@ -124,9 +150,13 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, union drm_amdgpu_ctx_out *out) { struct amdgpu_ctx *ctx; - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + struct amdgpu_ctx_mgr *mgr; unsigned reset_counter; + if (!fpriv) + return -EINVAL; + + mgr = &fpriv->ctx_mgr; mutex_lock(&mgr->lock); ctx = idr_find(&mgr->ctx_handles, id); if (!ctx) { @@ -202,7 +232,12 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id) { struct amdgpu_ctx *ctx; - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + struct amdgpu_ctx_mgr *mgr; + + if (!fpriv) + return NULL; + + mgr = &fpriv->ctx_mgr; mutex_lock(&mgr->lock); ctx = idr_find(&mgr->ctx_handles, id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fefeeb2c4918..801ebfc44034 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1525,6 +1525,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + if (!adev->kernel_ctx) { + uint32_t id = 0; + r = amdgpu_ctx_alloc(adev, NULL, &id); + if (r) { + dev_err(adev->dev, "failed to create kernel context (%d).\n", r); + return r; + } + } r = amdgpu_ib_ring_tests(adev); if (r) DRM_ERROR("ib ring test failed (%d).\n", r); @@ -1586,6 +1594,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->shutdown = true; /* evict vram memory */ amdgpu_bo_evict_vram(adev); + amdgpu_ctx_free(adev, NULL, 0); amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); -- cgit From 1333f723fb6f1356a54135586f1ede44dcaa9652 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Thu, 30 Jul 2015 16:36:58 +0800 Subject: drm/amdgpu: add amdgpu.sched_jobs option This option can be used to specify the max job number in the job queue, and it is 16 by default. 
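As a usage illustration (not part of the patch): the scheduler parameters shown in the hunks below are registered with permission 0444, so they are chosen at module load time, for example

	modprobe amdgpu enable_scheduler=1 sched_jobs=32

or amdgpu.sched_jobs=32 on the kernel command line; the value 32 is only an example, 16 remains the default.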
Signed-off-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 6 ++++-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 4 ++-- 5 files changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0703fbfd5130..4de114711951 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -80,6 +80,7 @@ extern int amdgpu_deep_color; extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; extern int amdgpu_enable_scheduler; +extern int amdgpu_sched_jobs; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a5d8242ace95..58ce2655a8fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -105,7 +105,8 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, rq = &adev->rings[i]->scheduler->kernel_rq; r = amd_context_entity_init(adev->rings[i]->scheduler, &ctx->rings[i].c_entity, - NULL, rq, *id); + NULL, rq, *id, + amdgpu_sched_jobs); if (r) break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8f33cef9c828..319de441e907 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -76,6 +76,7 @@ int amdgpu_vm_size = 8; int amdgpu_vm_block_size = -1; int amdgpu_exp_hw_support = 0; int amdgpu_enable_scheduler = 0; +int amdgpu_sched_jobs = 16; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -143,6 +144,9 @@ module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable, 0 = disable ((default))"); module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444); +MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 16)"); +module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); + static struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 5799474808e9..87993e06ba37 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -173,6 +173,7 @@ exit: * @parent The parent entity of this amd_context_entity * @rq The run queue this entity belongs * @context_id The context id for this entity + * @jobs The max number of jobs in the job queue * * return 0 if succeed. 
negative error code on failure */ @@ -180,7 +181,8 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, struct amd_context_entity *entity, struct amd_sched_entity *parent, struct amd_run_queue *rq, - uint32_t context_id) + uint32_t context_id, + uint32_t jobs) { uint64_t seq_ring = 0; @@ -196,7 +198,7 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); if(kfifo_alloc(&entity->job_queue, - AMD_MAX_JOB_ENTRY_PER_CONTEXT * sizeof(void *), + jobs * sizeof(void *), GFP_KERNEL)) return -EINVAL; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index a6226e1e924a..52577a88b054 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -27,7 +27,6 @@ #include #define AMD_MAX_ACTIVE_HW_SUBMISSION 2 -#define AMD_MAX_JOB_ENTRY_PER_CONTEXT 16 #define AMD_KERNEL_CONTEXT_ID 0 #define AMD_KERNEL_PROCESS_ID 0 @@ -155,6 +154,7 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, struct amd_context_entity *entity, struct amd_sched_entity *parent, struct amd_run_queue *rq, - uint32_t context_id); + uint32_t context_id, + uint32_t jobs); #endif -- cgit From 51b9db27d07869cf565ba135e97e2ed5f858612e Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 28 Jul 2015 17:31:04 +0800 Subject: drm/amdgpu: wait forever for wait emit The job must be emitted by the scheduler, otherwise the scheduler is in an abnormal state. Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 58ce2655a8fd..95807b678b6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -302,8 +302,8 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, if (amdgpu_enable_scheduler) { r = amd_sched_wait_emit(&cring->c_entity, seq, - true, - AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS); + false, + -1); if (r) return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 34938d2417a1..26c55a7a1a88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -386,7 +386,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, v_seq, - true, + false, -1); if (r) DRM_ERROR("emit timeout\n"); @@ -537,7 +537,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, v_seq, - true, + false, -1); if (r) DRM_ERROR("emit timeout\n"); @@ -890,7 +890,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, v_seq, - true, + false, -1); if (r) DRM_ERROR("emit timeout\n"); -- cgit From d1ff9086c1b8e67390161599006a34056b437a72 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 30 Jul 2015 17:59:43 +0800 Subject: drm/amdgpu: fix seq in ctx_add_fence If the scheduler is enabled, the queued sequence is assigned when the job is pushed, before it is emitted.
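The hazard with reading the counter again at emit time can be sketched as follows (illustrative only; plain variables stand in for the atomics and the interleaving is an assumed example):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t last_queued_v_seq;	/* stand-in for the per-entity atomic */

	int main(void)
	{
		/* the CS ioctl assigns the sequence while pushing each job */
		uint64_t job_a = ++last_queued_v_seq;	/* job A is pushed and gets 1 */
		uint64_t job_b = ++last_queued_v_seq;	/* job B is pushed before A is emitted */

		/* re-reading the counter while emitting job A returns B's value, so
		 * A's fence would be stored under the wrong sequence and slot;
		 * passing the push-time value down instead keeps them matched */
		printf("counter at emit time: %llu\n", (unsigned long long)last_queued_v_seq);
		printf("sequence assigned to A: %llu\n", (unsigned long long)job_a);
		(void)job_b;
		return 0;
	}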
Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 6 files changed, 15 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6e1fea473a66..2619c78ec303 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -419,7 +419,6 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; - uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -1031,7 +1030,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence); + struct fence *fence, uint64_t queued_seq); struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c41360e443be..40e85bfcdf91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -739,7 +739,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ib->oa_size = amdgpu_bo_size(oa); } } - /* wrap the last IB with user fence */ if (parser->uf.bo) { struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; @@ -908,7 +907,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->uf.sequence = atomic64_inc_return( + parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); @@ -922,7 +921,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) amd_sched_push_job(ring->scheduler, &parser->ctx->rings[ring->idx].c_entity, parser); - cs->out.handle = parser->uf.sequence; + cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 95807b678b6a..e0eaa55bf636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -258,7 +258,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) } uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence) + struct fence *fence, uint64_t queued_seq) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; uint64_t seq = 0; @@ -266,7 +266,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct fence *other = NULL; if (amdgpu_enable_scheduler) - seq = atomic64_read(&cring->c_entity.last_queued_v_seq); + seq = queued_seq; else seq = cring->sequence; idx = seq % AMDGPU_CTX_MAX_CS_PENDING; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 42d6298eb9d7..eed409c59492 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,6 +143,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_ring *ring; struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; + uint64_t sequence; unsigned i; int r = 0; @@ -215,9 +216,12 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, return r; } + sequence = amdgpu_enable_scheduler ? ib->sequence : 0; + if (ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base); + &ib->fence->base, + sequence); /* wrap the last IB with fence */ if (ib->user) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 46ec915c9344..b913c22dd6b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -62,7 +62,7 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, goto err; } atomic64_set(&c_entity->last_emitted_v_seq, - sched_job->uf.sequence); + sched_job->ibs[sched_job->num_ibs - 1].sequence); wake_up_all(&c_entity->wait_emit); mutex_unlock(&sched_job->job_lock); @@ -93,7 +93,7 @@ static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, void *job) if (sched_job->ctx) { c_entity = &sched_job->ctx->rings[ring->idx].c_entity; atomic64_set(&c_entity->last_signaled_v_seq, - sched_job->uf.sequence); + sched_job->ibs[sched_job->num_ibs - 1].sequence); } /* wake up users waiting for time stamp */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 26c55a7a1a88..5624d4484fb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -380,7 +380,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); @@ -531,7 +531,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); @@ -884,7 +884,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); -- cgit From 27f6642d066ecea7b535dd9b24e2f41e54f3dd85 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Mon, 3 Aug 2015 10:27:57 +0800 Subject: drm/amdgpu: add amd_sched_next_queued_seq function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is used to get the next queued sequence number Signed-off-by: Jammy Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 12 ++++++++++++ 
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e0eaa55bf636..6766ead31c4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -310,7 +310,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) - queued_seq = atomic64_read(&cring->c_entity.last_queued_v_seq) + 1; + queued_seq = amd_sched_next_queued_seq(&cring->c_entity); else queued_seq = cring->sequence; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 55ebbf0f8cd0..4ad1825e713e 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -507,3 +507,15 @@ void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq) atomic64_set(&c_entity->last_emitted_v_seq, seq); wake_up_all(&c_entity->wait_emit); } + +/** + * Get next queued sequence number + * + * @entity The context entity + * + * return the next queued sequence number +*/ +uint64_t amd_sched_next_queued_seq(struct amd_context_entity *c_entity) +{ + return atomic64_read(&c_entity->last_queued_v_seq) + 1; +} diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 44f71cdf7c33..fd6d699d42e1 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -153,4 +153,6 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq); +uint64_t amd_sched_next_queued_seq(struct amd_context_entity *c_entity); + #endif -- cgit From efd4ccb59a4acb8b85835d6b053362dbacee40f9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 4 Aug 2015 16:20:31 +0200 Subject: drm/amdgpu: cleanup ctx_mgr init/fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 40 +++++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 ++----- 3 files changed, 27 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index eadbe792c8aa..0cd776a55f05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1038,8 +1038,6 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id); -void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); - struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); @@ -1051,6 +1049,8 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); /* * file private structure diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6766ead31c4d..3c353375b228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -182,23 +182,6 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, return 0; } -void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv) -{ - struct idr *idp; - struct amdgpu_ctx *ctx; - uint32_t id; - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; - idp = &mgr->ctx_handles; - - idr_for_each_entry(idp,ctx,id) { - if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) - DRM_ERROR("ctx %p is still alive\n", ctx); - } - - idr_destroy(&mgr->ctx_handles); - mutex_destroy(&mgr->lock); -} - int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -330,3 +313,26 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return fence; } + +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) +{ + mutex_init(&mgr->lock); + idr_init(&mgr->ctx_handles); +} + +void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) + DRM_ERROR("ctx %p is still alive\n", ctx); + } + + idr_destroy(&mgr->ctx_handles); + mutex_destroy(&mgr->lock); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 53da3d8a868c..7226def4e8fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -527,10 +527,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init(&fpriv->bo_list_handles); - /* init context manager */ - mutex_init(&fpriv->ctx_mgr.lock); - idr_init(&fpriv->ctx_mgr.ctx_handles); - fpriv->ctx_mgr.adev = adev; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); file_priv->driver_priv = fpriv; @@ -571,8 +568,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, idr_destroy(&fpriv->bo_list_handles); mutex_destroy(&fpriv->bo_list_lock); - /* release context */ - amdgpu_ctx_fini(fpriv); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); kfree(fpriv); file_priv->driver_priv = NULL; -- cgit From 0e89d0c16b9446a094215e71734e583c438bf83d Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 4 Aug 2015 16:58:36 +0200 Subject: drm/amdgpu: stop leaking the ctx id into the scheduler v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Id's are for the IOCTL ABI only. 
v2: remove tgid as well Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 ++++------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10 +++------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 6 ------ 3 files changed, 7 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 3c353375b228..c2290ae20312 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -50,8 +50,7 @@ static void amdgpu_ctx_do_release(struct kref *ref) static void amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, - struct amdgpu_ctx *ctx, - uint32_t id) + struct amdgpu_ctx *ctx) { int i; memset(ctx, 0, sizeof(*ctx)); @@ -81,7 +80,7 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, return r; } *id = (uint32_t)r; - amdgpu_ctx_init(adev, fpriv, ctx, *id); + amdgpu_ctx_init(adev, fpriv, ctx); mutex_unlock(&mgr->lock); } else { if (adev->kernel_ctx) { @@ -89,8 +88,7 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, kfree(ctx); return 0; } - *id = AMD_KERNEL_CONTEXT_ID; - amdgpu_ctx_init(adev, fpriv, ctx, *id); + amdgpu_ctx_init(adev, fpriv, ctx); adev->kernel_ctx = ctx; } @@ -105,8 +103,7 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, rq = &adev->rings[i]->scheduler->kernel_rq; r = amd_context_entity_init(adev->rings[i]->scheduler, &ctx->rings[i].c_entity, - NULL, rq, *id, - amdgpu_sched_jobs); + NULL, rq, amdgpu_sched_jobs); if (r) break; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 4ad1825e713e..b9aa572980d2 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -172,7 +172,7 @@ exit: * @entity The pointer to a valid amd_context_entity * @parent The parent entity of this amd_context_entity * @rq The run queue this entity belongs - * @context_id The context id for this entity + * @kernel If this is an entity for the kernel * @jobs The max number of jobs in the job queue * * return 0 if succeed. negative error code on failure @@ -181,7 +181,6 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, struct amd_context_entity *entity, struct amd_sched_entity *parent, struct amd_run_queue *rq, - uint32_t context_id, uint32_t jobs) { uint64_t seq_ring = 0; @@ -203,9 +202,6 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, return -EINVAL; spin_lock_init(&entity->queue_lock); - entity->tgid = (context_id == AMD_KERNEL_CONTEXT_ID) ? 
- AMD_KERNEL_PROCESS_ID : current->tgid; - entity->context_id = context_id; atomic64_set(&entity->last_emitted_v_seq, seq_ring); atomic64_set(&entity->last_queued_v_seq, seq_ring); @@ -275,9 +271,9 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, if (r) { if (entity->is_pending) - DRM_INFO("Entity %u is in waiting state during fini,\ + DRM_INFO("Entity %p is in waiting state during fini,\ all pending ibs will be canceled.\n", - entity->context_id); + entity); } mutex_lock(&rq->lock); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index fd6d699d42e1..c46d0854ab75 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -26,9 +26,6 @@ #include -#define AMD_KERNEL_CONTEXT_ID 0 -#define AMD_KERNEL_PROCESS_ID 0 - #define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 struct amd_gpu_scheduler; @@ -74,8 +71,6 @@ struct amd_context_entity { /* the virtual_seq is unique per context per ring */ atomic64_t last_queued_v_seq; atomic64_t last_emitted_v_seq; - pid_t tgid; - uint32_t context_id; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; @@ -148,7 +143,6 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, struct amd_context_entity *entity, struct amd_sched_entity *parent, struct amd_run_queue *rq, - uint32_t context_id, uint32_t jobs); void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq); -- cgit From 47f38501f11fa45d8a7797f1965448c1e20049d4 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 4 Aug 2015 17:51:05 +0200 Subject: drm/amdgpu: cleanup amdgpu_ctx inti/fini v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup the kernel context handling. 
v2: rebased Signed-off-by: Christian König Reviewed-by: Chunming Zhou (v1) --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 145 ++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++-- 5 files changed, 89 insertions(+), 104 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0cd776a55f05..53d70f766afe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1033,10 +1033,9 @@ struct amdgpu_ctx_mgr { struct idr ctx_handles; }; -int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, - uint32_t *id); -int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, - uint32_t id); +int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, + struct amdgpu_ctx *ctx); +void amdgpu_ctx_fini(struct amdgpu_ctx *ctx); struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); @@ -2095,7 +2094,7 @@ struct amdgpu_device { struct kfd_dev *kfd; /* kernel conext for IB submission */ - struct amdgpu_ctx *kernel_ctx; + struct amdgpu_ctx kernel_ctx; }; bool amdgpu_device_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c2290ae20312..08a9292729dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,82 +25,27 @@ #include #include "amdgpu.h" -static void amdgpu_ctx_do_release(struct kref *ref) +int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, + struct amdgpu_ctx *ctx) { - struct amdgpu_ctx *ctx; - struct amdgpu_device *adev; unsigned i, j; + int r; - ctx = container_of(ref, struct amdgpu_ctx, refcount); - adev = ctx->adev; - - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) - fence_put(ctx->rings[i].fences[j]); - - if (amdgpu_enable_scheduler) { - for (i = 0; i < adev->num_rings; i++) - amd_context_entity_fini(adev->rings[i]->scheduler, - &ctx->rings[i].c_entity); - } - - kfree(ctx); -} - -static void amdgpu_ctx_init(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv, - struct amdgpu_ctx *ctx) -{ - int i; memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) ctx->rings[i].sequence = 1; -} - -int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, - uint32_t *id) -{ - struct amdgpu_ctx *ctx; - int i, j, r; - - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - if (fpriv) { - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; - mutex_lock(&mgr->lock); - r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); - if (r < 0) { - mutex_unlock(&mgr->lock); - kfree(ctx); - return r; - } - *id = (uint32_t)r; - amdgpu_ctx_init(adev, fpriv, ctx); - mutex_unlock(&mgr->lock); - } else { - if (adev->kernel_ctx) { - DRM_ERROR("kernel cnotext has been created.\n"); - kfree(ctx); - return 0; - } - amdgpu_ctx_init(adev, fpriv, ctx); - - adev->kernel_ctx = ctx; - } if (amdgpu_enable_scheduler) { /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amd_run_queue *rq; - if (fpriv) - rq = &adev->rings[i]->scheduler->sched_rq; - else + if (kernel) rq = 
&adev->rings[i]->scheduler->kernel_rq; + else + rq = &adev->rings[i]->scheduler->sched_rq; r = amd_context_entity_init(adev->rings[i]->scheduler, &ctx->rings[i].c_entity, NULL, rq, amdgpu_sched_jobs); @@ -113,33 +58,79 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, amd_context_entity_fini(adev->rings[j]->scheduler, &ctx->rings[j].c_entity); kfree(ctx); - return -EINVAL; + return r; } } - return 0; } -int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id) +void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) { + struct amdgpu_device *adev = ctx->adev; + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) + fence_put(ctx->rings[i].fences[j]); + + if (amdgpu_enable_scheduler) { + for (i = 0; i < adev->num_rings; i++) + amd_context_entity_fini(adev->rings[i]->scheduler, + &ctx->rings[i].c_entity); + } +} + +static int amdgpu_ctx_alloc(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, + uint32_t *id) +{ + struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; struct amdgpu_ctx *ctx; + int r; - if (fpriv) { - struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; - mutex_lock(&mgr->lock); - ctx = idr_find(&mgr->ctx_handles, id); - if (ctx) { - idr_remove(&mgr->ctx_handles, id); - kref_put(&ctx->refcount, amdgpu_ctx_do_release); - mutex_unlock(&mgr->lock); - return 0; - } + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + mutex_lock(&mgr->lock); + r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + if (r < 0) { mutex_unlock(&mgr->lock); - } else { - ctx = adev->kernel_ctx; + kfree(ctx); + return r; + } + *id = (uint32_t)r; + r = amdgpu_ctx_init(adev, false, ctx); + mutex_unlock(&mgr->lock); + + return r; +} + +static void amdgpu_ctx_do_release(struct kref *ref) +{ + struct amdgpu_ctx *ctx; + + ctx = container_of(ref, struct amdgpu_ctx, refcount); + + amdgpu_ctx_fini(ctx); + + kfree(ctx); +} + +static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) +{ + struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + struct amdgpu_ctx *ctx; + + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (ctx) { + idr_remove(&mgr->ctx_handles, id); kref_put(&ctx->refcount, amdgpu_ctx_do_release); + mutex_unlock(&mgr->lock); return 0; } + mutex_unlock(&mgr->lock); return -EINVAL; } @@ -198,7 +189,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: - r = amdgpu_ctx_free(adev, fpriv, id); + r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: r = amdgpu_ctx_query(adev, fpriv, id, &args->out); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 801ebfc44034..42d1a22c1199 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1525,13 +1525,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } - if (!adev->kernel_ctx) { - uint32_t id = 0; - r = amdgpu_ctx_alloc(adev, NULL, &id); - if (r) { - dev_err(adev->dev, "failed to create kernel context (%d).\n", r); - return r; - } + r = amdgpu_ctx_init(adev, true, &adev->kernel_ctx); + if (r) { + dev_err(adev->dev, "failed to create kernel context (%d).\n", r); + return r; } r = amdgpu_ib_ring_tests(adev); if (r) @@ -1594,7 +1591,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->shutdown = true; /* evict vram memory */ amdgpu_bo_evict_vram(adev); - amdgpu_ctx_free(adev, NULL, 0); + 
amdgpu_ctx_fini(&adev->kernel_ctx); amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 9f2f19cc4625..995901b9e428 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -122,19 +122,17 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, int r = 0; if (amdgpu_enable_scheduler) { struct amdgpu_cs_parser *sched_job = - amdgpu_cs_parser_create(adev, - owner, - adev->kernel_ctx, + amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, ibs, 1); if(!sched_job) { return -ENOMEM; } sched_job->free_job = free_job; ibs[num_ibs - 1].sequence = amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx->rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit( - &adev->kernel_ctx->rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].c_entity, ibs[num_ibs - 1].sequence, false, -1); if (r) WARN(true, "emit timeout\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ab9c65a245ba..78713ae3b158 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -372,16 +372,16 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, - adev->kernel_ctx, ib, 1); + &adev->kernel_ctx, ib, 1); if(!sched_job) goto error_free; sched_job->job_param.vm.bo = bo; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; ib->sequence = amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx->rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, ib->sequence, false, -1); if (r) DRM_ERROR("emit timeout\n"); @@ -517,7 +517,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, - adev->kernel_ctx, + &adev->kernel_ctx, ib, 1); if(!sched_job) goto error_free; @@ -525,9 +525,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; ib->sequence = amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx->rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, ib->sequence, false, -1); if (r) DRM_ERROR("emit timeout\n"); @@ -863,7 +863,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, - adev->kernel_ctx, ib, 1); + &adev->kernel_ctx, ib, 1); if(!sched_job) goto error_free; sched_job->job_param.vm_mapping.vm = vm; @@ -873,9 +873,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; ib->sequence = amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx->rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); - r = 
amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, ib->sequence, false, -1); if (r) DRM_ERROR("emit timeout\n"); -- cgit From ddf94d33d6434199be08f8965f63d408e2787539 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 5 Aug 2015 18:11:14 +0200 Subject: drm/amdgpu: remove unused parent entity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 3 --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 -- 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 08a9292729dc..e04364cdcc9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -48,7 +48,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, rq = &adev->rings[i]->scheduler->sched_rq; r = amd_context_entity_init(adev->rings[i]->scheduler, &ctx->rings[i].c_entity, - NULL, rq, amdgpu_sched_jobs); + rq, amdgpu_sched_jobs); if (r) break; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 4c2c5adbc537..6f0d40b13a23 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -177,7 +177,6 @@ exit: * * @sched The pointer to the scheduler * @entity The pointer to a valid amd_context_entity - * @parent The parent entity of this amd_context_entity * @rq The run queue this entity belongs * @kernel If this is an entity for the kernel * @jobs The max number of jobs in the job queue @@ -186,7 +185,6 @@ exit: */ int amd_context_entity_init(struct amd_gpu_scheduler *sched, struct amd_context_entity *entity, - struct amd_sched_entity *parent, struct amd_run_queue *rq, uint32_t jobs) { @@ -199,7 +197,6 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, seq_ring = ((uint64_t)sched->ring_id) << 60; spin_lock_init(&entity->lock); entity->generic_entity.belongto_rq = rq; - entity->generic_entity.parent = parent; entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 8a756a565583..64ef0e2b1543 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -41,7 +41,6 @@ struct amd_run_queue; struct amd_sched_entity { struct list_head list; struct amd_run_queue *belongto_rq; - struct amd_sched_entity *parent; }; /** @@ -149,7 +148,6 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, int amd_context_entity_init(struct amd_gpu_scheduler *sched, struct amd_context_entity *entity, - struct amd_sched_entity *parent, struct amd_run_queue *rq, uint32_t jobs); -- cgit From 91404fb20825418fd9ab8e6533bc336e1ffc748e Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 5 Aug 2015 18:33:21 +0200 Subject: drm/amdgpu: merge amd_sched_entity and amd_context_entity v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoiding a couple of casts. 
v2: rename c_entity to entity as well Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 18 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 67 +++++++++++---------------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 57 ++++++++++------------- 7 files changed, 81 insertions(+), 101 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 423cf91ef652..1e6800050ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1013,9 +1013,9 @@ struct amdgpu_vm_manager { #define AMDGPU_CTX_MAX_CS_PENDING 16 struct amdgpu_ctx_ring { - uint64_t sequence; - struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; - struct amd_context_entity c_entity; + uint64_t sequence; + struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; + struct amd_sched_entity entity; }; struct amdgpu_ctx { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d26688ddaa20..b1dc7e1ed271 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -900,7 +900,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); + &parser->ctx->rings[ring->idx].entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -911,7 +911,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].c_entity, + &parser->ctx->rings[ring->idx].entity, parser); cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; up_read(&adev->exclusive_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e04364cdcc9e..232e800eea56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -46,17 +46,17 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, rq = &adev->rings[i]->scheduler->kernel_rq; else rq = &adev->rings[i]->scheduler->sched_rq; - r = amd_context_entity_init(adev->rings[i]->scheduler, - &ctx->rings[i].c_entity, - rq, amdgpu_sched_jobs); + r = amd_sched_entity_init(adev->rings[i]->scheduler, + &ctx->rings[i].entity, + rq, amdgpu_sched_jobs); if (r) break; } if (i < adev->num_rings) { for (j = 0; j < i; j++) - amd_context_entity_fini(adev->rings[j]->scheduler, - &ctx->rings[j].c_entity); + amd_sched_entity_fini(adev->rings[j]->scheduler, + &ctx->rings[j].entity); kfree(ctx); return r; } @@ -75,8 +75,8 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) if (amdgpu_enable_scheduler) { for (i = 0; i < adev->num_rings; i++) - amd_context_entity_fini(adev->rings[i]->scheduler, - &ctx->rings[i].c_entity); + amd_sched_entity_fini(adev->rings[i]->scheduler, + &ctx->rings[i].entity); } } @@ -271,7 +271,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, int r; if 
(amdgpu_enable_scheduler) { - r = amd_sched_wait_emit(&cring->c_entity, + r = amd_sched_wait_emit(&cring->entity, seq, false, -1); @@ -281,7 +281,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) - queued_seq = amd_sched_next_queued_seq(&cring->c_entity); + queued_seq = amd_sched_next_queued_seq(&cring->entity); else queued_seq = cring->sequence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 71a4a7e4b1ae..787b93db6796 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -28,7 +28,7 @@ #include "amdgpu.h" static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *entity, void *job) { int r = 0; @@ -51,7 +51,7 @@ static void amdgpu_fence_sched_cb(struct fence *f, struct fence_cb *cb) } static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *entity, struct amd_sched_job *job) { int r = 0; @@ -83,7 +83,7 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, goto err; } - amd_sched_emit(c_entity, sched_job->ibs[sched_job->num_ibs - 1].sequence); + amd_sched_emit(entity, sched_job->ibs[sched_job->num_ibs - 1].sequence); mutex_unlock(&sched_job->job_lock); return; @@ -136,13 +136,13 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ibs[num_ibs - 1].sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); r = amd_sched_wait_emit( - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d5043c42fc5..230bf1f34ead 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -379,12 +379,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->job_param.vm.bo = bo; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); @@ -530,12 +530,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->job_param.vm.bo = pd; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); - r = 
amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); @@ -883,12 +883,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->job_param.vm_mapping.fence = fence; sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 1f78ad60224a..eb3b0993a8cd 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -76,7 +76,7 @@ static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq) return i ? p : NULL; } -static bool context_entity_is_waiting(struct amd_context_entity *entity) +static bool context_entity_is_waiting(struct amd_sched_entity *entity) { /* TODO: sync obj for multi-ring synchronization */ return false; @@ -84,14 +84,11 @@ static bool context_entity_is_waiting(struct amd_context_entity *entity) static int gpu_entity_check_status(struct amd_sched_entity *entity) { - struct amd_context_entity *tmp; - if (entity == &entity->belongto_rq->head) return -1; - tmp = container_of(entity, typeof(*tmp), generic_entity); - if (kfifo_is_empty(&tmp->job_queue) || - context_entity_is_waiting(tmp)) + if (kfifo_is_empty(&entity->job_queue) || + context_entity_is_waiting(entity)) return -1; return 0; @@ -123,31 +120,26 @@ static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) * Select next entity from the kernel run queue, if not available, * return null. */ -static struct amd_context_entity * +static struct amd_sched_entity * kernel_rq_select_context(struct amd_gpu_scheduler *sched) { struct amd_sched_entity *sched_entity; - struct amd_context_entity *tmp = NULL; struct amd_run_queue *rq = &sched->kernel_rq; mutex_lock(&rq->lock); sched_entity = rq_select_entity(rq); - if (sched_entity) - tmp = container_of(sched_entity, - typeof(*tmp), - generic_entity); mutex_unlock(&rq->lock); - return tmp; + return sched_entity; } /** * Select next entity containing real IB submissions */ -static struct amd_context_entity * +static struct amd_sched_entity * select_context(struct amd_gpu_scheduler *sched) { - struct amd_context_entity *wake_entity = NULL; - struct amd_context_entity *tmp; + struct amd_sched_entity *wake_entity = NULL; + struct amd_sched_entity *tmp; struct amd_run_queue *rq; if (!is_scheduler_ready(sched)) @@ -158,12 +150,9 @@ select_context(struct amd_gpu_scheduler *sched) if (tmp != NULL) goto exit; - WARN_ON(offsetof(struct amd_context_entity, generic_entity) != 0); - rq = &sched->sched_rq; mutex_lock(&rq->lock); - tmp = container_of(rq_select_entity(rq), - typeof(*tmp), generic_entity); + tmp = rq_select_entity(rq); mutex_unlock(&rq->lock); exit: if (sched->current_entity && (sched->current_entity != tmp)) @@ -178,15 +167,15 @@ exit: * Init a context entity used by scheduler when submit to HW ring. 
* * @sched The pointer to the scheduler - * @entity The pointer to a valid amd_context_entity + * @entity The pointer to a valid amd_sched_entity * @rq The run queue this entity belongs * @kernel If this is an entity for the kernel * @jobs The max number of jobs in the job queue * * return 0 if succeed. negative error code on failure */ -int amd_context_entity_init(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity, +int amd_sched_entity_init(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity, struct amd_run_queue *rq, uint32_t jobs) { @@ -195,10 +184,10 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, if (!(sched && entity && rq)) return -EINVAL; - memset(entity, 0, sizeof(struct amd_context_entity)); + memset(entity, 0, sizeof(struct amd_sched_entity)); seq_ring = ((uint64_t)sched->ring_id) << 60; spin_lock_init(&entity->lock); - entity->generic_entity.belongto_rq = rq; + entity->belongto_rq = rq; entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); @@ -213,7 +202,7 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, /* Add the entity to the run queue */ mutex_lock(&rq->lock); - rq_add_entity(rq, &entity->generic_entity); + rq_add_entity(rq, entity); mutex_unlock(&rq->lock); return 0; } @@ -227,14 +216,14 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, * return true if entity is initialized, false otherwise */ static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity) + struct amd_sched_entity *entity) { return entity->scheduler == sched && - entity->generic_entity.belongto_rq != NULL; + entity->belongto_rq != NULL; } static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity) + struct amd_sched_entity *entity) { /** * Idle means no pending IBs, and the entity is not @@ -256,11 +245,11 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, * * return 0 if succeed. negative error code on failure */ -int amd_context_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity) +int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity) { int r = 0; - struct amd_run_queue *rq = entity->generic_entity.belongto_rq; + struct amd_run_queue *rq = entity->belongto_rq; if (!is_context_entity_initialized(sched, entity)) return 0; @@ -283,7 +272,7 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, } mutex_lock(&rq->lock); - rq_remove_entity(rq, &entity->generic_entity); + rq_remove_entity(rq, entity); mutex_unlock(&rq->lock); kfifo_free(&entity->job_queue); return r; @@ -293,7 +282,7 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, * Submit a normal job to the job queue * * @sched The pointer to the scheduler - * @c_entity The pointer to amd_context_entity + * @c_entity The pointer to amd_sched_entity * @job The pointer to job required to submit * return 0 if succeed. -1 if failed. * -2 indicate queue is full for this client, client should wait untill @@ -301,7 +290,7 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, * -1 other fail. 
*/ int amd_sched_push_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, void *job) { while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), @@ -328,7 +317,7 @@ int amd_sched_push_job(struct amd_gpu_scheduler *sched, * * return =0 signaled , <0 failed */ -int amd_sched_wait_emit(struct amd_context_entity *c_entity, +int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, bool intr, long timeout) @@ -369,7 +358,7 @@ static int amd_sched_main(void *param) int r; void *job; struct sched_param sparam = {.sched_priority = 1}; - struct amd_context_entity *c_entity = NULL; + struct amd_sched_entity *c_entity = NULL; struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; sched_setscheduler(current, SCHED_FIFO, &sparam); @@ -505,7 +494,7 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched) * @entity The context entity * @seq The sequence number for the latest emitted job */ -void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq) +void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq) { atomic64_set(&c_entity->last_emitted_v_seq, seq); wake_up_all(&c_entity->wait_emit); @@ -518,7 +507,7 @@ void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq) * * return the next queued sequence number */ -uint64_t amd_sched_next_queued_seq(struct amd_context_entity *c_entity) +uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity) { return atomic64_read(&c_entity->last_queued_v_seq) + 1; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 64ef0e2b1543..a3e29df957fc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -41,6 +41,17 @@ struct amd_run_queue; struct amd_sched_entity { struct list_head list; struct amd_run_queue *belongto_rq; + spinlock_t lock; + /* the virtual_seq is unique per context per ring */ + atomic64_t last_queued_v_seq; + atomic64_t last_emitted_v_seq; + /* the job_queue maintains the jobs submitted by clients */ + struct kfifo job_queue; + spinlock_t queue_lock; + struct amd_gpu_scheduler *scheduler; + wait_queue_head_t wait_queue; + wait_queue_head_t wait_emit; + bool is_pending; }; /** @@ -61,25 +72,6 @@ struct amd_run_queue { int (*check_entity_status)(struct amd_sched_entity *entity); }; -/** - * Context based scheduler entity, there can be multiple entities for - * each context, and one entity per ring -*/ -struct amd_context_entity { - struct amd_sched_entity generic_entity; - spinlock_t lock; - /* the virtual_seq is unique per context per ring */ - atomic64_t last_queued_v_seq; - atomic64_t last_emitted_v_seq; - /* the job_queue maintains the jobs submitted by clients */ - struct kfifo job_queue; - spinlock_t queue_lock; - struct amd_gpu_scheduler *scheduler; - wait_queue_head_t wait_queue; - wait_queue_head_t wait_emit; - bool is_pending; -}; - struct amd_sched_job { struct list_head list; struct fence_cb cb; @@ -93,10 +85,10 @@ struct amd_sched_job { */ struct amd_sched_backend_ops { int (*prepare_job)(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, void *job); void (*run_job)(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, struct amd_sched_job *job); void (*process_job)(struct amd_gpu_scheduler *sched, void *job); }; @@ -116,7 +108,7 @@ struct amd_gpu_scheduler { 
uint32_t granularity; /* in ms unit */ uint32_t preemption; wait_queue_head_t wait_queue; - struct amd_context_entity *current_entity; + struct amd_sched_entity *current_entity; struct mutex sched_lock; spinlock_t queue_lock; uint32_t hw_submission_limit; @@ -132,10 +124,10 @@ struct amd_gpu_scheduler *amd_sched_create(void *device, int amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_push_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, void *job); -int amd_sched_wait_emit(struct amd_context_entity *c_entity, +int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, bool intr, long timeout); @@ -143,16 +135,15 @@ int amd_sched_wait_emit(struct amd_context_entity *c_entity, void amd_sched_process_job(struct amd_sched_job *sched_job); uint64_t amd_sched_get_handled_seq(struct amd_gpu_scheduler *sched); -int amd_context_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity); - -int amd_context_entity_init(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity, - struct amd_run_queue *rq, - uint32_t jobs); +int amd_sched_entity_init(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity, + struct amd_run_queue *rq, + uint32_t jobs); +int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity); -void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq); +void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); -uint64_t amd_sched_next_queued_seq(struct amd_context_entity *c_entity); +uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); #endif -- cgit From f556cb0caeec1ba9b8e5e2aa85b47e76277f5d4b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Sun, 2 Aug 2015 11:18:04 +0800 Subject: drm/amd: add scheduler fence implementation (v2) scheduler fence is based on kernel fence framework. 
v2: squash in Christian's build fix Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 34 ++++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 33 +++++++- drivers/gpu/drm/amd/scheduler/sched_fence.c | 112 ++++++++++++++++++++++++++ 9 files changed, 202 insertions(+), 38 deletions(-) create mode 100644 drivers/gpu/drm/amd/scheduler/sched_fence.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f1cb7d2fa411..04c270757030 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -86,6 +86,7 @@ amdgpu-y += amdgpu_cgs.o # GPU scheduler amdgpu-y += \ ../scheduler/gpu_scheduler.o \ + ../scheduler/sched_fence.o \ amdgpu_sched.o amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 987e3075a03f..2ba448ee948b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1261,6 +1261,7 @@ struct amdgpu_cs_parser { int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*run_job)(struct amdgpu_cs_parser *sched_job); int (*free_job)(struct amdgpu_cs_parser *sched_job); + struct amd_sched_fence *s_fence; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b1dc7e1ed271..f428288d8363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -899,8 +899,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -910,10 +908,21 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; - amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].entity, - parser); - cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; + mutex_lock(&parser->job_lock); + r = amd_sched_push_job(ring->scheduler, + &parser->ctx->rings[ring->idx].entity, + parser, + &parser->s_fence); + if (r) { + mutex_unlock(&parser->job_lock); + goto out; + } + parser->ibs[parser->num_ibs - 1].sequence = + amdgpu_ctx_add_fence(parser->ctx, ring, + &parser->s_fence->base, + parser->s_fence->v_seq); + cs->out.handle = parser->s_fence->v_seq; + mutex_unlock(&parser->job_lock); up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 232e800eea56..1833f05c7e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -268,16 +268,6 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct 
amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; uint64_t queued_seq; - int r; - - if (amdgpu_enable_scheduler) { - r = amd_sched_wait_emit(&cring->entity, - seq, - false, - -1); - if (r) - return NULL; - } spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index eed409c59492..5104e64e9ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -218,7 +218,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, sequence = amdgpu_enable_scheduler ? ib->sequence : 0; - if (ib->ctx) + if (!amdgpu_enable_scheduler && ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, &ib->fence->base, sequence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d82f2481bd0e..6a7e83edcaa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -118,7 +118,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, { int r = 0; if (amdgpu_enable_scheduler) { - uint64_t v_seq; struct amdgpu_cs_parser *sched_job = amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, ibs, num_ibs); @@ -126,22 +125,23 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); - ibs[num_ibs - 1].sequence = v_seq; - amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].entity, - sched_job); - r = amd_sched_wait_emit( - &adev->kernel_ctx.rings[ring->idx].entity, - v_seq, - false, - -1); - if (r) - WARN(true, "emit timeout\n"); - } else + mutex_lock(&sched_job->job_lock); + r = amd_sched_push_job(ring->scheduler, + &adev->kernel_ctx.rings[ring->idx].entity, + sched_job, &sched_job->s_fence); + if (r) { + mutex_unlock(&sched_job->job_lock); + kfree(sched_job); + return r; + } + ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; + *f = &sched_job->s_fence->base; + mutex_unlock(&sched_job->job_lock); + } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); - if (r) - return r; - *f = &ibs[num_ibs - 1].fence->base; + if (r) + return r; + *f = &ibs[num_ibs - 1].fence->base; + } return 0; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 33b4f55e48b1..402086d96889 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -180,6 +180,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, uint32_t jobs) { uint64_t seq_ring = 0; + char name[20]; if (!(sched && entity && rq)) return -EINVAL; @@ -191,6 +192,10 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); + entity->fence_context = fence_context_alloc(1); + snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context); + memcpy(entity->name, name, 20); + INIT_LIST_HEAD(&entity->fence_list); if(kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL)) @@ -199,6 +204,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, spin_lock_init(&entity->queue_lock); atomic64_set(&entity->last_emitted_v_seq, seq_ring); atomic64_set(&entity->last_queued_v_seq, seq_ring); + atomic64_set(&entity->last_signaled_v_seq, seq_ring); /* Add the entity to 
the run queue */ mutex_lock(&rq->lock); @@ -291,15 +297,25 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data) + void *data, + struct amd_sched_fence **fence) { - struct amd_sched_job *job = kzalloc(sizeof(struct amd_sched_job), - GFP_KERNEL); + struct amd_sched_job *job; + + if (!fence) + return -EINVAL; + job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); if (!job) return -ENOMEM; job->sched = sched; job->s_entity = c_entity; job->data = data; + *fence = amd_sched_fence_create(c_entity); + if ((*fence) == NULL) { + kfree(job); + return -EINVAL; + } + job->s_fence = *fence; while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), &c_entity->queue_lock) != sizeof(void *)) { /** @@ -368,12 +384,16 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) unsigned long flags; sched = sched_job->sched; + atomic64_set(&sched_job->s_entity->last_signaled_v_seq, + sched_job->s_fence->v_seq); + amd_sched_fence_signal(sched_job->s_fence); spin_lock_irqsave(&sched->queue_lock, flags); list_del(&sched_job->list); atomic64_dec(&sched->hw_rq_count); spin_unlock_irqrestore(&sched->queue_lock, flags); sched->ops->process_job(sched, sched_job); + fence_put(&sched_job->s_fence->base); kfree(sched_job); wake_up_interruptible(&sched->wait_queue); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f54615d6a500..300132f14d74 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -45,6 +45,7 @@ struct amd_sched_entity { /* the virtual_seq is unique per context per ring */ atomic64_t last_queued_v_seq; atomic64_t last_emitted_v_seq; + atomic64_t last_signaled_v_seq; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; @@ -52,6 +53,9 @@ struct amd_sched_entity { wait_queue_head_t wait_queue; wait_queue_head_t wait_emit; bool is_pending; + uint64_t fence_context; + struct list_head fence_list; + char name[20]; }; /** @@ -72,14 +76,35 @@ struct amd_run_queue { int (*check_entity_status)(struct amd_sched_entity *entity); }; +struct amd_sched_fence { + struct fence base; + struct fence_cb cb; + struct list_head list; + struct amd_sched_entity *entity; + uint64_t v_seq; + spinlock_t lock; +}; + struct amd_sched_job { struct list_head list; struct fence_cb cb; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; void *data; + struct amd_sched_fence *s_fence; }; +extern const struct fence_ops amd_sched_fence_ops; +static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) +{ + struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base); + + if (__f->base.ops == &amd_sched_fence_ops) + return __f; + + return NULL; +} + /** * Define the backend operations called by the scheduler, * these functions should be implemented in driver side @@ -126,7 +151,8 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data); + void *data, + struct amd_sched_fence **fence); int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, @@ -146,4 +172,9 @@ void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); +struct amd_sched_fence *amd_sched_fence_create( + struct 
amd_sched_entity *s_entity); +void amd_sched_fence_signal(struct amd_sched_fence *fence); + + #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c new file mode 100644 index 000000000000..d580a357c547 --- /dev/null +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#include +#include +#include +#include +#include "gpu_scheduler.h" + +static void amd_sched_fence_wait_cb(struct fence *f, struct fence_cb *cb) +{ + struct amd_sched_fence *fence = + container_of(cb, struct amd_sched_fence, cb); + list_del_init(&fence->list); + fence_put(&fence->base); +} + +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity) +{ + struct amd_sched_fence *fence = NULL; + fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); + fence->entity = s_entity; + spin_lock_init(&fence->lock); + fence_init(&fence->base, &amd_sched_fence_ops, + &fence->lock, + s_entity->fence_context, + fence->v_seq); + fence_get(&fence->base); + list_add_tail(&fence->list, &s_entity->fence_list); + if (fence_add_callback(&fence->base,&fence->cb, + amd_sched_fence_wait_cb)) { + fence_put(&fence->base); + kfree(fence); + return NULL; + } + return fence; +} + +bool amd_sched_check_ts(struct amd_sched_entity *s_entity, uint64_t v_seq) +{ + return atomic64_read(&s_entity->last_signaled_v_seq) >= v_seq ? 
true : false; +} + +void amd_sched_fence_signal(struct amd_sched_fence *fence) +{ + if (amd_sched_check_ts(fence->entity, fence->v_seq)) { + int ret = fence_signal_locked(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + } else + WARN(true, "fence process dismattch with job!\n"); +} + +static const char *amd_sched_fence_get_driver_name(struct fence *fence) +{ + return "amd_sched"; +} + +static const char *amd_sched_fence_get_timeline_name(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + return (const char *)fence->entity->name; +} + +static bool amd_sched_fence_enable_signaling(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return !amd_sched_check_ts(fence->entity, fence->v_seq); +} + +static bool amd_sched_fence_is_signaled(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return amd_sched_check_ts(fence->entity, fence->v_seq); +} + +const struct fence_ops amd_sched_fence_ops = { + .get_driver_name = amd_sched_fence_get_driver_name, + .get_timeline_name = amd_sched_fence_get_timeline_name, + .enable_signaling = amd_sched_fence_enable_signaling, + .signaled = amd_sched_fence_is_signaled, + .wait = fence_default_wait, + .release = NULL, +}; -- cgit From 432a4ff8b7224908a8bbc34b598f48af3f42b827 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 12 Aug 2015 11:46:04 +0200 Subject: drm/amdgpu: cleanup sheduler rq handling v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework run queue implementation, especially remove the odd list handling. v2: cleanup the code only, no algorithem change. Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 103 +++++++++++--------------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 25 +++---- 3 files changed, 54 insertions(+), 76 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 1833f05c7e0b..08bc7722ddb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -41,7 +41,7 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, if (amdgpu_enable_scheduler) { /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { - struct amd_run_queue *rq; + struct amd_sched_rq *rq; if (kernel) rq = &adev->rings[i]->scheduler->kernel_rq; else diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 3d45ff29eaa8..265d3e2f63cc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -28,31 +28,29 @@ #include "gpu_scheduler.h" /* Initialize a given run queue struct */ -static void init_rq(struct amd_run_queue *rq) +static void amd_sched_rq_init(struct amd_sched_rq *rq) { - INIT_LIST_HEAD(&rq->head.list); - rq->head.belongto_rq = rq; + INIT_LIST_HEAD(&rq->entities); mutex_init(&rq->lock); - atomic_set(&rq->nr_entity, 0); - rq->current_entity = &rq->head; + rq->current_entity = NULL; } -/* Note: caller must hold the lock or in a atomic context */ -static void rq_remove_entity(struct amd_run_queue *rq, - struct amd_sched_entity *entity) +static void amd_sched_rq_add_entity(struct amd_sched_rq *rq, + struct amd_sched_entity 
*entity) { - if (rq->current_entity == entity) - rq->current_entity = list_entry(entity->list.prev, - typeof(*entity), list); - list_del_init(&entity->list); - atomic_dec(&rq->nr_entity); + mutex_lock(&rq->lock); + list_add_tail(&entity->list, &rq->entities); + mutex_unlock(&rq->lock); } -static void rq_add_entity(struct amd_run_queue *rq, - struct amd_sched_entity *entity) +static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq, + struct amd_sched_entity *entity) { - list_add_tail(&entity->list, &rq->head.list); - atomic_inc(&rq->nr_entity); + mutex_lock(&rq->lock); + list_del_init(&entity->list); + if (rq->current_entity == entity) + rq->current_entity = NULL; + mutex_unlock(&rq->lock); } /** @@ -60,38 +58,32 @@ static void rq_add_entity(struct amd_run_queue *rq, * It could return the same entity as current one if current is the only * available one in the queue. Return NULL if nothing available. */ -static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq) +static struct amd_sched_entity * +amd_sched_rq_select_entity(struct amd_sched_rq *rq) { - struct amd_sched_entity *p = rq->current_entity; - int i = atomic_read(&rq->nr_entity) + 1; /*real count + dummy head*/ - - while (i) { - p = list_entry(p->list.next, typeof(*p), list); - if (!rq->check_entity_status(p)) { - rq->current_entity = p; - break; + struct amd_sched_entity *entity = rq->current_entity; + + if (entity) { + list_for_each_entry_continue(entity, &rq->entities, list) { + if (!kfifo_is_empty(&entity->job_queue)) { + rq->current_entity = entity; + return rq->current_entity; + } } - i--; } - return i ? p : NULL; -} -static bool context_entity_is_waiting(struct amd_sched_entity *entity) -{ - /* TODO: sync obj for multi-ring synchronization */ - return false; -} + list_for_each_entry(entity, &rq->entities, list) { -static int gpu_entity_check_status(struct amd_sched_entity *entity) -{ - if (entity == &entity->belongto_rq->head) - return -1; + if (!kfifo_is_empty(&entity->job_queue)) { + rq->current_entity = entity; + return rq->current_entity; + } - if (kfifo_is_empty(&entity->job_queue) || - context_entity_is_waiting(entity)) - return -1; + if (entity == rq->current_entity) + break; + } - return 0; + return NULL; } /** @@ -124,10 +116,10 @@ static struct amd_sched_entity * kernel_rq_select_context(struct amd_gpu_scheduler *sched) { struct amd_sched_entity *sched_entity; - struct amd_run_queue *rq = &sched->kernel_rq; + struct amd_sched_rq *rq = &sched->kernel_rq; mutex_lock(&rq->lock); - sched_entity = rq_select_entity(rq); + sched_entity = amd_sched_rq_select_entity(rq); mutex_unlock(&rq->lock); return sched_entity; } @@ -140,7 +132,7 @@ select_context(struct amd_gpu_scheduler *sched) { struct amd_sched_entity *wake_entity = NULL; struct amd_sched_entity *tmp; - struct amd_run_queue *rq; + struct amd_sched_rq *rq; if (!is_scheduler_ready(sched)) return NULL; @@ -152,7 +144,7 @@ select_context(struct amd_gpu_scheduler *sched) rq = &sched->sched_rq; mutex_lock(&rq->lock); - tmp = rq_select_entity(rq); + tmp = amd_sched_rq_select_entity(rq); mutex_unlock(&rq->lock); exit: if (sched->current_entity && (sched->current_entity != tmp)) @@ -176,7 +168,7 @@ exit: */ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, - struct amd_run_queue *rq, + struct amd_sched_rq *rq, uint32_t jobs) { uint64_t seq_ring = 0; @@ -206,9 +198,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, atomic64_set(&entity->last_signaled_v_seq, seq_ring); /* Add the entity to the run 
queue */ - mutex_lock(&rq->lock); - rq_add_entity(rq, entity); - mutex_unlock(&rq->lock); + amd_sched_rq_add_entity(rq, entity); return 0; } @@ -254,7 +244,7 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { int r = 0; - struct amd_run_queue *rq = entity->belongto_rq; + struct amd_sched_rq *rq = entity->belongto_rq; if (!is_context_entity_initialized(sched, entity)) return 0; @@ -276,9 +266,7 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, entity); } - mutex_lock(&rq->lock); - rq_remove_entity(rq, entity); - mutex_unlock(&rq->lock); + amd_sched_rq_remove_entity(rq, entity); kfifo_free(&entity->job_queue); return r; } @@ -429,11 +417,8 @@ struct amd_gpu_scheduler *amd_sched_create(void *device, snprintf(name, sizeof(name), "gpu_sched[%d]", ring); mutex_init(&sched->sched_lock); spin_lock_init(&sched->queue_lock); - init_rq(&sched->sched_rq); - sched->sched_rq.check_entity_status = gpu_entity_check_status; - - init_rq(&sched->kernel_rq); - sched->kernel_rq.check_entity_status = gpu_entity_check_status; + amd_sched_rq_init(&sched->sched_rq); + amd_sched_rq_init(&sched->kernel_rq); init_waitqueue_head(&sched->wait_queue); INIT_LIST_HEAD(&sched->active_hw_rq); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 47823b4a71e0..ceb5918bfbeb 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -30,7 +30,7 @@ #define AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 struct amd_gpu_scheduler; -struct amd_run_queue; +struct amd_sched_rq; /** * A scheduler entity is a wrapper around a job queue or a group @@ -40,7 +40,7 @@ struct amd_run_queue; */ struct amd_sched_entity { struct list_head list; - struct amd_run_queue *belongto_rq; + struct amd_sched_rq *belongto_rq; spinlock_t lock; /* the virtual_seq is unique per context per ring */ atomic64_t last_queued_v_seq; @@ -62,17 +62,10 @@ struct amd_sched_entity { * one specific ring. It implements the scheduling policy that selects * the next entity to emit commands from. */ -struct amd_run_queue { - struct mutex lock; - atomic_t nr_entity; - struct amd_sched_entity head; - struct amd_sched_entity *current_entity; - /** - * Return 0 means this entity can be scheduled - * Return -1 means this entity cannot be scheduled for reasons, - * i.e, it is the head, or these is no job, etc - */ - int (*check_entity_status)(struct amd_sched_entity *entity); +struct amd_sched_rq { + struct mutex lock; + struct list_head entities; + struct amd_sched_entity *current_entity; }; struct amd_sched_fence { @@ -124,8 +117,8 @@ struct amd_sched_backend_ops { struct amd_gpu_scheduler { void *device; struct task_struct *thread; - struct amd_run_queue sched_rq; - struct amd_run_queue kernel_rq; + struct amd_sched_rq sched_rq; + struct amd_sched_rq kernel_rq; struct list_head active_hw_rq; atomic64_t hw_rq_count; struct amd_sched_backend_ops *ops; @@ -154,7 +147,7 @@ int amd_sched_push_job(struct amd_gpu_scheduler *sched, int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, - struct amd_run_queue *rq, + struct amd_sched_rq *rq, uint32_t jobs); int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); -- cgit
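
The run queue rework in the final patch above ("drm/amdgpu: cleanup sheduler rq handling v2") boils down to a simple round-robin policy: keep a plain list of entities, remember which entity ran last, and on each pick resume the scan just after it, skipping entities whose job queue is empty. The sketch below is a minimal, self-contained userspace illustration of that selection policy only; the types (sched_rq, sched_entity), the hand-rolled circular list, the pending_jobs counter standing in for kfifo_is_empty(), and the gfx/dma/uvd names are simplified stand-ins invented for the example, not the kernel structures, list helpers, or locking.

/* Hedged sketch: round-robin entity selection in the spirit of the
 * reworked amd_sched_rq_select_entity(); simplified, userspace-only. */
#include <stdio.h>

struct sched_entity {
	struct sched_entity *next;   /* circular singly linked list */
	const char *name;
	int pending_jobs;            /* stand-in for !kfifo_is_empty() */
};

struct sched_rq {
	struct sched_entity *entities;       /* any entry of the ring */
	struct sched_entity *current_entity; /* entity picked last time */
};

/* Pick the next runnable entity after current_entity, wrapping around
 * once (current_entity itself is considered last); NULL if all idle. */
static struct sched_entity *rq_select_entity(struct sched_rq *rq)
{
	struct sched_entity *start = rq->current_entity ?
				     rq->current_entity->next : rq->entities;
	struct sched_entity *e = start;

	if (!e)
		return NULL;

	do {
		if (e->pending_jobs > 0) {
			rq->current_entity = e;
			return e;
		}
		e = e->next;
	} while (e != start);

	return NULL;
}

int main(void)
{
	struct sched_entity a = { .name = "gfx", .pending_jobs = 0 };
	struct sched_entity b = { .name = "dma", .pending_jobs = 2 };
	struct sched_entity c = { .name = "uvd", .pending_jobs = 1 };
	struct sched_rq rq = { .entities = &a, .current_entity = NULL };

	/* build the ring a -> b -> c -> a */
	a.next = &b; b.next = &c; c.next = &a;

	for (int i = 0; i < 3; ++i) {
		struct sched_entity *e = rq_select_entity(&rq);
		printf("picked: %s\n", e ? e->name : "(idle)");
		if (e)
			e->pending_jobs--;
	}
	return 0;
}

Because the scan always resumes after the entity that was picked last, a busy entity cannot starve its neighbours, which is the fairness property the patch preserves while dropping the dummy-head bookkeeping and the per-rq entity counter.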