From 91e1a5207edec9e4f888e44478a9a254186e0ba8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 6 Jul 2015 22:06:40 +0200 Subject: drm/amdgpu: deal with foreign fences in amdgpu_sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also requires some error handling from the callers of that function. Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index bc0fac618a3f..2722815eddbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -167,7 +167,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, /* grab a vm id if necessary */ struct amdgpu_fence *vm_id_fence = NULL; vm_id_fence = amdgpu_vm_grab_id(ibs->ring, ibs->vm); - amdgpu_sync_fence(&ibs->sync, vm_id_fence); + r = amdgpu_sync_fence(adev, &ibs->sync, &vm_id_fence->base); + if (r) { + amdgpu_ring_unlock_undo(ring); + return r; + } } r = amdgpu_sync_rings(&ibs->sync, ring); -- cgit From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 7 Jul 2015 17:24:49 +0200 Subject: drm/amdgpu: add user fence context map v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a prerequisite for the GPU scheduler to make the order of submission independent from the order of execution. v2: properly implement the locking Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 ++++++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++- 4 files changed, 110 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70e783a849ed..0220d98ba8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -415,6 +415,8 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; + /* resulting sequence number */ + uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -985,9 +987,18 @@ struct amdgpu_vm_manager { * context related structures */ +#define AMDGPU_CTX_MAX_CS_PENDING 16 + +struct amdgpu_ctx_ring { + uint64_t sequence; + struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; +}; + struct amdgpu_ctx { struct kref refcount; unsigned reset_counter; + spinlock_t ring_lock; + struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; }; struct amdgpu_ctx_mgr { @@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence); +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq); + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 
53e6a10fe9f9..cef8360698be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, sizeof(struct drm_amdgpu_cs_chunk_dep); for (j = 0; j < num_deps; ++j) { - struct amdgpu_fence *fence; struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; + struct fence *fence; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, if (ctx == NULL) return -EINVAL; - r = amdgpu_fence_recreate(ring, p->filp, - deps[j].handle, - &fence); - if (r) { + fence = amdgpu_ctx_get_fence(ctx, ring, + deps[j].handle); + if (IS_ERR(fence)) { + r = PTR_ERR(fence); amdgpu_ctx_put(ctx); return r; - } - - r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); - amdgpu_fence_unref(&fence); - amdgpu_ctx_put(ctx); - if (r) - return r; + } else if (fence) { + r = amdgpu_sync_fence(adev, &ib->sync, fence); + fence_put(fence); + amdgpu_ctx_put(ctx); + if (r) + return r; + } } } @@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_ib_fill(adev, &parser); } - if (!r) + if (!r) { r = amdgpu_cs_dependencies(adev, &parser); + if (r) + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + } if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); @@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } - cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; + cs->out.handle = parser.uf.sequence; out: amdgpu_cs_parser_fini(&parser, r, true); up_read(&adev->exclusive_lock); @@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); - struct amdgpu_fence *fence = NULL; struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; + struct fence *fence; long r; + r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) + return r; + ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); - r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + + } else + r = 1; - r = fence_wait_timeout(&fence->base, true, timeout); - amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e63cfb7fa390..c23bfd8fe414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,17 +28,22 @@ static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + unsigned i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) + fence_put(ctx->rings[i].fences[j]); kfree(ctx); } int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id) { - int r; struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr 
*mgr = &fpriv->ctx_mgr; + int i, r; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, memset(ctx, 0, sizeof(*ctx)); kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + ctx->rings[i].sequence = 1; mutex_unlock(&mgr->lock); return 0; @@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) kref_put(&ctx->refcount, amdgpu_ctx_do_release); return 0; } + +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + uint64_t seq = cring->sequence; + unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; + struct fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + } + + fence_get(fence); + + spin_lock(&ctx->ring_lock); + cring->fences[idx] = fence; + cring->sequence++; + spin_unlock(&ctx->ring_lock); + + fence_put(other); + + return seq; +} + +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + struct fence *fence; + + spin_lock(&ctx->ring_lock); + if (seq >= cring->sequence) { + spin_unlock(&ctx->ring_lock); + return ERR_PTR(-EINVAL); + } + + if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { + spin_unlock(&ctx->ring_lock); + return NULL; + } + + fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]); + spin_unlock(&ctx->ring_lock); + + return fence; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2722815eddbb..95d533422a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); + ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, + &ib->fence->base); addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, + amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, AMDGPU_FENCE_FLAG_64BIT); } -- cgit From 7f8a5290f5b6c14dd1d295e2508e0dd193a9fda5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 20 Jul 2015 16:09:40 +0200 Subject: drm/amdgpu: rework vm_grab_id interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes assigning VM IDs independent from the use of VM IDs. 
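Roughly, call sites move from syncing to the returned fence by hand to letting the function fill the sync object; a sketch taken from the amdgpu_ib.c hunk below, with error paths trimmed:

	/* before: fetch the owning fence and merge it into the sync object manually */
	vm_id_fence = amdgpu_vm_grab_id(ibs->ring, ibs->vm);
	r = amdgpu_sync_fence(adev, &ibs->sync, &vm_id_fence->base);

	/* after: amdgpu_vm_grab_id() adds any needed dependencies itself */
	r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
	if (r) {
		amdgpu_ring_unlock_undo(ring);
		return r;
	}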
Signed-off-by: Christian König Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++++++---------- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4e8124749f9f..9f47b3e013c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2294,8 +2294,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct list_head *head); -struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring, - struct amdgpu_vm *vm); +int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync); void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vm *vm, struct amdgpu_fence *updates); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 95d533422a5b..f3ac9d8a5691 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -165,9 +165,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, if (vm) { /* grab a vm id if necessary */ - struct amdgpu_fence *vm_id_fence = NULL; - vm_id_fence = amdgpu_vm_grab_id(ibs->ring, ibs->vm); - r = amdgpu_sync_fence(adev, &ibs->sync, &vm_id_fence->base); + r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync); if (r) { amdgpu_ring_unlock_undo(ring); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1e895b979ec6..fd8395f25723 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -127,16 +127,16 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev, /** * amdgpu_vm_grab_id - allocate the next free VMID * - * @ring: ring we want to submit job to * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies * - * Allocate an id for the vm (cayman+). - * Returns the fence we need to sync to (if any). + * Allocate an id for the vm, adding fences to the sync obj as necessary. * - * Global and local mutex must be locked! + * Global mutex must be locked! 
*/ -struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring, - struct amdgpu_vm *vm) +int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync) { struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {}; struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; @@ -148,7 +148,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring, /* check if the id is still valid */ if (vm_id->id && vm_id->last_id_use && vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) - return NULL; + return 0; /* we definately need to flush */ vm_id->pd_gpu_addr = ~0ll; @@ -161,7 +161,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring, /* found a free one */ vm_id->id = i; trace_amdgpu_vm_grab_id(i, ring->idx); - return NULL; + return 0; } if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) { @@ -172,15 +172,19 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring, for (i = 0; i < 2; ++i) { if (choices[i]) { + struct amdgpu_fence *fence; + + fence = adev->vm_manager.active[choices[i]]; vm_id->id = choices[i]; + trace_amdgpu_vm_grab_id(choices[i], ring->idx); - return adev->vm_manager.active[choices[i]]; + return amdgpu_sync_fence(ring->adev, sync, &fence->base); } } /* should never happen */ BUG(); - return NULL; + return -EINVAL; } /** -- cgit From 5430a3ffb0b1902e8aea4ed2ba256b1263126e8d Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 21 Jul 2015 18:02:21 +0200 Subject: drm/amdgpu: fix UVD/VCE fence handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to return the sequence number to userspace even when we don't use user fences. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 ++++++--- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 551143287698..e6c26c1716b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -414,8 +414,6 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; - /* resulting sequence number */ - uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -847,6 +845,8 @@ struct amdgpu_ib { uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; uint32_t flags; + /* resulting sequence number */ + uint64_t sequence; }; enum amdgpu_ring_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index cef8360698be..4794e14976ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -794,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } - cs->out.handle = parser.uf.sequence; + cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; out: amdgpu_cs_parser_fini(&parser, r, true); up_read(&adev->exclusive_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index f3ac9d8a5691..42d6298eb9d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -88,6 +88,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, ib->fence = NULL; ib->user = NULL; ib->vm = vm; + ib->ctx = NULL; ib->gds_base = 0; 
ib->gds_size = 0; ib->gws_base = 0; @@ -214,13 +215,15 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, return r; } + if (ib->ctx) + ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, + &ib->fence->base); + /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); - ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base); addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, + amdgpu_ring_emit_fence(ring, addr, ib->sequence, AMDGPU_FENCE_FLAG_64BIT); } -- cgit From d1ff9086c1b8e67390161599006a34056b437a72 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 30 Jul 2015 17:59:43 +0800 Subject: drm/amdgpu: fix seq in ctx_add_fence If the scheduler is enabled, the queued sequence number is assigned when the job is pushed, before the job is emitted. Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 6 files changed, 15 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6e1fea473a66..2619c78ec303 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -419,7 +419,6 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; - uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -1031,7 +1030,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence); + struct fence *fence, uint64_t queued_seq); struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c41360e443be..40e85bfcdf91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -739,7 +739,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ib->oa_size = amdgpu_bo_size(oa); } } - /* wrap the last IB with user fence */ if (parser->uf.bo) { struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; @@ -908,7 +907,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->uf.sequence = atomic64_inc_return( + parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); @@ -922,7 +921,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) amd_sched_push_job(ring->scheduler, &parser->ctx->rings[ring->idx].c_entity, parser); - cs->out.handle = parser->uf.sequence; + cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 95807b678b6a..e0eaa55bf636
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -258,7 +258,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) } uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence) + struct fence *fence, uint64_t queued_seq) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; uint64_t seq = 0; @@ -266,7 +266,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct fence *other = NULL; if (amdgpu_enable_scheduler) - seq = atomic64_read(&cring->c_entity.last_queued_v_seq); + seq = queued_seq; else seq = cring->sequence; idx = seq % AMDGPU_CTX_MAX_CS_PENDING; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 42d6298eb9d7..eed409c59492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,6 +143,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_ring *ring; struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; + uint64_t sequence; unsigned i; int r = 0; @@ -215,9 +216,12 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, return r; } + sequence = amdgpu_enable_scheduler ? ib->sequence : 0; + if (ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base); + &ib->fence->base, + sequence); /* wrap the last IB with fence */ if (ib->user) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 46ec915c9344..b913c22dd6b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -62,7 +62,7 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, goto err; } atomic64_set(&c_entity->last_emitted_v_seq, - sched_job->uf.sequence); + sched_job->ibs[sched_job->num_ibs - 1].sequence); wake_up_all(&c_entity->wait_emit); mutex_unlock(&sched_job->job_lock); @@ -93,7 +93,7 @@ static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, void *job) if (sched_job->ctx) { c_entity = &sched_job->ctx->rings[ring->idx].c_entity; atomic64_set(&c_entity->last_signaled_v_seq, - sched_job->uf.sequence); + sched_job->ibs[sched_job->num_ibs - 1].sequence); } /* wake up users waiting for time stamp */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 26c55a7a1a88..5624d4484fb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -380,7 +380,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); @@ -531,7 +531,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); @@ -884,7 +884,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; 
v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); -- cgit From f556cb0caeec1ba9b8e5e2aa85b47e76277f5d4b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Sun, 2 Aug 2015 11:18:04 +0800 Subject: drm/amd: add scheduler fence implementation (v2) The scheduler fence is based on the kernel fence framework. v2: squash in Christian's build fix Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 34 ++++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 33 +++++++- drivers/gpu/drm/amd/scheduler/sched_fence.c | 112 ++++++++++++++++++++++++++ 9 files changed, 202 insertions(+), 38 deletions(-) create mode 100644 drivers/gpu/drm/amd/scheduler/sched_fence.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f1cb7d2fa411..04c270757030 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -86,6 +86,7 @@ amdgpu-y += amdgpu_cgs.o # GPU scheduler amdgpu-y += \ ../scheduler/gpu_scheduler.o \ + ../scheduler/sched_fence.o \ amdgpu_sched.o amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 987e3075a03f..2ba448ee948b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1261,6 +1261,7 @@ struct amdgpu_cs_parser { int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*run_job)(struct amdgpu_cs_parser *sched_job); int (*free_job)(struct amdgpu_cs_parser *sched_job); + struct amd_sched_fence *s_fence; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b1dc7e1ed271..f428288d8363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -899,8 +899,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -910,10 +908,21 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; - amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].entity, - parser); - cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; + mutex_lock(&parser->job_lock); + r = amd_sched_push_job(ring->scheduler, + &parser->ctx->rings[ring->idx].entity, + parser, + &parser->s_fence); + if (r) { + mutex_unlock(&parser->job_lock); + goto out; + } + parser->ibs[parser->num_ibs - 1].sequence = + amdgpu_ctx_add_fence(parser->ctx, ring, +
&parser->s_fence->base, + parser->s_fence->v_seq); + cs->out.handle = parser->s_fence->v_seq; + mutex_unlock(&parser->job_lock); up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 232e800eea56..1833f05c7e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -268,16 +268,6 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; uint64_t queued_seq; - int r; - - if (amdgpu_enable_scheduler) { - r = amd_sched_wait_emit(&cring->entity, - seq, - false, - -1); - if (r) - return NULL; - } spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index eed409c59492..5104e64e9ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -218,7 +218,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, sequence = amdgpu_enable_scheduler ? ib->sequence : 0; - if (ib->ctx) + if (!amdgpu_enable_scheduler && ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, &ib->fence->base, sequence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d82f2481bd0e..6a7e83edcaa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -118,7 +118,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, { int r = 0; if (amdgpu_enable_scheduler) { - uint64_t v_seq; struct amdgpu_cs_parser *sched_job = amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, ibs, num_ibs); @@ -126,22 +125,23 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); - ibs[num_ibs - 1].sequence = v_seq; - amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].entity, - sched_job); - r = amd_sched_wait_emit( - &adev->kernel_ctx.rings[ring->idx].entity, - v_seq, - false, - -1); - if (r) - WARN(true, "emit timeout\n"); - } else + mutex_lock(&sched_job->job_lock); + r = amd_sched_push_job(ring->scheduler, + &adev->kernel_ctx.rings[ring->idx].entity, + sched_job, &sched_job->s_fence); + if (r) { + mutex_unlock(&sched_job->job_lock); + kfree(sched_job); + return r; + } + ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; + *f = &sched_job->s_fence->base; + mutex_unlock(&sched_job->job_lock); + } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); - if (r) - return r; - *f = &ibs[num_ibs - 1].fence->base; + if (r) + return r; + *f = &ibs[num_ibs - 1].fence->base; + } return 0; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 33b4f55e48b1..402086d96889 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -180,6 +180,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, uint32_t jobs) { uint64_t seq_ring = 0; + char name[20]; if (!(sched && entity && rq)) return -EINVAL; @@ -191,6 +192,10 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); + entity->fence_context = fence_context_alloc(1); + snprintf(name, 
sizeof(name), "c_entity[%llu]", entity->fence_context); + memcpy(entity->name, name, 20); + INIT_LIST_HEAD(&entity->fence_list); if(kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL)) @@ -199,6 +204,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, spin_lock_init(&entity->queue_lock); atomic64_set(&entity->last_emitted_v_seq, seq_ring); atomic64_set(&entity->last_queued_v_seq, seq_ring); + atomic64_set(&entity->last_signaled_v_seq, seq_ring); /* Add the entity to the run queue */ mutex_lock(&rq->lock); @@ -291,15 +297,25 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data) + void *data, + struct amd_sched_fence **fence) { - struct amd_sched_job *job = kzalloc(sizeof(struct amd_sched_job), - GFP_KERNEL); + struct amd_sched_job *job; + + if (!fence) + return -EINVAL; + job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); if (!job) return -ENOMEM; job->sched = sched; job->s_entity = c_entity; job->data = data; + *fence = amd_sched_fence_create(c_entity); + if ((*fence) == NULL) { + kfree(job); + return -EINVAL; + } + job->s_fence = *fence; while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), &c_entity->queue_lock) != sizeof(void *)) { /** @@ -368,12 +384,16 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) unsigned long flags; sched = sched_job->sched; + atomic64_set(&sched_job->s_entity->last_signaled_v_seq, + sched_job->s_fence->v_seq); + amd_sched_fence_signal(sched_job->s_fence); spin_lock_irqsave(&sched->queue_lock, flags); list_del(&sched_job->list); atomic64_dec(&sched->hw_rq_count); spin_unlock_irqrestore(&sched->queue_lock, flags); sched->ops->process_job(sched, sched_job); + fence_put(&sched_job->s_fence->base); kfree(sched_job); wake_up_interruptible(&sched->wait_queue); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f54615d6a500..300132f14d74 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -45,6 +45,7 @@ struct amd_sched_entity { /* the virtual_seq is unique per context per ring */ atomic64_t last_queued_v_seq; atomic64_t last_emitted_v_seq; + atomic64_t last_signaled_v_seq; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; @@ -52,6 +53,9 @@ struct amd_sched_entity { wait_queue_head_t wait_queue; wait_queue_head_t wait_emit; bool is_pending; + uint64_t fence_context; + struct list_head fence_list; + char name[20]; }; /** @@ -72,14 +76,35 @@ struct amd_run_queue { int (*check_entity_status)(struct amd_sched_entity *entity); }; +struct amd_sched_fence { + struct fence base; + struct fence_cb cb; + struct list_head list; + struct amd_sched_entity *entity; + uint64_t v_seq; + spinlock_t lock; +}; + struct amd_sched_job { struct list_head list; struct fence_cb cb; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; void *data; + struct amd_sched_fence *s_fence; }; +extern const struct fence_ops amd_sched_fence_ops; +static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) +{ + struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base); + + if (__f->base.ops == &amd_sched_fence_ops) + return __f; + + return NULL; +} + /** * Define the backend operations called by the scheduler, * these functions should be implemented in driver side @@ -126,7 +151,8 @@ int 
amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data); + void *data, + struct amd_sched_fence **fence); int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, @@ -146,4 +172,9 @@ void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity); +void amd_sched_fence_signal(struct amd_sched_fence *fence); + + #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c new file mode 100644 index 000000000000..d580a357c547 --- /dev/null +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#include +#include +#include +#include +#include "gpu_scheduler.h" + +static void amd_sched_fence_wait_cb(struct fence *f, struct fence_cb *cb) +{ + struct amd_sched_fence *fence = + container_of(cb, struct amd_sched_fence, cb); + list_del_init(&fence->list); + fence_put(&fence->base); +} + +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity) +{ + struct amd_sched_fence *fence = NULL; + fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); + fence->entity = s_entity; + spin_lock_init(&fence->lock); + fence_init(&fence->base, &amd_sched_fence_ops, + &fence->lock, + s_entity->fence_context, + fence->v_seq); + fence_get(&fence->base); + list_add_tail(&fence->list, &s_entity->fence_list); + if (fence_add_callback(&fence->base,&fence->cb, + amd_sched_fence_wait_cb)) { + fence_put(&fence->base); + kfree(fence); + return NULL; + } + return fence; +} + +bool amd_sched_check_ts(struct amd_sched_entity *s_entity, uint64_t v_seq) +{ + return atomic64_read(&s_entity->last_signaled_v_seq) >= v_seq ? 
true : false; +} + +void amd_sched_fence_signal(struct amd_sched_fence *fence) +{ + if (amd_sched_check_ts(fence->entity, fence->v_seq)) { + int ret = fence_signal_locked(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + } else + WARN(true, "fence process dismattch with job!\n"); +} + +static const char *amd_sched_fence_get_driver_name(struct fence *fence) +{ + return "amd_sched"; +} + +static const char *amd_sched_fence_get_timeline_name(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + return (const char *)fence->entity->name; +} + +static bool amd_sched_fence_enable_signaling(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return !amd_sched_check_ts(fence->entity, fence->v_seq); +} + +static bool amd_sched_fence_is_signaled(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return amd_sched_check_ts(fence->entity, fence->v_seq); +} + +const struct fence_ops amd_sched_fence_ops = { + .get_driver_name = amd_sched_fence_get_driver_name, + .get_timeline_name = amd_sched_fence_get_timeline_name, + .enable_signaling = amd_sched_fence_enable_signaling, + .signaled = amd_sched_fence_is_signaled, + .wait = fence_default_wait, + .release = NULL, +}; -- cgit From b203dd95949ec8d5e30d53446408eb9a4d1bdc62 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Aug 2015 18:23:16 +0200 Subject: drm/amdgpu: fix zeroing all IB fields manually v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problem now is that we don't necessarily call amdgpu_ib_get() in some error paths and so work with uninitialized data. Better require that the memory is already zeroed. 
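For on-stack IBs that means zeroing the structure before the first use, as the ring-test hunks below now do; a sketch with error handling shortened:

	struct amdgpu_ib ib;
	int r;

	/* required: amdgpu_ib_get() no longer clears the fields itself */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}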
v2: better commit message Signed-off-by: Christian König Reviewed-by: Chunming Zhou (v1) Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + 7 files changed, 6 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2cf7f52a5650..a592df574939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -260,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } - p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); + p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); if (!p->ibs) r = -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 5104e64e9ad8..1c237f5e3365 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, if (!vm) ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); - else - ib->gpu_addr = 0; - - } else { - ib->sa_bo = NULL; - ib->ptr = NULL; - ib->gpu_addr = 0; } amdgpu_sync_create(&ib->sync); ib->ring = ring; - ib->fence = NULL; - ib->user = NULL; ib->vm = vm; - ib->ctx = NULL; - ib->gds_base = 0; - ib->gds_size = 0; - ib->gws_base = 0; - ib->gws_size = 0; - ib->oa_base = 0; - ib->oa_size = 0; - ib->flags = 0; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 2b4242b39b0a..af526557ac66 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 9b0cab413677..fab7b236f37f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4b68e6306f40..818edb37fa9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9de8104eddeb..2b0e89e05b1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = 
adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 029f3455f9f9..6f1df03ebc70 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); -- cgit From 4ce9891ee17c6e064cc334e3297f7e992d47f3a6 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 19 Aug 2015 16:41:19 +0800 Subject: drm/amdgpu: improve sa_bo->fence by kernel fence Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 22 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 50 ++++++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 8 files changed, 58 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4addac5f6763..80f2ceaf6af6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -441,7 +441,7 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, - struct amdgpu_fence **array, + struct fence **array, uint32_t count, bool wait_all, bool intr, @@ -654,7 +654,7 @@ struct amdgpu_sa_bo { struct amdgpu_sa_manager *manager; unsigned soffset; unsigned eoffset; - struct amdgpu_fence *fence; + struct fence *fence; }; /* @@ -696,7 +696,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, struct amdgpu_semaphore *semaphore); void amdgpu_semaphore_free(struct amdgpu_device *adev, struct amdgpu_semaphore **semaphore, - struct amdgpu_fence *fence); + struct fence *fence); /* * Synchronization @@ -717,7 +717,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, int amdgpu_sync_rings(struct amdgpu_sync *sync, struct amdgpu_ring *ring); void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct amdgpu_fence *fence); + struct fence *fence); /* * GART structures, functions & helpers diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ae014fcf524e..9a87372c3c79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -836,30 +836,30 @@ static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence) return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); } -static bool amdgpu_test_signaled_any(struct amdgpu_fence **fences, uint32_t count) +static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count) { int idx; - struct amdgpu_fence *fence; + struct fence *fence; for (idx = 0; idx < count; ++idx) { fence = fences[idx]; if (fence) { - if (test_bit(FENCE_FLAG_SIGNALED_BIT,
&fence->base.flags)) + if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return true; } } return false; } -static bool amdgpu_test_signaled_all(struct amdgpu_fence **fences, uint32_t count) +static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count) { int idx; - struct amdgpu_fence *fence; + struct fence *fence; for (idx = 0; idx < count; ++idx) { fence = fences[idx]; if (fence) { - if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return false; } } @@ -885,7 +885,7 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, struct amdgpu_fence *fence = to_amdgpu_fence(f); struct amdgpu_device *adev = fence->ring->adev; - return amdgpu_fence_wait_multiple(adev, &fence, 1, false, intr, t); + return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t); } /** @@ -902,7 +902,7 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr, * If wait_all is false, it will return when any fence is signaled or timeout. */ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, - struct amdgpu_fence **array, + struct fence **array, uint32_t count, bool wait_all, bool intr, @@ -910,7 +910,7 @@ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, { long idx = 0; struct amdgpu_wait_cb *cb; - struct amdgpu_fence *fence; + struct fence *fence; BUG_ON(!array); @@ -924,7 +924,7 @@ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev, fence = array[idx]; if (fence) { cb[idx].task = current; - if (fence_add_callback(&fence->base, + if (fence_add_callback(fence, &cb[idx].base, amdgpu_fence_wait_cb)) { /* The fence is already signaled */ if (wait_all) @@ -967,7 +967,7 @@ fence_rm_cb: for (idx = 0; idx < count; ++idx) { fence = array[idx]; if (fence) - fence_remove_callback(&fence->base, &cb[idx].base); + fence_remove_callback(fence, &cb[idx].base); } err_free_cb: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 1c237f5e3365..13c5978ac69b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -93,8 +93,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, */ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) { - amdgpu_sync_free(adev, &ib->sync, ib->fence); - amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); + amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); + amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); amdgpu_fence_unref(&ib->fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 238465a9ac55..6ea18dcec561 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -193,7 +193,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, unsigned size, unsigned align); void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, - struct amdgpu_fence *fence); + struct fence *fence); #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 4597899e9758..b7cbaa9d532e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -139,6 +139,20 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, return r; } +static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f) +{ + struct 
amdgpu_fence *a_fence; + struct amd_sched_fence *s_fence; + + s_fence = to_amd_sched_fence(f); + if (s_fence) + return s_fence->entity->scheduler->ring_id; + a_fence = to_amdgpu_fence(f); + if (a_fence) + return a_fence->ring->idx; + return 0; +} + static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) { struct amdgpu_sa_manager *sa_manager = sa_bo->manager; @@ -147,7 +161,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) } list_del_init(&sa_bo->olist); list_del_init(&sa_bo->flist); - amdgpu_fence_unref(&sa_bo->fence); + fence_put(sa_bo->fence); kfree(sa_bo); } @@ -161,7 +175,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager) sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { if (sa_bo->fence == NULL || - !fence_is_signaled(&sa_bo->fence->base)) { + !fence_is_signaled(sa_bo->fence)) { return; } amdgpu_sa_bo_remove_locked(sa_bo); @@ -246,7 +260,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager, } static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, - struct amdgpu_fence **fences, + struct fence **fences, unsigned *tries) { struct amdgpu_sa_bo *best_bo = NULL; @@ -275,7 +289,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, sa_bo = list_first_entry(&sa_manager->flist[i], struct amdgpu_sa_bo, flist); - if (!fence_is_signaled(&sa_bo->fence->base)) { + if (!fence_is_signaled(sa_bo->fence)) { fences[i] = sa_bo->fence; continue; } @@ -299,7 +313,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, } if (best_bo) { - ++tries[best_bo->fence->ring->idx]; + uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence); + ++tries[idx]; sa_manager->hole = best_bo->olist.prev; /* we knew that this one is signaled, @@ -315,7 +330,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align) { - struct amdgpu_fence *fences[AMDGPU_MAX_RINGS]; + struct fence *fences[AMDGPU_MAX_RINGS]; unsigned tries[AMDGPU_MAX_RINGS]; int i, r; signed long t; @@ -373,7 +388,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev, } void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, - struct amdgpu_fence *fence) + struct fence *fence) { struct amdgpu_sa_manager *sa_manager; @@ -383,10 +398,11 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, sa_manager = (*sa_bo)->manager; spin_lock(&sa_manager->wq.lock); - if (fence && !fence_is_signaled(&fence->base)) { - (*sa_bo)->fence = amdgpu_fence_ref(fence); - list_add_tail(&(*sa_bo)->flist, - &sa_manager->flist[fence->ring->idx]); + if (fence && !fence_is_signaled(fence)) { + uint32_t idx; + (*sa_bo)->fence = fence_get(fence); + idx = amdgpu_sa_get_ring_from_fence(fence); + list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); } else { amdgpu_sa_bo_remove_locked(*sa_bo); } @@ -413,8 +429,16 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", soffset, eoffset, eoffset - soffset); if (i->fence) { - seq_printf(m, " protected by 0x%016llx on ring %d", - i->fence->seq, i->fence->ring->idx); + struct amdgpu_fence *a_fence = to_amdgpu_fence(i->fence); + struct amd_sched_fence *s_fence = to_amd_sched_fence(i->fence); + if (a_fence) + seq_printf(m, " protected by 0x%016llx on ring %d", + a_fence->seq, a_fence->ring->idx); + if (s_fence) + seq_printf(m, " protected by 
0x%016llx on ring %d", + s_fence->v_seq, + s_fence->entity->scheduler->ring_id); + } seq_printf(m, "\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c index d6d41a42ab65..ff3ca52ec6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c @@ -87,7 +87,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, void amdgpu_semaphore_free(struct amdgpu_device *adev, struct amdgpu_semaphore **semaphore, - struct amdgpu_fence *fence) + struct fence *fence) { if (semaphore == NULL || *semaphore == NULL) { return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 7cb711fc1ee2..ee68eebfded1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -234,7 +234,7 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, */ void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct amdgpu_fence *fence) + struct fence *fence) { unsigned i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dd3415d2e45d..d7c02e1a309e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1042,7 +1042,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, } amdgpu_ring_unlock_commit(ring); - amdgpu_sync_free(adev, &sync, *fence); + amdgpu_sync_free(adev, &sync, &(*fence)->base); return 0; } -- cgit From ce882e6dc241ab8dded0eeeb33a86482d44a5689 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 19 Aug 2015 15:00:55 +0200 Subject: drm/amdgpu: remove v_seq handling from the scheduler v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simply not used any more. Only keep 32bit atomic for fence sequence numbering. 
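The per-entity bookkeeping shrinks to a single 32-bit counter that only numbers the scheduler fences. The entity init hunk below keeps just the atomic_set() of fence_seq; the sched_fence.c hunk is cut off in this log, so the fence creation side is sketched here under that assumption:

	/* entity init: only the fence sequence counter remains */
	atomic_set(&entity->fence_seq, 0);

	/* amd_sched_fence_create(), sketch: the seqno now comes from fence_seq */
	seq = atomic_inc_return(&s_entity->fence_seq);
	fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
		   s_entity->fence_context, seq);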
v2: trivial rebase Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Jammy Zhou (v1) Reviewed-by: Chunming Zhou (v1) --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 20 +++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +----- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 1 - drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 19 +------------------ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 +------ drivers/gpu/drm/amd/scheduler/sched_fence.c | 13 ++++++++----- 9 files changed, 21 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 80f2ceaf6af6..65e0e9406abb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1047,7 +1047,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx); struct amdgpu_ctx *amdgpu_ctx_get_ref(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence, uint64_t queued_seq); + struct fence *fence); struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc8d2829c1e9..f91849b12a0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -866,11 +866,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) kfree(job); goto out; } - job->ibs[parser->num_ibs - 1].sequence = + cs->out.handle = amdgpu_ctx_add_fence(job->ctx, ring, - &job->base.s_fence->base, - job->base.s_fence->v_seq); - cs->out.handle = job->base.s_fence->v_seq; + &job->base.s_fence->base); list_sort(NULL, &parser->validated, cmp_size_smaller_first); ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 8660c0854a1e..f024effa60f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -236,17 +236,13 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) } uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence, uint64_t queued_seq) + struct fence *fence) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; - uint64_t seq = 0; + uint64_t seq = cring->sequence; unsigned idx = 0; struct fence *other = NULL; - if (amdgpu_enable_scheduler) - seq = queued_seq; - else - seq = cring->sequence; idx = seq % AMDGPU_CTX_MAX_CS_PENDING; other = cring->fences[idx]; if (other) { @@ -260,8 +256,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, spin_lock(&ctx->ring_lock); cring->fences[idx] = fence; - if (!amdgpu_enable_scheduler) - cring->sequence++; + cring->sequence++; spin_unlock(&ctx->ring_lock); fence_put(other); @@ -274,21 +269,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; - uint64_t queued_seq; spin_lock(&ctx->ring_lock); - if (amdgpu_enable_scheduler) - queued_seq = amd_sched_next_queued_seq(&cring->entity); - else - queued_seq = cring->sequence; - if (seq >= queued_seq) { + if (seq >= cring->sequence) { spin_unlock(&ctx->ring_lock); return ERR_PTR(-EINVAL); } - if (seq + 
AMDGPU_CTX_MAX_CS_PENDING < queued_seq) { + if (seq + AMDGPU_CTX_MAX_CS_PENDING < cring->sequence) { spin_unlock(&ctx->ring_lock); return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 13c5978ac69b..737c8e3e3a74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -126,7 +126,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_ring *ring; struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; - uint64_t sequence; unsigned i; int r = 0; @@ -199,12 +198,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, return r; } - sequence = amdgpu_enable_scheduler ? ib->sequence : 0; - if (!amdgpu_enable_scheduler && ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base, - sequence); + &ib->fence->base); /* wrap the last IB with fence */ if (ib->user) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index b7cbaa9d532e..26b17939c9c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -435,8 +435,8 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, seq_printf(m, " protected by 0x%016llx on ring %d", a_fence->seq, a_fence->ring->idx); if (s_fence) - seq_printf(m, " protected by 0x%016llx on ring %d", - s_fence->v_seq, + seq_printf(m, " protected by 0x%016x on ring %d", + s_fence->base.seqno, s_fence->entity->scheduler->ring_id); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 06d7bf51db9a..964b54381feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -111,7 +111,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, kfree(job); return r; } - ibs[num_ibs - 1].sequence = job->base.s_fence->v_seq; *f = fence_get(&job->base.s_fence->base); mutex_unlock(&job->job_lock); } else { diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 1125aa2e2a9a..f8d46b0b4e3b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -156,14 +156,12 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, struct amd_sched_rq *rq, uint32_t jobs) { - uint64_t seq_ring = 0; char name[20]; if (!(sched && entity && rq)) return -EINVAL; memset(entity, 0, sizeof(struct amd_sched_entity)); - seq_ring = ((uint64_t)sched->ring_id) << 60; spin_lock_init(&entity->lock); entity->belongto_rq = rq; entity->scheduler = sched; @@ -179,8 +177,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, return -EINVAL; spin_lock_init(&entity->queue_lock); - atomic64_set(&entity->last_queued_v_seq, seq_ring); - atomic64_set(&entity->last_signaled_v_seq, seq_ring); + atomic_set(&entity->fence_seq, 0); /* Add the entity to the run queue */ amd_sched_rq_add_entity(rq, entity); @@ -299,8 +296,6 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) unsigned long flags; sched = sched_job->sched; - atomic64_set(&sched_job->s_entity->last_signaled_v_seq, - sched_job->s_fence->v_seq); amd_sched_fence_signal(sched_job->s_fence); spin_lock_irqsave(&sched->queue_lock, flags); list_del(&sched_job->list); @@ -421,15 +416,3 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched) kfree(sched); return 0; } - -/** - * Get next queued sequence number - * - * @entity The context entity - * 
- * return the next queued sequence number -*/ -uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity) -{ - return atomic64_read(&c_entity->last_queued_v_seq) + 1; -} diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 6597d61266e7..d328e968beb3 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -42,9 +42,7 @@ struct amd_sched_entity { struct list_head list; struct amd_sched_rq *belongto_rq; spinlock_t lock; - /* the virtual_seq is unique per context per ring */ - atomic64_t last_queued_v_seq; - atomic64_t last_signaled_v_seq; + atomic_t fence_seq; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; @@ -72,7 +70,6 @@ struct amd_sched_fence { struct fence base; struct fence_cb cb; struct amd_sched_entity *entity; - uint64_t v_seq; spinlock_t lock; }; @@ -148,8 +145,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); -uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); - struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_entity *s_entity); void amd_sched_fence_signal(struct amd_sched_fence *fence); diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index a4751598c0b4..266ed7bbbc74 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -30,16 +30,19 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity) { struct amd_sched_fence *fence = NULL; + unsigned seq; + fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); if (fence == NULL) return NULL; - fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); + fence->entity = s_entity; spin_lock_init(&fence->lock); - fence_init(&fence->base, &amd_sched_fence_ops, - &fence->lock, - s_entity->fence_context, - fence->v_seq); + + seq = atomic_inc_return(&s_entity->fence_seq); + fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock, + s_entity->fence_context, seq); + return fence; } -- cgit From f91b3a69418120974c9a416939b903ec86607c52 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Aug 2015 14:47:40 +0800 Subject: drm/amdgpu: fix fence wait in sync_fence, instead should be in sync_rings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 56 ++++++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 65e0e9406abb..3c5487257ef0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -704,6 +704,7 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev, struct amdgpu_sync { struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS]; + DECLARE_HASHTABLE(fences, 4); struct amdgpu_fence *last_vm_update; }; @@ -716,6 +717,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, void *owner); int amdgpu_sync_rings(struct amdgpu_sync *sync, struct amdgpu_ring
*ring); +int amdgpu_sync_wait(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct fence *fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 737c8e3e3a74..c439735ee670 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -140,7 +140,11 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, dev_err(adev->dev, "couldn't schedule ib\n"); return -EINVAL; } - + r = amdgpu_sync_wait(&ibs->sync); + if (r) { + dev_err(adev->dev, "IB sync failed (%d).\n", r); + return r; + } r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index ee68eebfded1..febbf37b1412 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -32,6 +32,11 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +struct amdgpu_sync_entry { + struct hlist_node node; + struct fence *fence; +}; + /** * amdgpu_sync_create - zero init sync object * @@ -49,6 +54,7 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) sync->sync_to[i] = NULL; + hash_init(sync->fences); sync->last_vm_update = NULL; } @@ -62,6 +68,7 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct fence *f) { + struct amdgpu_sync_entry *e; struct amdgpu_fence *fence; struct amdgpu_fence *other; @@ -69,8 +76,27 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, return 0; fence = to_amdgpu_fence(f); - if (!fence || fence->ring->adev != adev) - return fence_wait(f, true); + if (!fence || fence->ring->adev != adev) { + hash_for_each_possible(sync->fences, e, node, f->context) { + struct fence *new; + if (unlikely(e->fence->context != f->context)) + continue; + new = fence_get(fence_later(e->fence, f)); + if (new) { + fence_put(e->fence); + e->fence = new; + } + return 0; + } + + e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); + if (!e) + return -ENOMEM; + + hash_add(sync->fences, &e->node, f->context); + e->fence = fence_get(f); + return 0; + } other = sync->sync_to[fence->ring->idx]; sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( @@ -147,6 +173,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, return r; } +int amdgpu_sync_wait(struct amdgpu_sync *sync) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + r = fence_wait(e->fence, false); + if (r) + return r; + + hash_del(&e->node); + fence_put(e->fence); + kfree(e); + } + return 0; +} + /** * amdgpu_sync_rings - sync ring to all registered fences * @@ -236,8 +280,16 @@ void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct fence *fence) { + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; unsigned i; + hash_for_each_safe(sync->fences, i, tmp, e, node) { + hash_del(&e->node); + fence_put(e->fence); + kfree(e); + } + for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); -- cgit
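The hash table added by the last patch keys pending foreign fences by fence context: amdgpu_sync_fence() keeps at most one entry per context and, when two fences share a context, retains only the one fence_later() says is newer, so amdgpu_sync_wait() waits once per context rather than once per fence. Below is a minimal userspace model of that dedup rule; the names (sync_entry, sync_fence) and the modulo hash are illustrative stand-ins for the kernel's hashtable.h helpers, and the plain seqno comparison ignores the wraparound handling fence_later() inherits from fence_is_later().

/*
 * Illustrative userspace model, not kernel code: one entry per fence
 * context, of two fences from the same context keep only the later one.
 * Compile with: cc -std=c99 sync_model.c (hypothetical file name).
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SYNC_HASH_BITS  4       /* matches DECLARE_HASHTABLE(fences, 4) */
#define SYNC_BUCKETS    (1u << SYNC_HASH_BITS)

struct sync_entry {
        struct sync_entry *next;        /* stands in for struct hlist_node */
        uint64_t context;               /* fence context is the hash key */
        uint64_t seqno;                 /* highest seqno seen so far */
};

static struct sync_entry *buckets[SYNC_BUCKETS];

static int sync_fence(uint64_t context, uint64_t seqno)
{
        struct sync_entry *e;

        /* like hash_for_each_possible(): walk one bucket, match on context */
        for (e = buckets[context % SYNC_BUCKETS]; e; e = e->next) {
                if (e->context != context)
                        continue;
                if (seqno > e->seqno)   /* fence_later(): keep the newer one */
                        e->seqno = seqno;
                return 0;
        }

        /* first fence from this context: add a new entry (hash_add()) */
        e = malloc(sizeof(*e));
        if (!e)
                return -1;              /* -ENOMEM in the kernel version */
        e->context = context;
        e->seqno = seqno;
        e->next = buckets[context % SYNC_BUCKETS];
        buckets[context % SYNC_BUCKETS] = e;
        return 0;
}

int main(void)
{
        sync_fence(7, 10);
        sync_fence(7, 12);      /* same context: entry updated, not added */
        sync_fence(9, 3);       /* new context: second entry */

        for (unsigned i = 0; i < SYNC_BUCKETS; i++)
                for (struct sync_entry *e = buckets[i]; e; e = e->next)
                        printf("context %llu: wait for seqno %llu\n",
                               (unsigned long long)e->context,
                               (unsigned long long)e->seqno);
        return 0;       /* amdgpu_sync_wait() would fence_wait() each entry */
}

In the patch itself the table is a DECLARE_HASHTABLE(fences, 4) embedded in struct amdgpu_sync, and both amdgpu_sync_wait() and amdgpu_sync_free() drain it with hash_for_each_safe(), dropping one fence reference per entry.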
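The earlier "remove v_seq handling" patch also fixes the lookup contract of amdgpu_ctx_get_fence(): cring->sequence is the next number amdgpu_ctx_add_fence() will hand out, only the last AMDGPU_CTX_MAX_CS_PENDING submissions still have a cached fence, a seq at or beyond the window is -EINVAL, and a seq that fell out of it is reported as NULL (already signaled). A small standalone sketch of just that window check, with hypothetical names, assuming the 16-entry window from the header:

/* Standalone model of the amdgpu_ctx_get_fence() window check after the
 * v_seq removal; 'sequence' plays the role of cring->sequence. Names and
 * return codes here are illustrative, not the kernel API. */
#include <stdint.h>
#include <stdio.h>

#define MAX_CS_PENDING  16      /* AMDGPU_CTX_MAX_CS_PENDING */

enum lookup { LOOKUP_EINVAL, LOOKUP_SIGNALED, LOOKUP_FENCE };

static enum lookup ctx_get_fence(uint64_t sequence, uint64_t seq,
                                 unsigned *slot)
{
        if (seq >= sequence)                    /* not submitted yet */
                return LOOKUP_EINVAL;           /* ERR_PTR(-EINVAL) */
        if (seq + MAX_CS_PENDING < sequence)    /* slot already reused */
                return LOOKUP_SIGNALED;         /* NULL: treat as signaled */
        *slot = seq % MAX_CS_PENDING;           /* fence still cached */
        return LOOKUP_FENCE;
}

int main(void)
{
        unsigned slot;

        /* after 100 submissions the valid window is seq 84..99 */
        printf("%d\n", ctx_get_fence(100, 100, &slot)); /* 0: EINVAL */
        printf("%d\n", ctx_get_fence(100, 50, &slot));  /* 1: signaled */
        if (ctx_get_fence(100, 95, &slot) == LOOKUP_FENCE)
                printf("seq 95 -> fences[%u]\n", slot); /* slot 15 */
        return 0;
}

The same window is what lets amdgpu_ctx_add_fence() drop its reference on the fence it overwrites: by construction that fence is exactly AMDGPU_CTX_MAX_CS_PENDING submissions old and no longer reachable through amdgpu_ctx_get_fence().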