From 91e1a5207edec9e4f888e44478a9a254186e0ba8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 6 Jul 2015 22:06:40 +0200 Subject: drm/amdgpu: deal with foreign fences in amdgpu_sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also requires some error handling from the callers of that function. Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1f040d85ac47..53e6a10fe9f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -482,6 +482,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (p->bo_list) { for (i = 0; i < p->bo_list->num_entries; i++) { + struct fence *f; + /* ignore duplicates */ bo = p->bo_list->array[i].robj; if (!bo) @@ -495,7 +497,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; - amdgpu_sync_fence(&p->ibs[0].sync, bo_va->last_pt_update); + f = &bo_va->last_pt_update->base; + r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f); + if (r) + return r; } } @@ -715,9 +720,12 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return r; } - amdgpu_sync_fence(&ib->sync, fence); + r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); + + if (r) + return r; } } -- cgit From 21c16bf634e62cf9673946f509b469e7f0953ecf Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 7 Jul 2015 17:24:49 +0200 Subject: drm/amdgpu: add user fence context map v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a prerequisite for the GPU scheduler to make the order of submission independent from the order of execution. 
v2: properly implement the locking Signed-off-by: Christian König Reviewed-by: Jammy Zhou Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 16 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 60 ++++++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++- 4 files changed, 110 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 70e783a849ed..0220d98ba8bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -415,6 +415,8 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; + /* resulting sequence number */ + uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -985,9 +987,18 @@ struct amdgpu_vm_manager { * context related structures */ +#define AMDGPU_CTX_MAX_CS_PENDING 16 + +struct amdgpu_ctx_ring { + uint64_t sequence; + struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; +}; + struct amdgpu_ctx { struct kref refcount; unsigned reset_counter; + spinlock_t ring_lock; + struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; }; struct amdgpu_ctx_mgr { @@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv); struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence); +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq); + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 53e6a10fe9f9..cef8360698be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, sizeof(struct drm_amdgpu_cs_chunk_dep); for (j = 0; j < num_deps; ++j) { - struct amdgpu_fence *fence; struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; + struct fence *fence; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, if (ctx == NULL) return -EINVAL; - r = amdgpu_fence_recreate(ring, p->filp, - deps[j].handle, - &fence); - if (r) { + fence = amdgpu_ctx_get_fence(ctx, ring, + deps[j].handle); + if (IS_ERR(fence)) { + r = PTR_ERR(fence); amdgpu_ctx_put(ctx); return r; - } - - r = amdgpu_sync_fence(adev, &ib->sync, &fence->base); - amdgpu_fence_unref(&fence); - amdgpu_ctx_put(ctx); - if (r) - return r; + } else if (fence) { + r = amdgpu_sync_fence(adev, &ib->sync, fence); + fence_put(fence); + amdgpu_ctx_put(ctx); + if (r) + return r; + } } } @@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_ib_fill(adev, &parser); } - if (!r) + if (!r) { r = amdgpu_cs_dependencies(adev, &parser); + if (r) + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + } if (r) { amdgpu_cs_parser_fini(&parser, r, reserved_buffers); @@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } - cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq; + cs->out.handle = parser.uf.sequence; out: amdgpu_cs_parser_fini(&parser, r, true); up_read(&adev->exclusive_lock); @@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); - struct amdgpu_fence *fence = NULL; struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; + struct fence *fence; long r; + r = amdgpu_cs_get_ring(adev, 
wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) + return r; + ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); - r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + + } else + r = 1; - r = fence_wait_timeout(&fence->base, true, timeout); - amdgpu_fence_unref(&fence); amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e63cfb7fa390..c23bfd8fe414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,17 +28,22 @@ static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + unsigned i, j; ctx = container_of(ref, struct amdgpu_ctx, refcount); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j) + fence_put(ctx->rings[i].fences[j]); kfree(ctx); } int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t *id) { - int r; struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; + int i, r; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, memset(ctx, 0, sizeof(*ctx)); kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + ctx->rings[i].sequence = 1; mutex_unlock(&mgr->lock); return 0; @@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) kref_put(&ctx->refcount, amdgpu_ctx_do_release); return 0; } + +uint64_t 
amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, + struct fence *fence) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + uint64_t seq = cring->sequence; + unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING; + struct fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + } + + fence_get(fence); + + spin_lock(&ctx->ring_lock); + cring->fences[idx] = fence; + cring->sequence++; + spin_unlock(&ctx->ring_lock); + + fence_put(other); + + return seq; +} + +struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq) +{ + struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + struct fence *fence; + + spin_lock(&ctx->ring_lock); + if (seq >= cring->sequence) { + spin_unlock(&ctx->ring_lock); + return ERR_PTR(-EINVAL); + } + + if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) { + spin_unlock(&ctx->ring_lock); + return NULL; + } + + fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]); + spin_unlock(&ctx->ring_lock); + + return fence; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2722815eddbb..95d533422a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); + ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, + &ib->fence->base); addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->fence->seq, + amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, AMDGPU_FENCE_FLAG_64BIT); } -- cgit From 5430a3ffb0b1902e8aea4ed2ba256b1263126e8d Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 21 Jul 2015 18:02:21 +0200 Subject: drm/amdgpu: 
fix UVD/VCE fence handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to return the sequence number to userspace even when we don't use user fences. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 ++++++--- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 551143287698..e6c26c1716b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -414,8 +414,6 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; - /* resulting sequence number */ - uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -847,6 +845,8 @@ struct amdgpu_ib { uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; uint32_t flags; + /* resulting sequence number */ + uint64_t sequence; }; enum amdgpu_ring_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index cef8360698be..4794e14976ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -794,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } - cs->out.handle = parser.uf.sequence; + cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; out: amdgpu_cs_parser_fini(&parser, r, true); up_read(&adev->exclusive_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index f3ac9d8a5691..42d6298eb9d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -88,6 +88,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, ib->fence = NULL; 
ib->user = NULL; ib->vm = vm; + ib->ctx = NULL; ib->gds_base = 0; ib->gds_size = 0; ib->gws_base = 0; @@ -214,13 +215,15 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, return r; } + if (ib->ctx) + ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, + &ib->fence->base); + /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); - ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base); addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->user->sequence, + amdgpu_ring_emit_fence(ring, addr, ib->sequence, AMDGPU_FENCE_FLAG_64BIT); } -- cgit From e60b344f6c0eff03362b5083db746ef5442c4b36 Mon Sep 17 00:00:00 2001 From: "monk.liu" Date: Fri, 17 Jul 2015 18:39:25 +0800 Subject: drm/amdgpu: optimize amdgpu_parser_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit use kmalloc_array instead of kcalloc where appropriate and other cleanups. Signed-off-by: monk.liu Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4794e14976ca..bc0a70415485 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -147,13 +147,13 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) /* get chunks */ INIT_LIST_HEAD(&p->validated); - chunk_array = kcalloc(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); + chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (chunk_array == NULL) { r = -ENOMEM; goto out; } - chunk_array_user = (uint64_t *)(unsigned long)(cs->in.chunks); + chunk_array_user = (uint64_t __user *)(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { r = -EFAULT; 
@@ -161,7 +161,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } p->nchunks = cs->in.num_chunks; - p->chunks = kcalloc(p->nchunks, sizeof(struct amdgpu_cs_chunk), + p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), GFP_KERNEL); if (p->chunks == NULL) { r = -ENOMEM; @@ -173,7 +173,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; - chunk_ptr = (void __user *)(unsigned long)chunk_array[i]; + chunk_ptr = (void __user *)chunk_array[i]; if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { r = -EFAULT; @@ -183,7 +183,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = (void __user *)(unsigned long)user_chunk.chunk_data; + cdata = (void __user *)user_chunk.chunk_data; p->chunks[i].user_ptr = cdata; p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); @@ -235,11 +235,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } } - p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!p->ibs) { + + p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); + if (!p->ibs) r = -ENOMEM; - goto out; - } out: kfree(chunk_array); -- cgit From 049fc527b4641f99e573b26f1a726a3eadd0cc25 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 14:36:51 +0800 Subject: drm/amdgpu: dispatch jobs in cs BO validation is moved to scheduler except usrptr which must be validated in user process Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 256 +++++++++++++++++++++++++-------- 2 files changed, 200 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cfc6c786b2f2..becb26317467 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1243,6 +1243,7 @@ struct amdgpu_cs_parser { struct work_struct job_work; int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*run_job)(struct amdgpu_cs_parser *sched_job); + int (*free_job)(struct amdgpu_cs_parser *sched_job); }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index bc0a70415485..f9d4fe985668 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -41,6 +41,11 @@ struct amdgpu_cs_buckets { struct list_head bucket[AMDGPU_CS_NUM_BUCKETS]; }; +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, + int error, bool backoff); +static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff); +static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser); + static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b) { unsigned i; @@ -126,12 +131,52 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, return 0; } +static void amdgpu_job_work_func(struct work_struct *work) +{ + struct amdgpu_cs_parser *sched_job = + container_of(work, struct amdgpu_cs_parser, + job_work); + mutex_lock(&sched_job->job_lock); + sched_job->free_job(sched_job); + mutex_unlock(&sched_job->job_lock); + /* after processing job, free memory */ + kfree(sched_job); +} +struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, + struct drm_file *filp, + struct amdgpu_ctx *ctx, + struct amdgpu_ib *ibs, + uint32_t num_ibs) +{ + struct amdgpu_cs_parser *parser; + int i; + + parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL); + if (!parser) + return NULL; + + parser->adev = adev; + parser->filp = filp; + parser->ctx = ctx; + parser->ibs 
= ibs; + parser->num_ibs = num_ibs; + if (amdgpu_enable_scheduler) { + mutex_init(&parser->job_lock); + INIT_WORK(&parser->job_work, amdgpu_job_work_func); + } + for (i = 0; i < num_ibs; i++) + ibs[i].ctx = ctx; + + return parser; +} + int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array = NULL; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_bo_list *bo_list = NULL; unsigned size, i; int r = 0; @@ -143,7 +188,17 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) r = -EINVAL; goto out; } - p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); + bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); + if (bo_list && !bo_list->has_userptr) { + p->bo_list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); + if (!p->bo_list) + return -ENOMEM; + amdgpu_bo_list_copy(p->adev, p->bo_list, bo_list); + amdgpu_bo_list_put(bo_list); + } else if (bo_list && bo_list->has_userptr) + p->bo_list = bo_list; + else + p->bo_list = NULL; /* get chunks */ INIT_LIST_HEAD(&p->validated); @@ -424,8 +479,26 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a, **/ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) { - unsigned i; + amdgpu_cs_parser_fini_early(parser, error, backoff); + amdgpu_cs_parser_fini_late(parser); +} +static int amdgpu_cs_parser_run_job( + struct amdgpu_cs_parser *sched_job) +{ + amdgpu_cs_parser_fini_early(sched_job, 0, true); + return 0; +} + +static int amdgpu_cs_parser_free_job( + struct amdgpu_cs_parser *sched_job) +{ + amdgpu_cs_parser_fini_late(sched_job); + return 0; +} + +static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff) +{ if (!error) { /* Sort the buffer list from the smallest to largest buffer, * which affects the order of buffers in the LRU list. 
@@ -446,11 +519,19 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); } +} +static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) +{ + unsigned i; if (parser->ctx) amdgpu_ctx_put(parser->ctx); - if (parser->bo_list) - amdgpu_bo_list_put(parser->bo_list); + if (parser->bo_list) { + if (!parser->bo_list->has_userptr) + amdgpu_bo_list_free(parser->bo_list); + else + amdgpu_bo_list_put(parser->bo_list); + } drm_free_large(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); @@ -461,6 +542,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo kfree(parser->ibs); if (parser->uf.bo) drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); + + if (!amdgpu_enable_scheduler) + kfree(parser); } static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, @@ -533,9 +617,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, goto out; } amdgpu_cs_sync_rings(parser); - - r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs, - parser->filp); + if (!amdgpu_enable_scheduler) + r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs, + parser->filp); out: mutex_unlock(&vm->mutex); @@ -731,35 +815,16 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return 0; } -int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) { - struct amdgpu_device *adev = dev->dev_private; - union drm_amdgpu_cs *cs = data; - struct amdgpu_cs_parser parser; - int r, i; - bool reserved_buffers = false; - - down_read(&adev->exclusive_lock); - if (!adev->accel_working) { - up_read(&adev->exclusive_lock); - return -EBUSY; - } - /* initialize parser */ - memset(&parser, 0, sizeof(struct amdgpu_cs_parser)); - parser.filp = filp; - parser.adev = adev; - r = amdgpu_cs_parser_init(&parser, 
data); - if (r) { - DRM_ERROR("Failed to initialize parser !\n"); - amdgpu_cs_parser_fini(&parser, r, false); - up_read(&adev->exclusive_lock); - r = amdgpu_cs_handle_lockup(adev, r); - return r; - } - - r = amdgpu_cs_parser_relocs(&parser); - if (r) { - if (r != -ERESTARTSYS) { + int r, i; + struct amdgpu_cs_parser *parser = sched_job; + struct amdgpu_device *adev = sched_job->adev; + bool reserved_buffers = false; + + r = amdgpu_cs_parser_relocs(parser); + if (r) { + if (r != -ERESTARTSYS) { if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else @@ -769,33 +834,104 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!r) { reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, &parser); + r = amdgpu_cs_ib_fill(adev, parser); } - if (!r) { - r = amdgpu_cs_dependencies(adev, &parser); + r = amdgpu_cs_dependencies(adev, parser); if (r) DRM_ERROR("Failed in the dependencies handling %d!\n", r); } + if (r) { + amdgpu_cs_parser_fini(parser, r, reserved_buffers); + return r; + } + + for (i = 0; i < parser->num_ibs; i++) + trace_amdgpu_cs(parser, i); + + r = amdgpu_cs_ib_vm_chunk(adev, parser); + return r; +} + +static struct amdgpu_ring *amdgpu_cs_parser_get_ring( + struct amdgpu_device *adev, + struct amdgpu_cs_parser *parser) +{ + int i, r; + + struct amdgpu_cs_chunk *chunk; + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + struct amdgpu_ring *ring; + for (i = 0; i < parser->nchunks; i++) { + chunk = &parser->chunks[i]; + chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, + &ring); + if (r) + return NULL; + break; + } + return ring; +} + +int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdgpu_device *adev = dev->dev_private; + union drm_amdgpu_cs *cs = data; + struct amdgpu_cs_parser *parser; + int r; + + 
down_read(&adev->exclusive_lock); + if (!adev->accel_working) { + up_read(&adev->exclusive_lock); + return -EBUSY; + } + parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0); + if (!parser) + return -ENOMEM; + r = amdgpu_cs_parser_init(parser, data); if (r) { - amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + DRM_ERROR("Failed to initialize parser !\n"); + amdgpu_cs_parser_fini(parser, r, false); up_read(&adev->exclusive_lock); r = amdgpu_cs_handle_lockup(adev, r); return r; } - for (i = 0; i < parser.num_ibs; i++) - trace_amdgpu_cs(&parser, i); - - r = amdgpu_cs_ib_vm_chunk(adev, &parser); - if (r) { - goto out; + if (amdgpu_enable_scheduler && parser->num_ibs) { + struct amdgpu_ring * ring = + amdgpu_cs_parser_get_ring(adev, parser); + parser->uf.sequence = atomic64_inc_return( + &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); + if ((parser->bo_list && parser->bo_list->has_userptr)) { + r = amdgpu_cs_parser_prepare_job(parser); + if (r) + goto out; + } else + parser->prepare_job = amdgpu_cs_parser_prepare_job; + + parser->run_job = amdgpu_cs_parser_run_job; + parser->free_job = amdgpu_cs_parser_free_job; + amd_sched_push_job(ring->scheduler, + &parser->ctx->rings[ring->idx].c_entity, + parser); + cs->out.handle = parser->uf.sequence; + up_read(&adev->exclusive_lock); + return 0; } + r = amdgpu_cs_parser_prepare_job(parser); + if (r) + goto out; - cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; + cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; out: - amdgpu_cs_parser_fini(&parser, r, true); + amdgpu_cs_parser_fini(parser, r, true); up_read(&adev->exclusive_lock); r = amdgpu_cs_handle_lockup(adev, r); return r; @@ -829,18 +965,24 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - - fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); - if (IS_ERR(fence)) - r = PTR_ERR(fence); - - else if (fence) { - r = 
fence_wait_timeout(fence, true, timeout); - fence_put(fence); - - } else + if (amdgpu_enable_scheduler) { + r = amd_sched_wait_ts(&ctx->rings[ring->idx].c_entity, + wait->in.handle, true, timeout); + if (r) + return r; r = 1; + } else { + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + + } else + r = 1; + } amdgpu_ctx_put(ctx); if (r < 0) return r; -- cgit From 4b559c90bc1870313f02cceef680884519af6b2b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 15:53:04 +0800 Subject: drm/amdgpu: make sure the fence is emitted before ring to get it. Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 +++++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 5 ++++- 4 files changed, 25 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index becb26317467..127867c2fc37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -81,6 +81,7 @@ extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; extern int amdgpu_enable_scheduler; +#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) /* AMDGPU_IB_POOL_SIZE must be a power of 2 */ @@ -1239,6 +1240,7 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_user_fence uf; + struct amdgpu_ring *ring; struct mutex job_lock; struct work_struct job_work; int (*prepare_job)(struct amdgpu_cs_parser *sched_job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f9d4fe985668..5f2403898b06 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -915,7 +915,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } else parser->prepare_job = amdgpu_cs_parser_prepare_job; - + parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; amd_sched_push_job(ring->scheduler, @@ -965,24 +965,16 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; - if (amdgpu_enable_scheduler) { - r = amd_sched_wait_ts(&ctx->rings[ring->idx].c_entity, - wait->in.handle, true, timeout); - if (r) - return r; - r = 1; - } else { - fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); - if (IS_ERR(fence)) - r = PTR_ERR(fence); - else if (fence) { - r = fence_wait_timeout(fence, true, timeout); - fence_put(fence); + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + if (IS_ERR(fence)) + r = PTR_ERR(fence); + else if (fence) { + r = fence_wait_timeout(fence, true, timeout); + fence_put(fence); + } else + r = 1; - } else - r = 1; - } amdgpu_ctx_put(ctx); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index b9be250cb206..41bc7fc0ebf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -261,6 +261,16 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; uint64_t queued_seq; + int r; + + if (amdgpu_enable_scheduler) { + r = amd_sched_wait_emit(&cring->c_entity, + seq, + true, + AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS); + if (r) + return NULL; + } spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 1f7bf31da7fc..46ec915c9344 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -56,12 +56,15 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, sched_job->filp); if (r) goto err; - if (sched_job->run_job) { r = sched_job->run_job(sched_job); if (r) goto err; } + atomic64_set(&c_entity->last_emitted_v_seq, + sched_job->uf.sequence); + wake_up_all(&c_entity->wait_emit); + mutex_unlock(&sched_job->job_lock); return; err: -- cgit From 4274f5d45cf11f88d7380702a7147b70553ddd6e Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 16:04:39 +0800 Subject: drm/amdgpu: prepare job before push to sw queue for pte ring user mode will still use pte ring as a normal ring. if the prepare job generates another command(update pte) on its ring in scheduler, then will kill scheduler which is going to waiting later job but pending running job. Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 127867c2fc37..79e81f397e60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -902,6 +902,7 @@ struct amdgpu_ring { struct amdgpu_ctx *current_ctx; enum amdgpu_ring_type type; char name[16]; + bool is_pte_ring; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5f2403898b06..9ff4d2756a6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -909,7 +909,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) amdgpu_cs_parser_get_ring(adev, 
parser); parser->uf.sequence = atomic64_inc_return( &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); - if ((parser->bo_list && parser->bo_list->has_userptr)) { + if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 15df46c93f0a..dd3da7bb11c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1404,5 +1404,6 @@ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a988dfb1d394..8b7e2438b6d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1415,5 +1415,6 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 8f4aac23b317..4b5d769bdb4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1540,5 +1540,6 @@ static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } -- cgit From afe10081498fdf2c5b92c7fcc534e49544218fb9 Mon Sep 17 00:00:00 2001 From: Chunming 
Zhou Date: Tue, 28 Jul 2015 16:11:52 +0800 Subject: drm/amdgpu: add check for callback it is possible that the callback isn't defined sometimes. Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9ff4d2756a6f..c41360e443be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -137,7 +137,8 @@ static void amdgpu_job_work_func(struct work_struct *work) container_of(work, struct amdgpu_cs_parser, job_work); mutex_lock(&sched_job->job_lock); - sched_job->free_job(sched_job); + if (sched_job->free_job) + sched_job->free_job(sched_job); mutex_unlock(&sched_job->job_lock); /* after processing job, free memory */ kfree(sched_job); -- cgit From d1ff9086c1b8e67390161599006a34056b437a72 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 30 Jul 2015 17:59:43 +0800 Subject: drm/amdgpu: fix seq in ctx_add_fence if enabling scheduler, then the queued seq is assigned when pushing job before emitting job.
Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 6 files changed, 15 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6e1fea473a66..2619c78ec303 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -419,7 +419,6 @@ struct amdgpu_user_fence { struct amdgpu_bo *bo; /* write-back address offset to bo start */ uint32_t offset; - uint64_t sequence; }; int amdgpu_fence_driver_init(struct amdgpu_device *adev); @@ -1031,7 +1030,7 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence); + struct fence *fence, uint64_t queued_seq); struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c41360e443be..40e85bfcdf91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -739,7 +739,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ib->oa_size = amdgpu_bo_size(oa); } } - /* wrap the last IB with user fence */ if (parser->uf.bo) { struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; @@ -908,7 +907,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->uf.sequence = atomic64_inc_return( + 
parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); @@ -922,7 +921,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) amd_sched_push_job(ring->scheduler, &parser->ctx->rings[ring->idx].c_entity, parser); - cs->out.handle = parser->uf.sequence; + cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 95807b678b6a..e0eaa55bf636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -258,7 +258,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) } uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence) + struct fence *fence, uint64_t queued_seq) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; uint64_t seq = 0; @@ -266,7 +266,7 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct fence *other = NULL; if (amdgpu_enable_scheduler) - seq = atomic64_read(&cring->c_entity.last_queued_v_seq); + seq = queued_seq; else seq = cring->sequence; idx = seq % AMDGPU_CTX_MAX_CS_PENDING; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 42d6298eb9d7..eed409c59492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,6 +143,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_ring *ring; struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; + uint64_t sequence; unsigned i; int r = 0; @@ -215,9 +216,12 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, return r; } + sequence = amdgpu_enable_scheduler ? 
ib->sequence : 0; + if (ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base); + &ib->fence->base, + sequence); /* wrap the last IB with fence */ if (ib->user) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 46ec915c9344..b913c22dd6b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -62,7 +62,7 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, goto err; } atomic64_set(&c_entity->last_emitted_v_seq, - sched_job->uf.sequence); + sched_job->ibs[sched_job->num_ibs - 1].sequence); wake_up_all(&c_entity->wait_emit); mutex_unlock(&sched_job->job_lock); @@ -93,7 +93,7 @@ static void amdgpu_sched_process_job(struct amd_gpu_scheduler *sched, void *job) if (sched_job->ctx) { c_entity = &sched_job->ctx->rings[ring->idx].c_entity; atomic64_set(&c_entity->last_signaled_v_seq, - sched_job->uf.sequence); + sched_job->ibs[sched_job->num_ibs - 1].sequence); } /* wake up users waiting for time stamp */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 26c55a7a1a88..5624d4484fb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -380,7 +380,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); @@ -531,7 +531,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = 
v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); @@ -884,7 +884,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - sched_job->uf.sequence = v_seq; + ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); -- cgit From 7fc11959018f8ba2d92025679a72339da18a74ad Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 30 Jul 2015 11:53:42 +0200 Subject: drm/amdgpu: stop using addr to check for BO move v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is theoretically possible that a swapped out BO gets the same GTT address, but different backing pages while being swapped in. Instead just use another VA state to note updated areas. Ported from not upstream yet radeon commit with the same name. v2: fix some bugs in the original implementation found in the radeon code. 
v3: squash in VCE/UVD fix Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 15 +++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 11 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 67 ++++++++++++++++++++-------------- 3 files changed, 59 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 98b47601b30a..5b8e1aeae13b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -539,14 +539,16 @@ struct amdgpu_bo_va_mapping { struct amdgpu_bo_va { /* protected by bo being reserved */ struct list_head bo_list; - uint64_t addr; struct amdgpu_fence *last_pt_update; unsigned ref_count; - /* protected by vm mutex */ - struct list_head mappings; + /* protected by vm mutex and spinlock */ struct list_head vm_status; + /* mappings for this bo_va */ + struct list_head invalids; + struct list_head valids; + /* constant after initialization */ struct amdgpu_vm *vm; struct amdgpu_bo *bo; @@ -964,13 +966,16 @@ struct amdgpu_vm { struct rb_root va; - /* protecting invalidated and freed */ + /* protecting invalidated */ spinlock_t status_lock; /* BOs moved, but not yet updated in the PT */ struct list_head invalidated; - /* BOs freed, but not yet updated in the PT */ + /* BOs cleared in the PT because of a move */ + struct list_head cleared; + + /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; /* contains the page directory */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 40e85bfcdf91..fe81b46266d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1009,7 +1009,16 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (!reloc->bo_va) continue; - list_for_each_entry(mapping, &reloc->bo_va->mappings, list) { + list_for_each_entry(mapping, 
&reloc->bo_va->valids, list) { + if (mapping->it.start > addr || + addr > mapping->it.last) + continue; + + *bo = reloc->bo_va->bo; + return mapping; + } + + list_for_each_entry(mapping, &reloc->bo_va->invalids, list) { if (mapping->it.start > addr || addr > mapping->it.last) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5624d4484fb6..f2166320a5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -951,21 +951,24 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, addr = 0; } - if (addr == bo_va->addr) - return 0; - flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - list_for_each_entry(mapping, &bo_va->mappings, list) { + spin_lock(&vm->status_lock); + if (!list_empty(&bo_va->vm_status)) + list_splice_init(&bo_va->valids, &bo_va->invalids); + spin_unlock(&vm->status_lock); + + list_for_each_entry(mapping, &bo_va->invalids, list) { r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr, flags, &bo_va->last_pt_update); if (r) return r; } - bo_va->addr = addr; spin_lock(&vm->status_lock); list_del_init(&bo_va->vm_status); + if (!mem) + list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); return 0; @@ -1065,10 +1068,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, } bo_va->vm = vm; bo_va->bo = bo; - bo_va->addr = 0; bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->bo_list); - INIT_LIST_HEAD(&bo_va->mappings); + INIT_LIST_HEAD(&bo_va->valids); + INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->vm_status); mutex_lock(&vm->mutex); @@ -1157,12 +1160,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, mapping->offset = offset; mapping->flags = flags; - list_add(&mapping->list, &bo_va->mappings); + list_add(&mapping->list, &bo_va->invalids); interval_tree_insert(&mapping->it, &vm->va); trace_amdgpu_vm_bo_map(bo_va, mapping); - bo_va->addr = 0; - /* Make sure the page tables are allocated */ saddr 
>>= amdgpu_vm_block_size; eaddr >>= amdgpu_vm_block_size; @@ -1243,17 +1244,27 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_vm *vm = bo_va->vm; + bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; - list_for_each_entry(mapping, &bo_va->mappings, list) { + list_for_each_entry(mapping, &bo_va->valids, list) { if (mapping->it.start == saddr) break; } - if (&mapping->list == &bo_va->mappings) { - amdgpu_bo_unreserve(bo_va->bo); - return -ENOENT; + if (&mapping->list == &bo_va->valids) { + valid = false; + + list_for_each_entry(mapping, &bo_va->invalids, list) { + if (mapping->it.start == saddr) + break; + } + + if (&mapping->list == &bo_va->invalids) { + amdgpu_bo_unreserve(bo_va->bo); + return -ENOENT; + } } mutex_lock(&vm->mutex); @@ -1261,12 +1272,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, interval_tree_remove(&mapping->it, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); - if (bo_va->addr) { - /* clear the old address */ + if (valid) list_add(&mapping->list, &vm->freed); - } else { + else kfree(mapping); - } mutex_unlock(&vm->mutex); amdgpu_bo_unreserve(bo_va->bo); @@ -1297,15 +1306,18 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_del(&bo_va->vm_status); spin_unlock(&vm->status_lock); - list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) { + list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); interval_tree_remove(&mapping->it, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); - if (bo_va->addr) - list_add(&mapping->list, &vm->freed); - else - kfree(mapping); + list_add(&mapping->list, &vm->freed); + } + list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { + list_del(&mapping->list); + interval_tree_remove(&mapping->it, &vm->va); + kfree(mapping); } + amdgpu_fence_unref(&bo_va->last_pt_update); kfree(bo_va); @@ -1327,12 +1339,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo_va 
*bo_va; list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->addr) { - spin_lock(&bo_va->vm->status_lock); - list_del(&bo_va->vm_status); + spin_lock(&bo_va->vm->status_lock); + if (list_empty(&bo_va->vm_status)) list_add(&bo_va->vm_status, &bo_va->vm->invalidated); - spin_unlock(&bo_va->vm->status_lock); - } + spin_unlock(&bo_va->vm->status_lock); } } @@ -1360,6 +1370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->va = RB_ROOT; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); + INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); pd_size = amdgpu_vm_directory_size(adev); -- cgit From bb1e38a4bead5025ecca90544f0f733f59996b13 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 3 Aug 2015 18:19:38 +0800 Subject: drm/amdgpu: use kernel fence for last_pt_update Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5b8e1aeae13b..371ff0845989 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -539,7 +539,7 @@ struct amdgpu_bo_va_mapping { struct amdgpu_bo_va { /* protected by bo being reserved */ struct list_head bo_list; - struct amdgpu_fence *last_pt_update; + struct fence *last_pt_update; unsigned ref_count; /* protected by vm mutex and spinlock */ @@ -1241,7 +1241,7 @@ union amdgpu_sched_job_param { struct amdgpu_vm *vm; uint64_t start; uint64_t last; - struct amdgpu_fence **fence; + struct fence **fence; } vm_mapping; struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fe81b46266d9..aee59110735f 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -581,7 +581,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; - f = &bo_va->last_pt_update->base; + f = bo_va->last_pt_update; r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8745d4cc7ae0..d90254f5ca6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -737,7 +737,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, */ static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm, uint64_t start, uint64_t end, - struct amdgpu_fence *fence) + struct fence *fence) { unsigned i; @@ -745,20 +745,20 @@ static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm, end >>= amdgpu_vm_block_size; for (i = start; i <= end; ++i) - amdgpu_bo_fence(vm->page_tables[i].bo, &fence->base, true); + amdgpu_bo_fence(vm->page_tables[i].bo, fence, true); } static int amdgpu_vm_bo_update_mapping_run_job( struct amdgpu_cs_parser *sched_job) { - struct amdgpu_fence **fence = sched_job->job_param.vm_mapping.fence; + struct fence **fence = sched_job->job_param.vm_mapping.fence; amdgpu_vm_fence_pts(sched_job->job_param.vm_mapping.vm, sched_job->job_param.vm_mapping.start, sched_job->job_param.vm_mapping.last + 1, - sched_job->ibs[sched_job->num_ibs -1].fence); + &sched_job->ibs[sched_job->num_ibs -1].fence->base); if (fence) { - amdgpu_fence_unref(fence); - *fence = amdgpu_fence_ref(sched_job->ibs[sched_job->num_ibs -1].fence); + fence_put(*fence); + *fence = fence_get(&sched_job->ibs[sched_job->num_ibs -1].fence->base); } return 0; } @@ -781,7 +781,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, uint64_t addr, uint32_t gtt_flags, - struct amdgpu_fence **fence) + struct fence **fence) { struct amdgpu_ring *ring = 
adev->vm_manager.vm_pte_funcs_ring; unsigned nptes, ncmds, ndw; @@ -902,10 +902,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } amdgpu_vm_fence_pts(vm, mapping->it.start, - mapping->it.last + 1, ib->fence); + mapping->it.last + 1, &ib->fence->base); if (fence) { - amdgpu_fence_unref(fence); - *fence = amdgpu_fence_ref(ib->fence); + fence_put(*fence); + *fence = fence_get(&ib->fence->base); } amdgpu_ib_free(adev, ib); @@ -1038,7 +1038,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); if (bo_va) - r = amdgpu_sync_fence(adev, sync, &bo_va->last_pt_update->base); + r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update); return r; } @@ -1318,7 +1318,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, kfree(mapping); } - amdgpu_fence_unref(&bo_va->last_pt_update); + fence_put(bo_va->last_pt_update); kfree(bo_va); mutex_unlock(&vm->mutex); -- cgit From dd01d071957ded58d9bae3d3bf6061ada1d84692 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Thu, 30 Jul 2015 17:19:52 +0800 Subject: drm/amdgpu: some code refinement v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the code alignment, etc. 
v2: rebase the code Signed-off-by: Jammy Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 +++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 9 +++++---- 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index aee59110735f..fda7792d9e08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -817,14 +817,14 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) { - int r, i; + int r, i; struct amdgpu_cs_parser *parser = sched_job; struct amdgpu_device *adev = sched_job->adev; - bool reserved_buffers = false; + bool reserved_buffers = false; - r = amdgpu_cs_parser_relocs(parser); - if (r) { - if (r != -ERESTARTSYS) { + r = amdgpu_cs_parser_relocs(parser); + if (r) { + if (r != -ERESTARTSYS) { if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else @@ -841,16 +841,16 @@ static int amdgpu_cs_parser_prepare_job(struct amdgpu_cs_parser *sched_job) if (r) DRM_ERROR("Failed in the dependencies handling %d!\n", r); } - if (r) { - amdgpu_cs_parser_fini(parser, r, reserved_buffers); - return r; - } + if (r) { + amdgpu_cs_parser_fini(parser, r, reserved_buffers); + return r; + } - for (i = 0; i < parser->num_ibs; i++) - trace_amdgpu_cs(parser, i); + for (i = 0; i < parser->num_ibs; i++) + trace_amdgpu_cs(parser, i); - r = amdgpu_cs_ib_vm_chunk(adev, parser); - return r; + r = amdgpu_cs_ib_vm_chunk(adev, parser); + return r; } static struct amdgpu_ring *amdgpu_cs_parser_get_ring( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0f55c05c80b1..788dd348a650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -33,11 +33,12 @@ static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, { int r = 0; struct amdgpu_cs_parser *sched_job = (struct amdgpu_cs_parser *)job; - if (sched_job->prepare_job) + if (sched_job->prepare_job) { r = sched_job->prepare_job(sched_job); - if (r) { - DRM_ERROR("Prepare job error\n"); - schedule_work(&sched_job->job_work); + if (r) { + DRM_ERROR("Prepare job error\n"); + schedule_work(&sched_job->job_work); + } } return r; } -- cgit From ea199cc9f825f3ef5aab3db5f00dcc639f8a8b02 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Fri, 31 Jul 2015 16:47:28 +0800 Subject: drm/amdgpu: return new seq_no for amd_sched_push_job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is clean to update last_queued_v_seq in the scheduler module Signed-off-by: Jammy Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 9 ++------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 ++++++--------------------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10 ++++------ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 +- 5 files changed, 15 insertions(+), 38 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fda7792d9e08..468f884271b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -907,8 +907,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = 
amdgpu_cs_parser_prepare_job(parser); if (r) @@ -918,7 +916,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; - amd_sched_push_job(ring->scheduler, + parser->ibs[parser->num_ibs - 1].sequence = + amd_sched_push_job(ring->scheduler, &parser->ctx->rings[ring->idx].c_entity, parser); cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 788dd348a650..8c01c51aac41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -109,7 +109,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, { int r = 0; if (amdgpu_enable_scheduler) { - uint64_t v_seq; struct amdgpu_cs_parser *sched_job = amdgpu_cs_parser_create(adev, owner, @@ -119,16 +118,12 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - ibs[num_ibs - 1].sequence = v_seq; - amd_sched_push_job(ring->scheduler, + ibs[num_ibs - 1].sequence = amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit( &adev->kernel_ctx->rings[ring->idx].c_entity, - v_seq, - false, - -1); + ibs[num_ibs - 1].sequence, false, -1); if (r) WARN(true, "emit timeout\n"); } else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d90254f5ca6a..ab9c65a245ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -371,7 +371,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; - uint64_t v_seq; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, 
adev->kernel_ctx, ib, 1); if(!sched_job) @@ -379,15 +378,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->job_param.vm.bo = bo; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - ib->sequence = v_seq; - amd_sched_push_job(ring->scheduler, + ib->sequence = amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, - v_seq, - false, - -1); + ib->sequence, false, -1); if (r) DRM_ERROR("emit timeout\n"); @@ -521,7 +516,6 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; - uint64_t v_seq; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, adev->kernel_ctx, ib, 1); @@ -530,15 +524,11 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->job_param.vm.bo = pd; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - ib->sequence = v_seq; - amd_sched_push_job(ring->scheduler, + ib->sequence = amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, - v_seq, - false, - -1); + ib->sequence, false, -1); if (r) DRM_ERROR("emit timeout\n"); } else { @@ -872,7 +862,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; - uint64_t v_seq; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, adev->kernel_ctx, ib, 1); if(!sched_job) @@ -883,15 +872,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->job_param.vm_mapping.fence = fence; sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = 
amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx->rings[ring->idx].c_entity.last_queued_v_seq); - ib->sequence = v_seq; - amd_sched_push_job(ring->scheduler, + ib->sequence = amd_sched_push_job(ring->scheduler, &adev->kernel_ctx->rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx->rings[ring->idx].c_entity, - v_seq, - false, - -1); + ib->sequence, false, -1); if (r) DRM_ERROR("emit timeout\n"); } else { diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 89799eb86083..2c4c261ff928 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -293,12 +293,9 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, * @sched The pointer to the scheduler * @c_entity The pointer to amd_context_entity * @job The pointer to job required to submit - * return 0 if succeed. -1 if failed. - * -2 indicate queue is full for this client, client should wait untill - * scheduler consum some queued command. - * -1 other fail. 
+ * return the virtual sequence number */ -int amd_sched_push_job(struct amd_gpu_scheduler *sched, +uint64_t amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_context_entity *c_entity, void *job) { @@ -312,7 +309,8 @@ int amd_sched_push_job(struct amd_gpu_scheduler *sched, } wake_up_interruptible(&sched->wait_queue); - return 0; + + return atomic64_inc_return(&c_entity->last_queued_v_seq); } /** diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 9ab3adc1fa32..37dd6370bd98 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -129,7 +129,7 @@ struct amd_gpu_scheduler *amd_sched_create(void *device, int amd_sched_destroy(struct amd_gpu_scheduler *sched); -int amd_sched_push_job(struct amd_gpu_scheduler *sched, +uint64_t amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_context_entity *c_entity, void *job); -- cgit From 351dba73691fc632b269f531bbce80157f79c5b3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 3 Aug 2015 20:39:12 +0200 Subject: drm/amdgpu: reorder the code to avoid forward declerations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 61 ++++++++++++++++------------------ 1 file changed, 28 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 468f884271b3..d4cc232ccff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -41,11 +41,6 @@ struct amdgpu_cs_buckets { struct list_head bucket[AMDGPU_CS_NUM_BUCKETS]; }; -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, - int error, bool backoff); -static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int 
error, bool backoff); -static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser); - static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b) { unsigned i; @@ -470,34 +465,6 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a, return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; } -/** - * cs_parser_fini() - clean parser states - * @parser: parser structure holding parsing context. - * @error: error number - * - * If error is set than unvalidate buffer, otherwise just free memory - * used by parsing context. - **/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) -{ - amdgpu_cs_parser_fini_early(parser, error, backoff); - amdgpu_cs_parser_fini_late(parser); -} - -static int amdgpu_cs_parser_run_job( - struct amdgpu_cs_parser *sched_job) -{ - amdgpu_cs_parser_fini_early(sched_job, 0, true); - return 0; -} - -static int amdgpu_cs_parser_free_job( - struct amdgpu_cs_parser *sched_job) -{ - amdgpu_cs_parser_fini_late(sched_job); - return 0; -} - static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff) { if (!error) { @@ -548,6 +515,34 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) kfree(parser); } +/** + * cs_parser_fini() - clean parser states + * @parser: parser structure holding parsing context. + * @error: error number + * + * If error is set than unvalidate buffer, otherwise just free memory + * used by parsing context. 
+ **/ +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) +{ + amdgpu_cs_parser_fini_early(parser, error, backoff); + amdgpu_cs_parser_fini_late(parser); +} + +static int amdgpu_cs_parser_run_job( + struct amdgpu_cs_parser *sched_job) +{ + amdgpu_cs_parser_fini_early(sched_job, 0, true); + return 0; +} + +static int amdgpu_cs_parser_free_job( + struct amdgpu_cs_parser *sched_job) +{ + amdgpu_cs_parser_fini_late(sched_job); + return 0; +} + static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm) { -- cgit From 34cb581a7d99401cad0e1c43b528690885435f5b Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 4 Aug 2015 11:54:48 +0200 Subject: drm/amdgpu: fix bo list handling in CS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We didn't initialize the mutex in the cloned bo list resulting in nice warnings from lockdep. Also fixes error handling in this function. Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 83 ++++++++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +- 3 files changed, 37 insertions(+), 56 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4d6a3e825096..eadbe792c8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1077,12 +1077,11 @@ struct amdgpu_bo_list { struct amdgpu_bo_list_entry *array; }; +struct amdgpu_bo_list * +amdgpu_bo_list_clone(struct amdgpu_bo_list *list); struct amdgpu_bo_list * amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); -void amdgpu_bo_list_copy(struct amdgpu_device *adev, - struct amdgpu_bo_list *dst, - struct amdgpu_bo_list *src); void amdgpu_bo_list_free(struct amdgpu_bo_list
*list); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 4d27fa1660b9..7eed523bf28f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -62,6 +62,39 @@ static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, return 0; } +struct amdgpu_bo_list * +amdgpu_bo_list_clone(struct amdgpu_bo_list *list) +{ + struct amdgpu_bo_list *result; + unsigned i; + + result = kmalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); + if (!result) + return NULL; + + result->array = drm_calloc_large(list->num_entries, + sizeof(struct amdgpu_bo_list_entry)); + if (!result->array) { + kfree(result); + return NULL; + } + + mutex_init(&result->lock); + result->gds_obj = list->gds_obj; + result->gws_obj = list->gws_obj; + result->oa_obj = list->oa_obj; + result->has_userptr = list->has_userptr; + result->num_entries = list->num_entries; + + memcpy(result->array, list->array, list->num_entries * + sizeof(struct amdgpu_bo_list_entry)); + + for (i = 0; i < result->num_entries; ++i) + amdgpu_bo_ref(result->array[i].robj); + + return result; +} + static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) { struct amdgpu_bo_list *list; @@ -166,56 +199,6 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list) mutex_unlock(&list->lock); } -void amdgpu_bo_list_copy(struct amdgpu_device *adev, - struct amdgpu_bo_list *dst, - struct amdgpu_bo_list *src) -{ - struct amdgpu_bo_list_entry *array; - struct amdgpu_bo *gds_obj = adev->gds.gds_gfx_bo; - struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo; - struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo; - - bool has_userptr = false; - unsigned i; - - array = drm_calloc_large(src->num_entries, sizeof(struct amdgpu_bo_list_entry)); - if (!array) - return; - memset(array, 0, src->num_entries * sizeof(struct amdgpu_bo_list_entry)); - - for (i = 0; i < src->num_entries; ++i) { - memcpy(array, src->array, - src->num_entries * 
sizeof(struct amdgpu_bo_list_entry)); - array[i].robj = amdgpu_bo_ref(src->array[i].robj); - if (amdgpu_ttm_tt_has_userptr(array[i].robj->tbo.ttm)) { - has_userptr = true; - array[i].prefered_domains = AMDGPU_GEM_DOMAIN_GTT; - array[i].allowed_domains = AMDGPU_GEM_DOMAIN_GTT; - } - array[i].tv.bo = &array[i].robj->tbo; - array[i].tv.shared = true; - - if (array[i].prefered_domains == AMDGPU_GEM_DOMAIN_GDS) - gds_obj = array[i].robj; - if (array[i].prefered_domains == AMDGPU_GEM_DOMAIN_GWS) - gws_obj = array[i].robj; - if (array[i].prefered_domains == AMDGPU_GEM_DOMAIN_OA) - oa_obj = array[i].robj; - } - - for (i = 0; i < dst->num_entries; ++i) - amdgpu_bo_unref(&dst->array[i].robj); - - drm_free_large(dst->array); - - dst->gds_obj = gds_obj; - dst->gws_obj = gws_obj; - dst->oa_obj = oa_obj; - dst->has_userptr = has_userptr; - dst->array = array; - dst->num_entries = src->num_entries; -} - void amdgpu_bo_list_free(struct amdgpu_bo_list *list) { unsigned i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d4cc232ccff3..aa1bc24b7edb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -186,11 +186,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); if (bo_list && !bo_list->has_userptr) { - p->bo_list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); + p->bo_list = amdgpu_bo_list_clone(bo_list); + amdgpu_bo_list_put(bo_list); if (!p->bo_list) return -ENOMEM; - amdgpu_bo_list_copy(p->adev, p->bo_list, bo_list); - amdgpu_bo_list_put(bo_list); } else if (bo_list && bo_list->has_userptr) p->bo_list = bo_list; else -- cgit From 80de5913cf31c86d64547af0715de4822c9b1abe Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 5 Aug 2015 19:07:08 +0800 Subject: Revert "drm/amdgpu: return new seq_no for amd_sched_push_job" This reverts commit d1d33da8eb86b8ca41dd9ed95738030df5267b95. 
Reviewed-by: Christian König Conflicts: drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 +++++++++++++++++++++------ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10 ++++++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 +- 5 files changed, 38 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index aa1bc24b7edb..f72a8583b1a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -901,6 +901,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); + parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( + &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -910,8 +912,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; - parser->ibs[parser->num_ibs - 1].sequence = - amd_sched_push_job(ring->scheduler, + amd_sched_push_job(ring->scheduler, &parser->ctx->rings[ring->idx].c_entity, parser); cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 995901b9e428..0fcf020917d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -121,6 +121,7 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, { int r = 0;
if (amdgpu_enable_scheduler) { + uint64_t v_seq; struct amdgpu_cs_parser *sched_job = amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, ibs, 1); @@ -128,12 +129,16 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - ibs[num_ibs - 1].sequence = amd_sched_push_job(ring->scheduler, + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + ibs[num_ibs - 1].sequence = v_seq; + amd_sched_push_job(ring->scheduler, &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit( &adev->kernel_ctx.rings[ring->idx].c_entity, - ibs[num_ibs - 1].sequence, false, -1); + v_seq, + false, + -1); if (r) WARN(true, "emit timeout\n"); } else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 78713ae3b158..9d5043c42fc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -371,6 +371,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; + uint64_t v_seq; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, &adev->kernel_ctx, ib, 1); if(!sched_job) @@ -378,11 +379,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->job_param.vm.bo = bo; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - ib->sequence = amd_sched_push_job(ring->scheduler, + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + ib->sequence = v_seq; + amd_sched_push_job(ring->scheduler, &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, - ib->sequence, false, -1); + v_seq, + false, + -1); if (r) DRM_ERROR("emit timeout\n"); @@ -516,6 +521,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; + uint64_t v_seq; sched_job = 
amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, &adev->kernel_ctx, ib, 1); @@ -524,11 +530,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->job_param.vm.bo = pd; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - ib->sequence = amd_sched_push_job(ring->scheduler, + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + ib->sequence = v_seq; + amd_sched_push_job(ring->scheduler, &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, - ib->sequence, false, -1); + v_seq, + false, + -1); if (r) DRM_ERROR("emit timeout\n"); } else { @@ -862,6 +872,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (amdgpu_enable_scheduler) { int r; + uint64_t v_seq; sched_job = amdgpu_cs_parser_create(adev, AMDGPU_FENCE_OWNER_VM, &adev->kernel_ctx, ib, 1); if(!sched_job) @@ -872,11 +883,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->job_param.vm_mapping.fence = fence; sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; - ib->sequence = amd_sched_push_job(ring->scheduler, + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + ib->sequence = v_seq; + amd_sched_push_job(ring->scheduler, &adev->kernel_ctx.rings[ring->idx].c_entity, sched_job); r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, - ib->sequence, false, -1); + v_seq, + false, + -1); if (r) DRM_ERROR("emit timeout\n"); } else { diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index b9aa572980d2..1204b7386b39 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -289,9 +289,12 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, * @sched The pointer to the 
scheduler * @c_entity The pointer to amd_context_entity * @job The pointer to job required to submit - * return the virtual sequence number + * return 0 if succeed. -1 if failed. + * -2 indicate queue is full for this client, client should wait untill + * scheduler consum some queued command. + * -1 other fail. */ -uint64_t amd_sched_push_job(struct amd_gpu_scheduler *sched, +int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_context_entity *c_entity, void *job) { @@ -305,8 +308,7 @@ uint64_t amd_sched_push_job(struct amd_gpu_scheduler *sched, } wake_up_interruptible(&sched->wait_queue); - - return atomic64_inc_return(&c_entity->last_queued_v_seq); + return 0; } /** diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index c46d0854ab75..1a01ac45cd4c 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -124,7 +124,7 @@ struct amd_gpu_scheduler *amd_sched_create(void *device, int amd_sched_destroy(struct amd_gpu_scheduler *sched); -uint64_t amd_sched_push_job(struct amd_gpu_scheduler *sched, +int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_context_entity *c_entity, void *job); -- cgit From 4cd7f42cf8f57512b13a13bb7dcbeabb644f5264 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 5 Aug 2015 18:18:52 +0200 Subject: drm/amdgpu: fix coding style in a couple of places MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 17 ++++++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f72a8583b1a9..d26688ddaa20 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -528,15 +528,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_cs_parser_fini_late(parser); } -static int amdgpu_cs_parser_run_job( - struct amdgpu_cs_parser *sched_job) +static int amdgpu_cs_parser_run_job(struct amdgpu_cs_parser *sched_job) { amdgpu_cs_parser_fini_early(sched_job, 0, true); return 0; } -static int amdgpu_cs_parser_free_job( - struct amdgpu_cs_parser *sched_job) +static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job) { amdgpu_cs_parser_fini_late(sched_job); return 0; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 6f0d40b13a23..1f78ad60224a 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -64,6 +64,7 @@ static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq) { struct amd_sched_entity *p = rq->current_entity; int i = atomic_read(&rq->nr_entity) + 1; /*real count + dummy head*/ + while (i) { p = list_entry(p->list.next, typeof(*p), list); if (!rq->check_entity_status(p)) { @@ -83,7 +84,7 @@ static bool context_entity_is_waiting(struct amd_context_entity *entity) static int gpu_entity_check_status(struct amd_sched_entity *entity) { - struct amd_context_entity *tmp = NULL; + struct amd_context_entity *tmp; if (entity == &entity->belongto_rq->head) return -1; @@ -109,6 +110,7 @@ static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) { unsigned long flags; bool full; + spin_lock_irqsave(&sched->queue_lock, flags); full = atomic64_read(&sched->hw_rq_count) < sched->hw_submission_limit ? true : false; @@ -121,10 +123,10 @@ static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) * Select next entity from the kernel run queue, if not available, * return null. 
*/ -static struct amd_context_entity *kernel_rq_select_context( - struct amd_gpu_scheduler *sched) +static struct amd_context_entity * +kernel_rq_select_context(struct amd_gpu_scheduler *sched) { - struct amd_sched_entity *sched_entity = NULL; + struct amd_sched_entity *sched_entity; struct amd_context_entity *tmp = NULL; struct amd_run_queue *rq = &sched->kernel_rq; @@ -141,8 +143,8 @@ static struct amd_context_entity *kernel_rq_select_context( /** * Select next entity containing real IB submissions */ -static struct amd_context_entity *select_context( - struct amd_gpu_scheduler *sched) +static struct amd_context_entity * +select_context(struct amd_gpu_scheduler *sched) { struct amd_context_entity *wake_entity = NULL; struct amd_context_entity *tmp; @@ -413,6 +415,7 @@ void amd_sched_process_job(struct amd_sched_job *sched_job) { unsigned long flags; struct amd_gpu_scheduler *sched; + if (!sched_job) return; sched = sched_job->sched; @@ -445,7 +448,7 @@ struct amd_gpu_scheduler *amd_sched_create(void *device, unsigned hw_submission) { struct amd_gpu_scheduler *sched; - char name[20] = "gpu_sched[0]"; + char name[20]; sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL); if (!sched) -- cgit From 91404fb20825418fd9ab8e6533bc336e1ffc748e Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 5 Aug 2015 18:33:21 +0200 Subject: drm/amdgpu: merge amd_sched_entity and amd_context_entity v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoiding a couple of casts. 
v2: rename c_entity to entity as well Signed-off-by: Christian König Reviewed-by: Chunming Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 18 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 67 +++++++++++---------------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 57 ++++++++++------------- 7 files changed, 81 insertions(+), 101 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 423cf91ef652..1e6800050ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1013,9 +1013,9 @@ struct amdgpu_vm_manager { #define AMDGPU_CTX_MAX_CS_PENDING 16 struct amdgpu_ctx_ring { - uint64_t sequence; - struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; - struct amd_context_entity c_entity; + uint64_t sequence; + struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING]; + struct amd_sched_entity entity; }; struct amdgpu_ctx { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d26688ddaa20..b1dc7e1ed271 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -900,7 +900,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); + &parser->ctx->rings[ring->idx].entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -911,7 +911,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->run_job = 
amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].c_entity, + &parser->ctx->rings[ring->idx].entity, parser); cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; up_read(&adev->exclusive_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index e04364cdcc9e..232e800eea56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -46,17 +46,17 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel, rq = &adev->rings[i]->scheduler->kernel_rq; else rq = &adev->rings[i]->scheduler->sched_rq; - r = amd_context_entity_init(adev->rings[i]->scheduler, - &ctx->rings[i].c_entity, - rq, amdgpu_sched_jobs); + r = amd_sched_entity_init(adev->rings[i]->scheduler, + &ctx->rings[i].entity, + rq, amdgpu_sched_jobs); if (r) break; } if (i < adev->num_rings) { for (j = 0; j < i; j++) - amd_context_entity_fini(adev->rings[j]->scheduler, - &ctx->rings[j].c_entity); + amd_sched_entity_fini(adev->rings[j]->scheduler, + &ctx->rings[j].entity); kfree(ctx); return r; } @@ -75,8 +75,8 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) if (amdgpu_enable_scheduler) { for (i = 0; i < adev->num_rings; i++) - amd_context_entity_fini(adev->rings[i]->scheduler, - &ctx->rings[i].c_entity); + amd_sched_entity_fini(adev->rings[i]->scheduler, + &ctx->rings[i].entity); } } @@ -271,7 +271,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, int r; if (amdgpu_enable_scheduler) { - r = amd_sched_wait_emit(&cring->c_entity, + r = amd_sched_wait_emit(&cring->entity, seq, false, -1); @@ -281,7 +281,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) - queued_seq = amd_sched_next_queued_seq(&cring->c_entity); + queued_seq = amd_sched_next_queued_seq(&cring->entity); else queued_seq = cring->sequence; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 71a4a7e4b1ae..787b93db6796 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -28,7 +28,7 @@ #include "amdgpu.h" static int amdgpu_sched_prepare_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *entity, void *job) { int r = 0; @@ -51,7 +51,7 @@ static void amdgpu_fence_sched_cb(struct fence *f, struct fence_cb *cb) } static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *entity, struct amd_sched_job *job) { int r = 0; @@ -83,7 +83,7 @@ static void amdgpu_sched_run_job(struct amd_gpu_scheduler *sched, goto err; } - amd_sched_emit(c_entity, sched_job->ibs[sched_job->num_ibs - 1].sequence); + amd_sched_emit(entity, sched_job->ibs[sched_job->num_ibs - 1].sequence); mutex_unlock(&sched_job->job_lock); return; @@ -136,13 +136,13 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ibs[num_ibs - 1].sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); r = amd_sched_wait_emit( - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d5043c42fc5..230bf1f34ead 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -379,12 +379,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, sched_job->job_param.vm.bo = bo; sched_job->run_job = 
amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); @@ -530,12 +530,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, sched_job->job_param.vm.bo = pd; sched_job->run_job = amdgpu_vm_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].entity, v_seq, false, -1); @@ -883,12 +883,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, sched_job->job_param.vm_mapping.fence = fence; sched_job->run_job = amdgpu_vm_bo_update_mapping_run_job; sched_job->free_job = amdgpu_vm_free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].c_entity.last_queued_v_seq); + v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); ib->sequence = v_seq; amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].c_entity, + &adev->kernel_ctx.rings[ring->idx].entity, sched_job); - r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].c_entity, + r = amd_sched_wait_emit(&adev->kernel_ctx.rings[ring->idx].entity, 
v_seq, false, -1); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 1f78ad60224a..eb3b0993a8cd 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -76,7 +76,7 @@ static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq) return i ? p : NULL; } -static bool context_entity_is_waiting(struct amd_context_entity *entity) +static bool context_entity_is_waiting(struct amd_sched_entity *entity) { /* TODO: sync obj for multi-ring synchronization */ return false; @@ -84,14 +84,11 @@ static bool context_entity_is_waiting(struct amd_context_entity *entity) static int gpu_entity_check_status(struct amd_sched_entity *entity) { - struct amd_context_entity *tmp; - if (entity == &entity->belongto_rq->head) return -1; - tmp = container_of(entity, typeof(*tmp), generic_entity); - if (kfifo_is_empty(&tmp->job_queue) || - context_entity_is_waiting(tmp)) + if (kfifo_is_empty(&entity->job_queue) || + context_entity_is_waiting(entity)) return -1; return 0; @@ -123,31 +120,26 @@ static bool is_scheduler_ready(struct amd_gpu_scheduler *sched) * Select next entity from the kernel run queue, if not available, * return null. 
*/ -static struct amd_context_entity * +static struct amd_sched_entity * kernel_rq_select_context(struct amd_gpu_scheduler *sched) { struct amd_sched_entity *sched_entity; - struct amd_context_entity *tmp = NULL; struct amd_run_queue *rq = &sched->kernel_rq; mutex_lock(&rq->lock); sched_entity = rq_select_entity(rq); - if (sched_entity) - tmp = container_of(sched_entity, - typeof(*tmp), - generic_entity); mutex_unlock(&rq->lock); - return tmp; + return sched_entity; } /** * Select next entity containing real IB submissions */ -static struct amd_context_entity * +static struct amd_sched_entity * select_context(struct amd_gpu_scheduler *sched) { - struct amd_context_entity *wake_entity = NULL; - struct amd_context_entity *tmp; + struct amd_sched_entity *wake_entity = NULL; + struct amd_sched_entity *tmp; struct amd_run_queue *rq; if (!is_scheduler_ready(sched)) @@ -158,12 +150,9 @@ select_context(struct amd_gpu_scheduler *sched) if (tmp != NULL) goto exit; - WARN_ON(offsetof(struct amd_context_entity, generic_entity) != 0); - rq = &sched->sched_rq; mutex_lock(&rq->lock); - tmp = container_of(rq_select_entity(rq), - typeof(*tmp), generic_entity); + tmp = rq_select_entity(rq); mutex_unlock(&rq->lock); exit: if (sched->current_entity && (sched->current_entity != tmp)) @@ -178,15 +167,15 @@ exit: * Init a context entity used by scheduler when submit to HW ring. * * @sched The pointer to the scheduler - * @entity The pointer to a valid amd_context_entity + * @entity The pointer to a valid amd_sched_entity * @rq The run queue this entity belongs * @kernel If this is an entity for the kernel * @jobs The max number of jobs in the job queue * * return 0 if succeed. 
negative error code on failure */ -int amd_context_entity_init(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity, +int amd_sched_entity_init(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity, struct amd_run_queue *rq, uint32_t jobs) { @@ -195,10 +184,10 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, if (!(sched && entity && rq)) return -EINVAL; - memset(entity, 0, sizeof(struct amd_context_entity)); + memset(entity, 0, sizeof(struct amd_sched_entity)); seq_ring = ((uint64_t)sched->ring_id) << 60; spin_lock_init(&entity->lock); - entity->generic_entity.belongto_rq = rq; + entity->belongto_rq = rq; entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); @@ -213,7 +202,7 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, /* Add the entity to the run queue */ mutex_lock(&rq->lock); - rq_add_entity(rq, &entity->generic_entity); + rq_add_entity(rq, entity); mutex_unlock(&rq->lock); return 0; } @@ -227,14 +216,14 @@ int amd_context_entity_init(struct amd_gpu_scheduler *sched, * return true if entity is initialized, false otherwise */ static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity) + struct amd_sched_entity *entity) { return entity->scheduler == sched && - entity->generic_entity.belongto_rq != NULL; + entity->belongto_rq != NULL; } static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity) + struct amd_sched_entity *entity) { /** * Idle means no pending IBs, and the entity is not @@ -256,11 +245,11 @@ static bool is_context_entity_idle(struct amd_gpu_scheduler *sched, * * return 0 if succeed. 
negative error code on failure */ -int amd_context_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity) +int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity) { int r = 0; - struct amd_run_queue *rq = entity->generic_entity.belongto_rq; + struct amd_run_queue *rq = entity->belongto_rq; if (!is_context_entity_initialized(sched, entity)) return 0; @@ -283,7 +272,7 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, } mutex_lock(&rq->lock); - rq_remove_entity(rq, &entity->generic_entity); + rq_remove_entity(rq, entity); mutex_unlock(&rq->lock); kfifo_free(&entity->job_queue); return r; @@ -293,7 +282,7 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, * Submit a normal job to the job queue * * @sched The pointer to the scheduler - * @c_entity The pointer to amd_context_entity + * @c_entity The pointer to amd_sched_entity * @job The pointer to job required to submit * return 0 if succeed. -1 if failed. * -2 indicate queue is full for this client, client should wait untill @@ -301,7 +290,7 @@ int amd_context_entity_fini(struct amd_gpu_scheduler *sched, * -1 other fail. 
*/ int amd_sched_push_job(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, void *job) { while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), @@ -328,7 +317,7 @@ int amd_sched_push_job(struct amd_gpu_scheduler *sched, * * return =0 signaled , <0 failed */ -int amd_sched_wait_emit(struct amd_context_entity *c_entity, +int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, bool intr, long timeout) @@ -369,7 +358,7 @@ static int amd_sched_main(void *param) int r; void *job; struct sched_param sparam = {.sched_priority = 1}; - struct amd_context_entity *c_entity = NULL; + struct amd_sched_entity *c_entity = NULL; struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param; sched_setscheduler(current, SCHED_FIFO, &sparam); @@ -505,7 +494,7 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched) * @entity The context entity * @seq The sequence number for the latest emitted job */ -void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq) +void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq) { atomic64_set(&c_entity->last_emitted_v_seq, seq); wake_up_all(&c_entity->wait_emit); @@ -518,7 +507,7 @@ void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq) * * return the next queued sequence number */ -uint64_t amd_sched_next_queued_seq(struct amd_context_entity *c_entity) +uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity) { return atomic64_read(&c_entity->last_queued_v_seq) + 1; } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 64ef0e2b1543..a3e29df957fc 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -41,6 +41,17 @@ struct amd_run_queue; struct amd_sched_entity { struct list_head list; struct amd_run_queue *belongto_rq; + spinlock_t lock; + /* the virtual_seq is unique per context 
per ring */ + atomic64_t last_queued_v_seq; + atomic64_t last_emitted_v_seq; + /* the job_queue maintains the jobs submitted by clients */ + struct kfifo job_queue; + spinlock_t queue_lock; + struct amd_gpu_scheduler *scheduler; + wait_queue_head_t wait_queue; + wait_queue_head_t wait_emit; + bool is_pending; }; /** @@ -61,25 +72,6 @@ struct amd_run_queue { int (*check_entity_status)(struct amd_sched_entity *entity); }; -/** - * Context based scheduler entity, there can be multiple entities for - * each context, and one entity per ring -*/ -struct amd_context_entity { - struct amd_sched_entity generic_entity; - spinlock_t lock; - /* the virtual_seq is unique per context per ring */ - atomic64_t last_queued_v_seq; - atomic64_t last_emitted_v_seq; - /* the job_queue maintains the jobs submitted by clients */ - struct kfifo job_queue; - spinlock_t queue_lock; - struct amd_gpu_scheduler *scheduler; - wait_queue_head_t wait_queue; - wait_queue_head_t wait_emit; - bool is_pending; -}; - struct amd_sched_job { struct list_head list; struct fence_cb cb; @@ -93,10 +85,10 @@ struct amd_sched_job { */ struct amd_sched_backend_ops { int (*prepare_job)(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, void *job); void (*run_job)(struct amd_gpu_scheduler *sched, - struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, struct amd_sched_job *job); void (*process_job)(struct amd_gpu_scheduler *sched, void *job); }; @@ -116,7 +108,7 @@ struct amd_gpu_scheduler { uint32_t granularity; /* in ms unit */ uint32_t preemption; wait_queue_head_t wait_queue; - struct amd_context_entity *current_entity; + struct amd_sched_entity *current_entity; struct mutex sched_lock; spinlock_t queue_lock; uint32_t hw_submission_limit; @@ -132,10 +124,10 @@ struct amd_gpu_scheduler *amd_sched_create(void *device, int amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_push_job(struct amd_gpu_scheduler *sched, - 
struct amd_context_entity *c_entity, + struct amd_sched_entity *c_entity, void *job); -int amd_sched_wait_emit(struct amd_context_entity *c_entity, +int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, bool intr, long timeout); @@ -143,16 +135,15 @@ int amd_sched_wait_emit(struct amd_context_entity *c_entity, void amd_sched_process_job(struct amd_sched_job *sched_job); uint64_t amd_sched_get_handled_seq(struct amd_gpu_scheduler *sched); -int amd_context_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity); - -int amd_context_entity_init(struct amd_gpu_scheduler *sched, - struct amd_context_entity *entity, - struct amd_run_queue *rq, - uint32_t jobs); +int amd_sched_entity_init(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity, + struct amd_run_queue *rq, + uint32_t jobs); +int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, + struct amd_sched_entity *entity); -void amd_sched_emit(struct amd_context_entity *c_entity, uint64_t seq); +void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); -uint64_t amd_sched_next_queued_seq(struct amd_context_entity *c_entity); +uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); #endif -- cgit From f556cb0caeec1ba9b8e5e2aa85b47e76277f5d4b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Sun, 2 Aug 2015 11:18:04 +0800 Subject: drm/amd: add scheduler fence implementation (v2) scheduler fence is based on kernel fence framework. 
v2: squash in Christian's build fix Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 34 ++++---- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 26 +++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 33 +++++++- drivers/gpu/drm/amd/scheduler/sched_fence.c | 112 ++++++++++++++++++++++++++ 9 files changed, 202 insertions(+), 38 deletions(-) create mode 100644 drivers/gpu/drm/amd/scheduler/sched_fence.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index f1cb7d2fa411..04c270757030 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -86,6 +86,7 @@ amdgpu-y += amdgpu_cgs.o # GPU scheduler amdgpu-y += \ ../scheduler/gpu_scheduler.o \ + ../scheduler/sched_fence.o \ amdgpu_sched.o amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 987e3075a03f..2ba448ee948b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1261,6 +1261,7 @@ struct amdgpu_cs_parser { int (*prepare_job)(struct amdgpu_cs_parser *sched_job); int (*run_job)(struct amdgpu_cs_parser *sched_job); int (*free_job)(struct amdgpu_cs_parser *sched_job); + struct amd_sched_fence *s_fence; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b1dc7e1ed271..f428288d8363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -899,8 +899,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - parser->ibs[parser->num_ibs - 1].sequence = atomic64_inc_return( - &parser->ctx->rings[ring->idx].entity.last_queued_v_seq); if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) @@ -910,10 +908,21 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) parser->ring = ring; parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; - amd_sched_push_job(ring->scheduler, - &parser->ctx->rings[ring->idx].entity, - parser); - cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence; + mutex_lock(&parser->job_lock); + r = amd_sched_push_job(ring->scheduler, + &parser->ctx->rings[ring->idx].entity, + parser, + &parser->s_fence); + if (r) { + mutex_unlock(&parser->job_lock); + goto out; + } + parser->ibs[parser->num_ibs - 1].sequence = + amdgpu_ctx_add_fence(parser->ctx, ring, + &parser->s_fence->base, + parser->s_fence->v_seq); + cs->out.handle = parser->s_fence->v_seq; + mutex_unlock(&parser->job_lock); up_read(&adev->exclusive_lock); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 232e800eea56..1833f05c7e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -268,16 +268,6 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; struct fence *fence; uint64_t queued_seq; - int r; - - if (amdgpu_enable_scheduler) { - r = amd_sched_wait_emit(&cring->entity, - seq, - false, - -1); - if (r) - return NULL; - } spin_lock(&ctx->ring_lock); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index eed409c59492..5104e64e9ad8 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -218,7 +218,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, sequence = amdgpu_enable_scheduler ? ib->sequence : 0; - if (ib->ctx) + if (!amdgpu_enable_scheduler && ib->ctx) ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, &ib->fence->base, sequence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d82f2481bd0e..6a7e83edcaa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -118,7 +118,6 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, { int r = 0; if (amdgpu_enable_scheduler) { - uint64_t v_seq; struct amdgpu_cs_parser *sched_job = amdgpu_cs_parser_create(adev, owner, &adev->kernel_ctx, ibs, num_ibs); @@ -126,22 +125,23 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return -ENOMEM; } sched_job->free_job = free_job; - v_seq = atomic64_inc_return(&adev->kernel_ctx.rings[ring->idx].entity.last_queued_v_seq); - ibs[num_ibs - 1].sequence = v_seq; - amd_sched_push_job(ring->scheduler, - &adev->kernel_ctx.rings[ring->idx].entity, - sched_job); - r = amd_sched_wait_emit( - &adev->kernel_ctx.rings[ring->idx].entity, - v_seq, - false, - -1); - if (r) - WARN(true, "emit timeout\n"); - } else + mutex_lock(&sched_job->job_lock); + r = amd_sched_push_job(ring->scheduler, + &adev->kernel_ctx.rings[ring->idx].entity, + sched_job, &sched_job->s_fence); + if (r) { + mutex_unlock(&sched_job->job_lock); + kfree(sched_job); + return r; + } + ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; + *f = &sched_job->s_fence->base; + mutex_unlock(&sched_job->job_lock); + } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); - if (r) - return r; - *f = &ibs[num_ibs - 1].fence->base; + if (r) + return r; + *f = &ibs[num_ibs - 1].fence->base; + } return 0; } diff --git 
a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 33b4f55e48b1..402086d96889 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -180,6 +180,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, uint32_t jobs) { uint64_t seq_ring = 0; + char name[20]; if (!(sched && entity && rq)) return -EINVAL; @@ -191,6 +192,10 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, entity->scheduler = sched; init_waitqueue_head(&entity->wait_queue); init_waitqueue_head(&entity->wait_emit); + entity->fence_context = fence_context_alloc(1); + snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context); + memcpy(entity->name, name, 20); + INIT_LIST_HEAD(&entity->fence_list); if(kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL)) @@ -199,6 +204,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, spin_lock_init(&entity->queue_lock); atomic64_set(&entity->last_emitted_v_seq, seq_ring); atomic64_set(&entity->last_queued_v_seq, seq_ring); + atomic64_set(&entity->last_signaled_v_seq, seq_ring); /* Add the entity to the run queue */ mutex_lock(&rq->lock); @@ -291,15 +297,25 @@ int amd_sched_entity_fini(struct amd_gpu_scheduler *sched, */ int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data) + void *data, + struct amd_sched_fence **fence) { - struct amd_sched_job *job = kzalloc(sizeof(struct amd_sched_job), - GFP_KERNEL); + struct amd_sched_job *job; + + if (!fence) + return -EINVAL; + job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL); if (!job) return -ENOMEM; job->sched = sched; job->s_entity = c_entity; job->data = data; + *fence = amd_sched_fence_create(c_entity); + if ((*fence) == NULL) { + kfree(job); + return -EINVAL; + } + job->s_fence = *fence; while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), &c_entity->queue_lock) != sizeof(void *)) { 
/** @@ -368,12 +384,16 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) unsigned long flags; sched = sched_job->sched; + atomic64_set(&sched_job->s_entity->last_signaled_v_seq, + sched_job->s_fence->v_seq); + amd_sched_fence_signal(sched_job->s_fence); spin_lock_irqsave(&sched->queue_lock, flags); list_del(&sched_job->list); atomic64_dec(&sched->hw_rq_count); spin_unlock_irqrestore(&sched->queue_lock, flags); sched->ops->process_job(sched, sched_job); + fence_put(&sched_job->s_fence->base); kfree(sched_job); wake_up_interruptible(&sched->wait_queue); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index f54615d6a500..300132f14d74 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -45,6 +45,7 @@ struct amd_sched_entity { /* the virtual_seq is unique per context per ring */ atomic64_t last_queued_v_seq; atomic64_t last_emitted_v_seq; + atomic64_t last_signaled_v_seq; /* the job_queue maintains the jobs submitted by clients */ struct kfifo job_queue; spinlock_t queue_lock; @@ -52,6 +53,9 @@ struct amd_sched_entity { wait_queue_head_t wait_queue; wait_queue_head_t wait_emit; bool is_pending; + uint64_t fence_context; + struct list_head fence_list; + char name[20]; }; /** @@ -72,14 +76,35 @@ struct amd_run_queue { int (*check_entity_status)(struct amd_sched_entity *entity); }; +struct amd_sched_fence { + struct fence base; + struct fence_cb cb; + struct list_head list; + struct amd_sched_entity *entity; + uint64_t v_seq; + spinlock_t lock; +}; + struct amd_sched_job { struct list_head list; struct fence_cb cb; struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; void *data; + struct amd_sched_fence *s_fence; }; +extern const struct fence_ops amd_sched_fence_ops; +static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) +{ + struct amd_sched_fence *__f = container_of(f, struct 
amd_sched_fence, base); + + if (__f->base.ops == &amd_sched_fence_ops) + return __f; + + return NULL; +} + /** * Define the backend operations called by the scheduler, * these functions should be implemented in driver side @@ -126,7 +151,8 @@ int amd_sched_destroy(struct amd_gpu_scheduler *sched); int amd_sched_push_job(struct amd_gpu_scheduler *sched, struct amd_sched_entity *c_entity, - void *data); + void *data, + struct amd_sched_fence **fence); int amd_sched_wait_emit(struct amd_sched_entity *c_entity, uint64_t seq, @@ -146,4 +172,9 @@ void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq); uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity); +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity); +void amd_sched_fence_signal(struct amd_sched_fence *fence); + + #endif diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c new file mode 100644 index 000000000000..d580a357c547 --- /dev/null +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -0,0 +1,112 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#include +#include +#include +#include +#include "gpu_scheduler.h" + +static void amd_sched_fence_wait_cb(struct fence *f, struct fence_cb *cb) +{ + struct amd_sched_fence *fence = + container_of(cb, struct amd_sched_fence, cb); + list_del_init(&fence->list); + fence_put(&fence->base); +} + +struct amd_sched_fence *amd_sched_fence_create( + struct amd_sched_entity *s_entity) +{ + struct amd_sched_fence *fence = NULL; + fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + fence->v_seq = atomic64_inc_return(&s_entity->last_queued_v_seq); + fence->entity = s_entity; + spin_lock_init(&fence->lock); + fence_init(&fence->base, &amd_sched_fence_ops, + &fence->lock, + s_entity->fence_context, + fence->v_seq); + fence_get(&fence->base); + list_add_tail(&fence->list, &s_entity->fence_list); + if (fence_add_callback(&fence->base,&fence->cb, + amd_sched_fence_wait_cb)) { + fence_put(&fence->base); + kfree(fence); + return NULL; + } + return fence; +} + +bool amd_sched_check_ts(struct amd_sched_entity *s_entity, uint64_t v_seq) +{ + return atomic64_read(&s_entity->last_signaled_v_seq) >= v_seq ? 
true : false; +} + +void amd_sched_fence_signal(struct amd_sched_fence *fence) +{ + if (amd_sched_check_ts(fence->entity, fence->v_seq)) { + int ret = fence_signal_locked(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + } else + WARN(true, "fence process dismattch with job!\n"); +} + +static const char *amd_sched_fence_get_driver_name(struct fence *fence) +{ + return "amd_sched"; +} + +static const char *amd_sched_fence_get_timeline_name(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + return (const char *)fence->entity->name; +} + +static bool amd_sched_fence_enable_signaling(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return !amd_sched_check_ts(fence->entity, fence->v_seq); +} + +static bool amd_sched_fence_is_signaled(struct fence *f) +{ + struct amd_sched_fence *fence = to_amd_sched_fence(f); + + return amd_sched_check_ts(fence->entity, fence->v_seq); +} + +const struct fence_ops amd_sched_fence_ops = { + .get_driver_name = amd_sched_fence_get_driver_name, + .get_timeline_name = amd_sched_fence_get_timeline_name, + .enable_signaling = amd_sched_fence_enable_signaling, + .signaled = amd_sched_fence_is_signaled, + .wait = fence_default_wait, + .release = NULL, +}; -- cgit From 281b42230175608dec0cd8dab9908250e7aa36a9 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 12 Aug 2015 12:58:31 +0800 Subject: drm/amdgpu: add reference for **fence fix fence is released when pass to **fence sometimes. add reference for it. 
Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++ drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + 11 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f428288d8363..8796938216d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -136,6 +136,7 @@ static void amdgpu_job_work_func(struct work_struct *work) sched_job->free_job(sched_job); mutex_unlock(&sched_job->job_lock); /* after processing job, free memory */ + fence_put(&sched_job->s_fence->base); kfree(sched_job); } struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d2e5f3b90a3c..a86e38158afa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -133,13 +133,13 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return r; } ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; - *f = &sched_job->s_fence->base; + *f = fence_get(&sched_job->s_fence->base); mutex_unlock(&sched_job->job_lock); } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); if (r) return r; - *f = &ibs[num_ibs - 1].fence->base; + *f = fence_get(&ibs[num_ibs - 1].fence->base); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e7336a95fe59..68369cf1e318 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -877,7 +877,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, if (fence) *fence = fence_get(f); amdgpu_bo_unref(&bo); - + fence_put(f); if (amdgpu_enable_scheduler) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 38660eac67d6..33ee6ae28f37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -415,6 +415,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, goto err; if (fence) *fence = fence_get(f); + fence_put(f); if (amdgpu_enable_scheduler) return 0; err: @@ -481,6 +482,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, goto err; if (fence) *fence = fence_get(f); + fence_put(f); if (amdgpu_enable_scheduler) return 0; err: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b3f5d0484980..de882b0db350 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -366,6 +366,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, &fence); if (!r) amdgpu_bo_fence(bo, fence, true); + fence_put(fence); if (amdgpu_enable_scheduler) { amdgpu_bo_unreserve(bo); return 0; @@ -495,6 +496,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (r) goto error_free; amdgpu_bo_fence(pd, fence, true); + fence_put(fence); } if (!amdgpu_enable_scheduler || ib->length_dw == 0) { @@ -812,6 +814,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, fence_put(*fence); *fence = fence_get(f); } + fence_put(f); if (!amdgpu_enable_scheduler) { amdgpu_ib_free(adev, ib); kfree(ib); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c3ed5b22d732..2b4242b39b0a 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -669,6 +669,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) } err1: + fence_put(f); amdgpu_ib_free(adev, &ib); err0: amdgpu_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index ee1c47f9a2b6..9b0cab413677 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2698,6 +2698,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) } err2: + fence_put(f); amdgpu_ib_free(adev, &ib); err1: amdgpu_gfx_scratch_free(adev, scratch); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a865d96b67af..4b68e6306f40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -659,6 +659,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) r = -EINVAL; } err2: + fence_put(f); amdgpu_ib_free(adev, &ib); err1: amdgpu_gfx_scratch_free(adev, scratch); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6de7dc88d53c..9de8104eddeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -733,6 +733,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) } err1: + fence_put(f); amdgpu_ib_free(adev, &ib); err0: amdgpu_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 963a991fea00..029f3455f9f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -853,6 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) r = -EINVAL; } err1: + fence_put(f); amdgpu_ib_free(adev, &ib); err0: amdgpu_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 39577f6c0241..5017c71ba700 100644 --- 
a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -313,6 +313,7 @@ int amd_sched_push_job(struct amd_gpu_scheduler *sched, kfree(job); return -EINVAL; } + fence_get(&(*fence)->base); job->s_fence = *fence; while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), &c_entity->queue_lock) != sizeof(void *)) { -- cgit From 1939e3e265d2fb984b56829c51f5843bfc6d5292 Mon Sep 17 00:00:00 2001 From: "monk.liu" Date: Thu, 13 Aug 2015 16:19:54 +0800 Subject: drm/amdgpu: drop bo_list_clone when no scheduler bo_list_clone() will take a lot of time when bo_list hold too much elements, like above 7000 Signed-off-by: Monk.Liu Reviewed-by: Chunming Zhou Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8796938216d6..07e3380ee0f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -186,15 +186,19 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto out; } bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - if (bo_list && !bo_list->has_userptr) { - p->bo_list = amdgpu_bo_list_clone(bo_list); - amdgpu_bo_list_put(bo_list); - if (!p->bo_list) - return -ENOMEM; - } else if (bo_list && bo_list->has_userptr) + if (!amdgpu_enable_scheduler) p->bo_list = bo_list; - else - p->bo_list = NULL; + else { + if (bo_list && !bo_list->has_userptr) { + p->bo_list = amdgpu_bo_list_clone(bo_list); + amdgpu_bo_list_put(bo_list); + if (!p->bo_list) + return -ENOMEM; + } else if (bo_list && bo_list->has_userptr) + p->bo_list = bo_list; + else + p->bo_list = NULL; + } /* get chunks */ INIT_LIST_HEAD(&p->validated); @@ -495,7 +499,7 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) if (parser->ctx) 
amdgpu_ctx_put(parser->ctx); if (parser->bo_list) { - if (!parser->bo_list->has_userptr) + if (amdgpu_enable_scheduler && !parser->bo_list->has_userptr) amdgpu_bo_list_free(parser->bo_list); else amdgpu_bo_list_put(parser->bo_list); -- cgit From c3b95d4f9e460704e184ded7af60b9c4898f6181 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 14 Aug 2015 14:55:27 +0800 Subject: drm/amdgpu: move prepare work out of scheduler to cs_ioctl Signed-off-by: Chunming Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 07e3380ee0f2..11edac725210 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -533,12 +533,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_cs_parser_fini_late(parser); } -static int amdgpu_cs_parser_run_job(struct amdgpu_cs_parser *sched_job) -{ - amdgpu_cs_parser_fini_early(sched_job, 0, true); - return 0; -} - static int amdgpu_cs_parser_free_job(struct amdgpu_cs_parser *sched_job) { amdgpu_cs_parser_fini_late(sched_job); @@ -904,14 +898,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_enable_scheduler && parser->num_ibs) { struct amdgpu_ring * ring = amdgpu_cs_parser_get_ring(adev, parser); - if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { - r = amdgpu_cs_parser_prepare_job(parser); - if (r) - goto out; - } else - parser->prepare_job = amdgpu_cs_parser_prepare_job; + r = amdgpu_cs_parser_prepare_job(parser); + if (r) + goto out; parser->ring = ring; - parser->run_job = amdgpu_cs_parser_run_job; parser->free_job = amdgpu_cs_parser_free_job; mutex_lock(&parser->job_lock); r = amd_sched_push_job(ring->scheduler, @@ -927,6 +917,11 @@ int 
amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) &parser->s_fence->base, parser->s_fence->v_seq); cs->out.handle = parser->s_fence->v_seq; + list_sort(NULL, &parser->validated, cmp_size_smaller_first); + ttm_eu_fence_buffer_objects(&parser->ticket, + &parser->validated, + &parser->s_fence->base); + mutex_unlock(&parser->job_lock); up_read(&adev->exclusive_lock); return 0; -- cgit From 05906dec7d7daf197b9b773295c95ad6b9af2a5a Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Fri, 14 Aug 2015 20:08:40 +0200 Subject: drm/amdgpu: wait on page directory changes. v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pagetables can be moved and therefore the page directory update can be necessary for the current cs even if none of the bo's are moved. In that scenario there is no fence between the sdma0 and gfx ring, so we add one. v2 (chk): rebased Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++++ 3 files changed, 11 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 68beb40e283b..2fc58e658986 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -982,6 +982,7 @@ struct amdgpu_vm { /* contains the page directory */ struct amdgpu_bo *page_directory; unsigned max_pde_used; + struct fence *page_directory_fence; /* array of page tables, one for each page directory entry */ struct amdgpu_vm_pt *page_tables; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 11edac725210..e4424b4db5d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -551,6 +551,10 @@ static int 
amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; + r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence); + if (r) + return r; + r = amdgpu_vm_clear_freed(adev, vm); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2fc909f5d710..a78a206e176e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -495,7 +495,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, &fence); if (r) goto error_free; + amdgpu_bo_fence(pd, fence, true); + fence_put(vm->page_directory_fence); + vm->page_directory_fence = fence_get(fence); fence_put(fence); } @@ -1291,6 +1294,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) return -ENOMEM; } + vm->page_directory_fence = NULL; + r = amdgpu_bo_create(adev, pd_size, align, true, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &vm->page_directory); @@ -1339,6 +1344,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) kfree(vm->page_tables); amdgpu_bo_unref(&vm->page_directory); + fence_put(vm->page_directory_fence); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { amdgpu_fence_unref(&vm->ids[i].flushed_updates); -- cgit