From 672c368f9398042b629740cc9816e8e939eff2db Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jan 2020 18:47:52 +0000 Subject: drm/i915: Keep track of request among the scheduling lists If we keep track of when the i915_request.sched.link is on the HW runlist, or in the priority queue we can simplify our interactions with the request (such as during rescheduling). This also simplifies the next patch where we introduce a new in-between list, for requests that are ready but neither on the run list or in the queue. v2: Update i915_sched_node.link explanation for current usage where it is a link on both the queue and on the runlists. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200116184754.2860848-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index be185886e4fc..9ed0d3bc7249 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request) xfer: /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { list_move_tail(&request->sched.link, &engine->active.requests); + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + } if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && -- cgit From b4a9a149f91ea345da76bcfe3f8a39715ac346a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Jan 2020 14:02:43 +0000 Subject: drm/i915: Mark the removal of the i915_request from the sched.link Keep the rq->fence.flags consistent with the status of the rq->sched.link, and clear the associated bits when decoupling the link on retirement (as we may wish to inspect those flags independent of other state). Fixes: 32ff621fd744 ("drm/i915/gt: Allow temporary suspension of inflight requests") References: https://gitlab.freedesktop.org/drm/intel/issues/997 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200122140243.495621-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9ed0d3bc7249..78a5f5d3c070 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -221,6 +221,8 @@ static void remove_from_engine(struct i915_request *rq) locked = engine; } list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); spin_unlock_irq(&locked->active.lock); } -- cgit From 855e39e65cfc33a73724f1cc644ffc5754864a20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Feb 2020 09:41:48 +0000 Subject: drm/i915: Initialise basic fence before acquiring seqno Inside the intel_timeline_get_seqno(), we currently track the retirement of the old cachelines by listening to the new request. 
This requires that the new request is ready to be used and so requires a minimum bit of initialisation prior to getting the new seqno. Fixes: b1e3177bd1d8 ("drm/i915: Coordinate i915_active with its own mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200203094152.4150550-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 78a5f5d3c070..f56b046a32de 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -595,6 +595,8 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); + rq->file_priv = NULL; rq->capture_list = NULL; @@ -653,25 +655,30 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - ret = intel_timeline_get_seqno(tl, rq, &seqno); - if (ret) - goto err_free; - rq->i915 = ce->engine->i915; rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; + kref_init(&rq->fence.refcount); + rq->fence.flags = 0; + rq->fence.error = 0; + INIT_LIST_HEAD(&rq->fence.cb_list); + + ret = intel_timeline_get_seqno(tl, rq, &seqno); + if (ret) + goto err_free; + + rq->fence.context = tl->fence_context; + rq->fence.seqno = seqno; + RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ - dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, - tl->fence_context, seqno); - /* We bump the ref for the fence chain */ i915_sw_fence_reinit(&i915_request_get(rq)->submit); i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); -- cgit From 602ddb410dff397e791b7caafa2b2d40e5e52a29 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 5 Feb 2020 09:54:40 +0000 Subject: drm/i915: Flush execution tasklets before checking request status Rather than flushing the submission tasklets just before we sleep, flush before we check the request status. Ideally this gives us a moment to process the tasklets after sleeping just before we timeout. v2: Compromise by pushing the flush prior to the timeout, but after the check on completion so that we do not further delay the ready client. 
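For illustration, a condensed view of the wait-loop ordering the v2 compromise produces: check for completion first (so a ready client is never delayed), then flush the submission tasklets, then handle signals and the remaining timeout before sleeping. This is a sketch reconstructed from the hunk below, not the verbatim function; the exact completion check shown is an assumption.

    for (;;) {
        set_current_state(state);

        if (i915_request_completed(rq)) /* ready client: bail out immediately */
            break;

        /* give the submission tasklets a chance to run before we sleep again */
        intel_engine_flush_submission(rq->engine);

        if (signal_pending_state(state, current)) {
            timeout = -ERESTARTSYS;
            break;
        }

        if (!timeout) {
            timeout = -ETIME;
            break;
        }

        timeout = io_schedule_timeout(timeout);
    }
    __set_current_state(TASK_RUNNING);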
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200205095441.1769599-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f56b046a32de..0ecc2cf64216 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1571,6 +1571,8 @@ long i915_request_wait(struct i915_request *rq, break; } + intel_engine_flush_submission(rq->engine); + if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; @@ -1581,7 +1583,6 @@ long i915_request_wait(struct i915_request *rq, break; } - intel_engine_flush_submission(rq->engine); timeout = io_schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); -- cgit From f16ccb6445d3c2038998375a8491686c43e6026e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 10 Feb 2020 20:57:20 +0000 Subject: drm/i915: Disable use of hwsp_cacheline for kernel_context Currently on execlists, we use a local hwsp for the kernel_context, rather than the engine's HWSP, as this is the default for execlists. However, seqno wrap requires allocating a new HWSP cacheline, and may require pinning a new HWSP page in the GGTT. This operation requiring pinning in the GGTT is not allowed within the kernel_context timeline, as doing so may require re-entering the kernel_context in order to evict from the GGTT. As we want to avoid requiring a new HWSP for the kernel_context, we can use the permanently pinned engine's HWSP instead. However to do so we must prevent the use of semaphores reading the kernel_context's HWSP, as the use of semaphores do not support rollover onto the same cacheline. Fortunately, the kernel_context is mostly isolated, so unlikely to give benefit to semaphores. Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200210205722.794180-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 14 ++++++++++++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 12 +++++++++--- drivers/gpu/drm/i915/i915_request.c | 14 +++++++++----- 3 files changed, 30 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 70d91ad923ef..902d440ef07d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2964,7 +2964,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); + if (!i915_request_timeline(rq)->has_initial_breadcrumb) + return 0; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -4616,8 +4617,17 @@ static int __execlists_context_alloc(struct intel_context *ce, if (!ce->timeline) { struct intel_timeline *tl; + struct i915_vma *hwsp; + + /* + * Use the static global HWSP for the kernel context, and + * a dynamically allocated cacheline for everyone else. 
+ */ + hwsp = NULL; + if (unlikely(intel_context_is_barrier(ce))) + hwsp = engine->status_page.vma; - tl = intel_timeline_create(engine->gt, NULL); + tl = intel_timeline_create(engine->gt, hwsp); if (IS_ERR(tl)) { ret = PTR_ERR(tl); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 78dfa7a8767b..e53bfedeb97e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -3295,15 +3295,21 @@ static int bond_virtual_engine(struct intel_gt *gt, rq[0] = ERR_PTR(-ENOMEM); for_each_engine(master, gt, id) { struct i915_sw_fence fence = {}; + struct intel_context *ce; if (master->class == class) continue; + ce = intel_context_create(master); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - rq[0] = igt_spinner_create_request(&spin, - master->kernel_context, - MI_NOOP); + rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); + intel_context_put(ce); if (IS_ERR(rq[0])) { err = PTR_ERR(rq[0]); goto out; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0ecc2cf64216..1adb8cf35f75 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -886,6 +886,12 @@ emit_semaphore_wait(struct i915_request *to, struct i915_request *from, gfp_t gfp) { + if (!intel_context_use_semaphores(to->context)) + goto await_fence; + + if (!rcu_access_pointer(from->hwsp_cacheline)) + goto await_fence; + /* Just emit the first semaphore we see as request space is limited. */ if (already_busywaiting(to) & from->engine->mask) goto await_fence; @@ -931,12 +937,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, I915_FENCE_GFP); - else if (intel_context_use_semaphores(to->context)) - ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); else - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); if (ret < 0) return ret; @@ -1035,6 +1037,8 @@ __i915_request_await_execution(struct i915_request *to, { int err; + GEM_BUG_ON(intel_context_is_barrier(from->context)); + /* Submit both requests at the same time */ err = __await_execution(to, from, hook, I915_FENCE_GFP); if (err) -- cgit From 89dd019a8a99e1a08c9c724dc2831c2ec3c050a3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Feb 2020 20:56:14 +0000 Subject: drm/i915: Poison rings after use On retiring the request, we should not re-use these elements in the ring (at least not until we fill the ringbuffer and knowingly reuse the space). Leave behind some poison to (hopefully) trap ourselves if we make a mistake. 
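The fill helper introduced below has to cope with the poisoned span wrapping past the end of the ring buffer. A standalone sketch of that wraparound logic, in plain userspace C with made-up sizes (0x6b merely stands in for the kernel's POISON_FREE):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define RING_SIZE   64
    #define POISON_BYTE 0x6b

    /* Fill [head, tail) in a circular buffer, wrapping at RING_SIZE,
     * mirroring what the kernel helper does with rq->infix/rq->postfix. */
    static void ring_fill(uint8_t *ring, uint32_t head, uint32_t tail, uint8_t val)
    {
        if (tail < head) {
            memset(ring + head, val, RING_SIZE - head);
            head = 0;
        }
        memset(ring + head, val, tail - head);
    }

    int main(void)
    {
        uint8_t ring[RING_SIZE] = { 0 };
        int i;

        ring_fill(ring, 56, 8, POISON_BYTE); /* wraps: poisons [56,64) then [0,8) */

        for (i = 0; i < RING_SIZE; i++)
            printf("%02x%c", ring[i], (i % 16 == 15) ? '\n' : ' ');
        return 0;
    }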
Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200211205615.1190127-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1adb8cf35f75..6daf18dbb3d4 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -203,6 +203,19 @@ static void free_capture_list(struct i915_request *request) } } +static void __i915_request_fill(struct i915_request *rq, u8 val) +{ + void *vaddr = rq->ring->vaddr; + u32 head; + + head = rq->infix; + if (rq->postfix < head) { + memset(vaddr + head, val, rq->ring->size - head); + head = 0; + } + memset(vaddr + head, val, rq->postfix - head); +} + static void remove_from_engine(struct i915_request *rq) { struct intel_engine_cs *engine, *locked; @@ -247,6 +260,9 @@ bool i915_request_retire(struct i915_request *rq) */ GEM_BUG_ON(!list_is_first(&rq->link, &i915_request_timeline(rq)->requests)); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + /* Poison before we release our space in the ring */ + __i915_request_fill(rq, POISON_FREE); rq->ring->head = rq->postfix; /* @@ -1179,9 +1195,6 @@ i915_request_await_object(struct i915_request *to, void i915_request_skip(struct i915_request *rq, int error) { - void *vaddr = rq->ring->vaddr; - u32 head; - GEM_BUG_ON(!IS_ERR_VALUE((long)error)); dma_fence_set_error(&rq->fence, error); @@ -1193,12 +1206,7 @@ void i915_request_skip(struct i915_request *rq, int error) * context, clear out all the user operations leaving the * breadcrumb at the end (so we get the fence notifications). */ - head = rq->infix; - if (rq->postfix < head) { - memset(vaddr + head, 0, rq->ring->size - head); - head = 0; - } - memset(vaddr + head, 0, rq->postfix - head); + __i915_request_fill(rq, 0); rq->infix = rq->postfix; } -- cgit From df6b1f3da89f1ff87399316c836fc3981de96c38 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 20 Feb 2020 10:57:07 +0000 Subject: drm/i915: remove the other slab_dependencies The real one can be found in i915_scheduler.c. 
Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200220105707.344522-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_request.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 6daf18dbb3d4..d53af93b919b 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -51,7 +51,6 @@ struct execute_cb { static struct i915_global_request { struct i915_global base; struct kmem_cache *slab_requests; - struct kmem_cache *slab_dependencies; struct kmem_cache *slab_execute_cbs; } global; @@ -1614,14 +1613,12 @@ out: static void i915_global_request_shrink(void) { - kmem_cache_shrink(global.slab_dependencies); kmem_cache_shrink(global.slab_execute_cbs); kmem_cache_shrink(global.slab_requests); } static void i915_global_request_exit(void) { - kmem_cache_destroy(global.slab_dependencies); kmem_cache_destroy(global.slab_execute_cbs); kmem_cache_destroy(global.slab_requests); } @@ -1651,17 +1648,9 @@ int __init i915_global_request_init(void) if (!global.slab_execute_cbs) goto err_requests; - global.slab_dependencies = KMEM_CACHE(i915_dependency, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT); - if (!global.slab_dependencies) - goto err_execute_cbs; - i915_global_register(&global.base); return 0; -err_execute_cbs: - kmem_cache_destroy(global.slab_execute_cbs); err_requests: kmem_cache_destroy(global.slab_requests); return -ENOMEM; -- cgit
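Looking back over the series, the first two patches establish one simple invariant: every list the request's sched.link can sit on has a matching fence flag that is set when the link joins that list and cleared when it leaves. A standalone model of that bookkeeping follows; the flag names are borrowed from i915, everything else is a simplified stand-in, and where ACTIVE is finally cleared is left to other paths in the driver.

    #include <stdbool.h>
    #include <stdio.h>

    enum {
        FLAG_ACTIVE, /* submitted to the HW run list */
        FLAG_PQUEUE, /* sched.link is in the priority queue */
        FLAG_HOLD,   /* sched.link is parked on the hold list */
    };

    struct fake_request {
        unsigned long flags;
    };

    static bool test_and_set(struct fake_request *rq, int bit)
    {
        bool was_set = rq->flags & (1UL << bit);

        rq->flags |= 1UL << bit;
        return was_set;
    }

    static void clear(struct fake_request *rq, int bit)
    {
        rq->flags &= ~(1UL << bit);
    }

    /* __i915_request_submit() analogue: priority queue -> run list */
    static void submit(struct fake_request *rq)
    {
        if (!test_and_set(rq, FLAG_ACTIVE))
            clear(rq, FLAG_PQUEUE); /* no longer sitting in the queue */
    }

    /* remove_from_engine() analogue: decouple sched.link on retirement */
    static void retire(struct fake_request *rq)
    {
        clear(rq, FLAG_PQUEUE);
        clear(rq, FLAG_HOLD);
    }

    int main(void)
    {
        struct fake_request rq = { .flags = 1UL << FLAG_PQUEUE };

        submit(&rq);
        printf("after submit: active=%d pqueue=%d\n",
               !!(rq.flags & (1UL << FLAG_ACTIVE)),
               !!(rq.flags & (1UL << FLAG_PQUEUE)));

        retire(&rq);
        printf("after retire: flags=%#lx\n", rq.flags);
        return 0;
    }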