From 36620032ceccb4bf07bbe780a3998e88a585ad69 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 7 Mar 2018 13:42:23 +0000 Subject: drm/i915: Update ring position from request on retiring When wedged, we do not update the ring->tail as we submit the requests causing us to leak the ring->space upon cleaning up the wedged driver. We can just use the value stored in rq->tail, and keep the submission backend details away from set-wedge. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180307134226.25492-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d437beac3969..75c8826c8cae 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -358,7 +358,7 @@ static void advance_ring(struct i915_request *request) * is just about to be. Either works, if we miss the last two * noops - they are safe to be replayed on a reset. */ - tail = READ_ONCE(request->ring->tail); + tail = READ_ONCE(request->tail); } else { tail = request->postfix; } -- cgit From 47650db02dd52267953df81438c93cf8a0eb0e5e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 7 Mar 2018 13:42:25 +0000 Subject: drm/i915: Wrap engine->schedule in RCU locks for set-wedge protection Similar to the staging around handling of engine->submit_request, we need to stop adding to the execlists->queue prior to calling engine->cancel_requests. cancel_requests will move requests from the queue onto the timeline, so if we add a request onto the queue after that point, it will be lost. Fixes: af7a8ffad9c5 ("drm/i915: Use rcu instead of stop_machine in set_wedged") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180307134226.25492-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 13 +++++++------ drivers/gpu/drm/i915/i915_request.c | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c3d650706329..50e165b5b60d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -479,10 +479,11 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - if (!engine->schedule) - return; - engine->schedule(rq, prio); + rcu_read_lock(); + if (engine->schedule) + engine->schedule(rq, prio); + rcu_read_unlock(); } static void fence_set_priority(struct dma_fence *fence, int prio) @@ -3222,8 +3223,11 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) */ for_each_engine(engine, i915, id) { i915_gem_reset_prepare_engine(engine); + engine->submit_request = nop_submit_request; + engine->schedule = NULL; } + i915->caps.scheduler = 0; /* * Make sure no one is running the old callback before we proceed with @@ -3241,11 +3245,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * start to complete all requests. 
*/ engine->submit_request = nop_complete_submit_request; - engine->schedule = NULL; } - i915->caps.scheduler = 0; - /* * Make sure no request can slip through without getting completed by * either this call here to intel_engine_init_global_seqno, or the one diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 75c8826c8cae..2f62acd2dc3d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1081,8 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ + rcu_read_lock(); if (engine->schedule) engine->schedule(request, request->ctx->priority); + rcu_read_unlock(); local_bh_disable(); i915_sw_fence_commit(&request->submit); -- cgit From 6f9ec414ec47eea3f3e2c5ad4c67b4265bbff2a3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Mar 2018 14:07:32 +0000 Subject: drm/i915: Remove the impedance mismatch around intel_engine_enable_signaling There is some redundancy between dma_fence->ops->enable_signaling (via i915_fence_enable_signaling) and our backend, intel_engine_enable_signaling() in that both levels recheck the fence status multiple times. If we convert intel_engine_enable_signaling() to return the information desired by dma_fence->ops->enable_signaling, we can reduce i915_fence_enable_signaling to a simple stub and avoid trying to reinterpret the same information. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Michal Winiarski Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180308140732.25090-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 6 +----- drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++++++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2f62acd2dc3d..1810fa1b81cb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -59,11 +59,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - if (i915_fence_signaled(fence)) - return false; - - intel_engine_enable_signaling(to_request(fence), true); - return !i915_fence_signaled(fence); + return intel_engine_enable_signaling(to_request(fence), true); } static signed long i915_fence_wait(struct dma_fence *fence, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 1f79e7a47433..671a6d61e29d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -730,10 +730,11 @@ static void insert_signal(struct intel_breadcrumbs *b, list_add(&request->signaling.link, &iter->signaling.link); } -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_wait *wait = &request->signaling.wait; u32 seqno; /* @@ -750,12 +751,12 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) seqno = i915_request_global_seqno(request); if (!seqno) /* will be enabled later upon execution */ - return; + return true; - 
GEM_BUG_ON(request->signaling.wait.seqno); - request->signaling.wait.tsk = b->signaler; - request->signaling.wait.request = request; - request->signaling.wait.seqno = seqno; + GEM_BUG_ON(wait->seqno); + wait->tsk = b->signaler; + wait->request = request; + wait->seqno = seqno; /* * Add ourselves into the list of waiters, but registering our @@ -768,11 +769,15 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) */ spin_lock(&b->rb_lock); insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); + wakeup &= __intel_engine_add_wait(engine, wait); spin_unlock(&b->rb_lock); - if (wakeup) + if (wakeup) { wake_up_process(b->signaler); + return !intel_wait_complete(wait); + } + + return true; } void intel_engine_cancel_signaling(struct i915_request *request) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c31258d27e20..81cdbbf257ec 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -940,7 +940,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup); +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); void intel_engine_cancel_signaling(struct i915_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) -- cgit From d9b13c4dde6cacd8f2c4385cd6d293b0ac622e0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Mar 2018 13:14:50 +0000 Subject: drm/i915: Trace GEM steps between submit and wedging We still have an odd race with wedging/unwedging as shown by igt/gem_eio that defies expectations. Add some more trace_printks to try and visualize the flow over the precipice. 
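For reference, GEM_TRACE markers go through trace_printk(), so they land in the ftrace buffer rather than dmesg and are cheap enough to sprinkle around the submit and wedge paths. A minimal sketch of such a macro (illustrative only; the driver's actual definition lives in i915_gem.h and may differ in how it is compiled out):

	/* Illustrative sketch, not the driver's exact definition. */
	#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	#define GEM_TRACE(...) trace_printk(__VA_ARGS__)
	#else
	#define GEM_TRACE(...) do { } while (0)
	#endif
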
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180315131451.4060-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 14 ++++++++++++++ drivers/gpu/drm/i915/i915_request.c | 23 +++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 13d4b0e74641..2fbd622bba30 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3193,6 +3193,9 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct i915_request *request) { + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); i915_request_submit(request); @@ -3202,6 +3205,9 @@ static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); @@ -3215,6 +3221,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + GEM_TRACE("start\n"); + if (drm_debug & DRM_UT_DRIVER) { struct drm_printer p = drm_debug_printer(__func__); @@ -3279,6 +3287,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) i915_gem_reset_finish_engine(engine); } + GEM_TRACE("end\n"); + wake_up_all(&i915->gpu_error.reset_queue); } @@ -3291,6 +3301,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) return true; + GEM_TRACE("start\n"); + /* * Before unwedging, make sure that all pending operations * are flushed and errored out - we may have requests waiting upon @@ -3341,6 +3353,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) intel_engines_reset_default_submission(i915); i915_gem_contexts_lost(i915); + GEM_TRACE("end\n"); + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1810fa1b81cb..43c7134a9b93 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -207,11 +207,16 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) if (ret) return ret; + GEM_BUG_ON(i915->gt.active_requests); + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { struct i915_gem_timeline *timeline; struct intel_timeline *tl = engine->timeline; + GEM_TRACE("%s seqno %d -> %d\n", + engine->name, tl->seqno, seqno); + if (!i915_seqno_passed(seqno, tl->seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); @@ -381,6 +386,11 @@ static void i915_request_retire(struct i915_request *request) struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; + GEM_TRACE("%s(%d) fence %llx:%d, global_seqno %d\n", + engine->name, intel_engine_get_seqno(engine), + request->fence.context, request->fence.seqno, + request->global_seqno); + lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); 
GEM_BUG_ON(!i915_request_completed(request)); @@ -488,6 +498,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_timeline *timeline; u32 seqno; + GEM_TRACE("%s fence %llx:%d -> global_seqno %d\n", + request->engine->name, + request->fence.context, request->fence.seqno, + engine->timeline->seqno); + GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); @@ -537,6 +552,11 @@ void __i915_request_unsubmit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; + GEM_TRACE("%s fence %llx:%d <- global_seqno %d\n", + request->engine->name, + request->fence.context, request->fence.seqno, + request->global_seqno); + GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); @@ -996,6 +1016,9 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) u32 *cs; int err; + GEM_TRACE("%s fence %llx:%d\n", + engine->name, request->fence.context, request->fence.seqno); + lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_request_add(request); -- cgit From ce80075470f6328e487389262c95af092d421ffc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 20 Mar 2018 10:04:49 +0000 Subject: drm/i915: Add control flags to i915_handle_error() Not all callers want the GPU error to handled in the same way, so expose a control parameter. In the first instance, some callers do not want the heavyweight error capture so add a bit to request the state to be captured and saved. v2: Pass msg down to i915_reset/i915_reset_engine so that we include the reason for the reset in the dev_notice(), superseding the earlier option to not print that notice. v3: Stash the reason inside the i915->gpu_error to handover to the direct reset from the blocking waiter. Signed-off-by: Chris Wilson Cc: Jeff McGee Cc: Mika Kuoppala Cc: Michel Thierry Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180320100449.1360-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.c | 17 ++++---- drivers/gpu/drm/i915/i915_drv.h | 10 ++--- drivers/gpu/drm/i915/i915_gpu_error.h | 3 ++ drivers/gpu/drm/i915/i915_irq.c | 55 ++++++++++++++---------- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/intel_hangcheck.c | 13 +++--- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 13 +++--- 8 files changed, 62 insertions(+), 55 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 964ea1a12357..7816cd53100a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4011,8 +4011,8 @@ i915_wedged_set(void *data, u64 val) engine->hangcheck.stalled = true; } - i915_handle_error(i915, val, "Manually set wedged engine mask = %llx", - val); + i915_handle_error(i915, val, I915_ERROR_CAPTURE, + "Manually set wedged engine mask = %llx", val); wait_on_bit(&i915->gpu_error.flags, I915_RESET_HANDOFF, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ba5f150a29c0..3f637ab89e51 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1873,7 +1873,6 @@ static int i915_resume_switcheroo(struct drm_device *dev) /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset - * @flags: Instructions * * Reset the chip. Useful if a hang is detected. Marks the device as wedged * on failure. 
@@ -1888,7 +1887,7 @@ static int i915_resume_switcheroo(struct drm_device *dev) * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915, unsigned int flags) +void i915_reset(struct drm_i915_private *i915) { struct i915_gpu_error *error = &i915->gpu_error; int ret; @@ -1905,8 +1904,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) if (!i915_gem_unset_wedged(i915)) goto wakeup; - if (!(flags & I915_RESET_QUIET)) - dev_notice(i915->drm.dev, "Resetting chip after gpu hang\n"); + if (error->reason) + dev_notice(i915->drm.dev, + "Resetting chip for %s\n", error->reason); error->reset_count++; disable_irq(i915->drm.irq); @@ -2007,7 +2007,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, /** * i915_reset_engine - reset GPU engine to recover from a hang * @engine: engine to reset - * @flags: options + * @msg: reason for GPU reset; or NULL for no dev_notice() * * Reset a specific GPU engine. Useful if a hang is detected. * Returns zero on successful reset or otherwise an error code. @@ -2017,7 +2017,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, * - reset engine (which will force the engine to idle) * - re-init/configure engine */ -int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) +int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) { struct i915_gpu_error *error = &engine->i915->gpu_error; struct i915_request *active_request; @@ -2032,10 +2032,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) goto out; } - if (!(flags & I915_RESET_QUIET)) { + if (msg) dev_notice(engine->i915->drm.dev, - "Resetting %s after gpu hang\n", engine->name); - } + "Resetting %s for %s\n", engine->name, msg); error->reset_engine_count[engine->id]++; if (!engine->i915->guc.execbuf_client) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e27ba8fb64e6..c9c3b2ba6a86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2700,10 +2700,8 @@ extern void i915_driver_unload(struct drm_device *dev); extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); -#define I915_RESET_QUIET BIT(0) -extern void i915_reset(struct drm_i915_private *i915, unsigned int flags); -extern int i915_reset_engine(struct intel_engine_cs *engine, - unsigned int flags); +extern void i915_reset(struct drm_i915_private *i915); +extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg); extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); extern int intel_reset_guc(struct drm_i915_private *dev_priv); @@ -2751,10 +2749,12 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) &dev_priv->gpu_error.hangcheck_work, delay); } -__printf(3, 4) +__printf(4, 5) void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...); +#define I915_ERROR_CAPTURE BIT(0) extern void intel_irq_init(struct drm_i915_private *dev_priv); extern void intel_irq_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ebbdf37e2879..ac5760673cc9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -269,6 +269,9 @@ struct i915_gpu_error { /** Number of times an engine has been reset */ u32 
reset_engine_count[I915_NUM_ENGINES]; + /** Reason for the current *global* reset */ + const char *reason; + /** * Waitqueue to signal when a hang is detected. Used to for waiters * to release the struct_mutex for the reset to procede. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 44eef355e12c..fa7310766217 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2877,15 +2877,10 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -/** - * i915_reset_device - do process context error handling work - * @dev_priv: i915 device private - * - * Fire an error uevent so userspace can see that a hang or error - * was detected. - */ -static void i915_reset_device(struct drm_i915_private *dev_priv) +static void i915_reset_device(struct drm_i915_private *dev_priv, + const char *msg) { + struct i915_gpu_error *error = &dev_priv->gpu_error; struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; @@ -2901,29 +2896,32 @@ static void i915_reset_device(struct drm_i915_private *dev_priv) i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { intel_prepare_reset(dev_priv); + error->reason = msg; + /* Signal that locked waiters should reset the GPU */ - set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.wait_queue); + set_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_all(&error->wait_queue); /* Wait for anyone holding the lock to wakeup, without * blocking indefinitely on struct_mutex. */ do { if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv, 0); + i915_reset(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); } - } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, + } while (wait_on_bit_timeout(&error->flags, I915_RESET_HANDOFF, TASK_UNINTERRUPTIBLE, 1)); + error->reason = NULL; + intel_finish_reset(dev_priv); } - if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } static void i915_clear_error_registers(struct drm_i915_private *dev_priv) @@ -2955,6 +2953,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) * i915_handle_error - handle a gpu error * @dev_priv: i915 device private * @engine_mask: mask representing engines that are hung + * @flags: control flags * @fmt: Error message format string * * Do some basic checking of register state at error time and @@ -2965,16 +2964,23 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) */ void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...) 
{ struct intel_engine_cs *engine; unsigned int tmp; - va_list args; char error_msg[80]; + char *msg = NULL; - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } /* * In most cases it's guaranteed that we get here with an RPM @@ -2986,8 +2992,11 @@ void i915_handle_error(struct drm_i915_private *dev_priv, intel_runtime_pm_get(dev_priv); engine_mask &= INTEL_INFO(dev_priv)->ring_mask; - i915_capture_error_state(dev_priv, engine_mask, error_msg); - i915_clear_error_registers(dev_priv); + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(dev_priv, engine_mask, msg); + i915_clear_error_registers(dev_priv); + } /* * Try engine reset when available. We fall back to full reset if @@ -3000,7 +3009,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, &dev_priv->gpu_error.flags)) continue; - if (i915_reset_engine(engine, 0) == 0) + if (i915_reset_engine(engine, msg) == 0) engine_mask &= ~intel_engine_flag(engine); clear_bit(I915_RESET_ENGINE + engine->id, @@ -3030,7 +3039,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, TASK_UNINTERRUPTIBLE); } - i915_reset_device(dev_priv); + i915_reset_device(dev_priv, msg); for_each_engine(engine, dev_priv, tmp) { clear_bit(I915_RESET_ENGINE + engine->id, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 43c7134a9b93..2325886d1d55 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1229,7 +1229,7 @@ static bool __i915_wait_request_check_and_reset(struct i915_request *request) return false; __set_current_state(TASK_RUNNING); - i915_reset(request->i915, 0); + i915_reset(request->i915); return true; } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index c8ea510629fa..fd0ffb8328d0 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, BIT(engine->id), - "Kicking stuck wait on %s", - engine->name); + i915_handle_error(dev_priv, BIT(engine->id), 0, + "stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; } @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) default: return ENGINE_DEAD; case 1: - i915_handle_error(dev_priv, ALL_ENGINES, - "Kicking stuck semaphore on %s", + i915_handle_error(dev_priv, ALL_ENGINES, 0, + "stuck semaphore on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; @@ -386,13 +385,13 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, if (stuck != hung) hung &= ~stuck; len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "No progress" : "Hang"); + "%s on ", stuck == hung ? 
"no progress" : "hang"); for_each_engine_masked(engine, i915, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, "%s", msg); + return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); } /* diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index df7898c8edcb..4372826998aa 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -433,7 +433,7 @@ static int igt_global_reset(void *arg) mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); - i915_reset(i915, I915_RESET_QUIET); + i915_reset(i915); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); @@ -518,7 +518,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) engine->hangcheck.seqno = intel_engine_get_seqno(engine); - err = i915_reset_engine(engine, I915_RESET_QUIET); + err = i915_reset_engine(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -725,7 +725,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, engine->hangcheck.seqno = intel_engine_get_seqno(engine); - err = i915_reset_engine(engine, I915_RESET_QUIET); + err = i915_reset_engine(engine, NULL); if (err) { pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", engine->name, active ? "active" : "idle", err); @@ -865,7 +865,6 @@ static int igt_wait_reset(void *arg) __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; @@ -962,7 +961,6 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; @@ -971,7 +969,7 @@ static int igt_reset_queue(void *arg) reset_count = fake_hangcheck(prev); - i915_reset(i915, I915_RESET_QUIET); + i915_reset(i915); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); @@ -1069,7 +1067,6 @@ static int igt_handle_error(void *arg) __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; @@ -1084,7 +1081,7 @@ static int igt_handle_error(void *arg) engine->hangcheck.stalled = true; engine->hangcheck.seqno = intel_engine_get_seqno(engine); - i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__); + i915_handle_error(i915, intel_engine_flag(engine), 0, NULL); xchg(&i915->gpu_error.first_error, error); -- cgit From 0e59c209f4ccf9f9d505babdb04731294e18c4ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Mar 2018 11:00:59 +0000 Subject: drm/i915: Fix tracing of submit seqno We pre-increment the timeline->seqno when handing it to the request, make sure the GEM_TRACE takes this into account. 
Otherwise, it appears that we go backwards over a preemption point: 1d..1 157681077us : __i915_request_unsubmit: vcs0 fence 75e:3 <- global_seqno 17 0d.s1 157681113us : __i915_request_submit: vcs0 fence 75e:3 -> global_seqno 16 Fixes: d9b13c4dde6c ("drm/i915: Trace GEM steps between submit and wedging") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180322110059.4467-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2325886d1d55..f1b81fe4f9ab 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -501,7 +501,7 @@ void __i915_request_submit(struct i915_request *request) GEM_TRACE("%s fence %llx:%d -> global_seqno %d\n", request->engine->name, request->fence.context, request->fence.seqno, - engine->timeline->seqno); + engine->timeline->seqno + 1); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); -- cgit From 4ccfee92f4b6fbbedee1eb68f110a66f03edf7c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Mar 2018 13:10:34 +0000 Subject: drm/i915: Remove local timeline var from submit/unsubmit Both request_submit and request_unsubmit deal with transferring the request from the client's timeline onto the execution timeline and back again. As both functions deal with a pair of timeline's, using a shorthand for just one of them is slightly confusing, especially as the different functions use the shorthand for the alternate timeline. Instead, use the full version of each timeline so it should be easier to keep track of the transfer between the request/client and the engine. v2: Refactor the common lock+list_move v3: Be clear we require the other timeline list to be locked as well. 
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180322131034.6036-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f1b81fe4f9ab..2314a26cd7f8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -492,10 +492,20 @@ static u32 timeline_get_seqno(struct intel_timeline *tl) return ++tl->seqno; } +static void move_to_timeline(struct i915_request *request, + struct intel_timeline *timeline) +{ + GEM_BUG_ON(request->timeline == request->engine->timeline); + lockdep_assert_held(&request->engine->timeline->lock); + + spin_lock(&request->timeline->lock); + list_move_tail(&request->link, &timeline->requests); + spin_unlock(&request->timeline->lock); +} + void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; u32 seqno; GEM_TRACE("%s fence %llx:%d -> global_seqno %d\n", @@ -506,12 +516,9 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); - /* Transfer from per-context onto the global per-engine timeline */ - timeline = engine->timeline; - GEM_BUG_ON(timeline == request->timeline); GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(timeline); + seqno = timeline_get_seqno(engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -525,9 +532,8 @@ void __i915_request_submit(struct i915_request *request) engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); - spin_lock(&request->timeline->lock); - list_move_tail(&request->link, &timeline->requests); - spin_unlock(&request->timeline->lock); + /* Transfer from per-context onto the global per-engine timeline */ + move_to_timeline(request, engine->timeline); trace_i915_request_execute(request); @@ -550,7 +556,6 @@ void i915_request_submit(struct i915_request *request) void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; GEM_TRACE("%s fence %llx:%d <- global_seqno %d\n", request->engine->name, @@ -578,12 +583,7 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ - timeline = request->timeline; - GEM_BUG_ON(timeline == engine->timeline); - - spin_lock(&timeline->lock); - list_move(&request->link, &timeline->requests); - spin_unlock(&timeline->lock); + move_to_timeline(request, request->timeline); /* * We don't need to wake_up any waiters on request->execute, they -- cgit From e770276079fd6e1088a255dee182a3c09a2d7aa9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 27 Mar 2018 22:01:57 +0100 Subject: drm/i915: Include the HW breadcrumb whenever we trace the global_seqno When we include a request's global_seqno in a GEM_TRACE it often helps to know how that relates to the current breadcrumb as seen by the hardware. 
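The extra value in each message is the engine's most recently completed breadcrumb, which can be compared directly against a request's global_seqno. As an illustrative fragment (not a quote from the driver, and ignoring the global_seqno == 0 "not yet submitted" case):

	/*
	 * A request has been completed by the hardware once the engine's
	 * breadcrumb (the "current" value in these traces) has passed the
	 * request's global seqno.
	 */
	bool done = i915_seqno_passed(intel_engine_get_seqno(engine),
				      rq->global_seqno);
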
Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180327210157.16896-3-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_request.c | 28 +++++++++++++++++----------- drivers/gpu/drm/i915/intel_lrc.c | 6 ++++-- 2 files changed, 21 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2314a26cd7f8..585242831974 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -214,8 +214,11 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) struct i915_gem_timeline *timeline; struct intel_timeline *tl = engine->timeline; - GEM_TRACE("%s seqno %d -> %d\n", - engine->name, tl->seqno, seqno); + GEM_TRACE("%s seqno %d (current %d) -> %d\n", + engine->name, + tl->seqno, + intel_engine_get_seqno(engine), + seqno); if (!i915_seqno_passed(seqno, tl->seqno)) { /* Flush any waiters before we reuse the seqno */ @@ -386,10 +389,11 @@ static void i915_request_retire(struct i915_request *request) struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; - GEM_TRACE("%s(%d) fence %llx:%d, global_seqno %d\n", - engine->name, intel_engine_get_seqno(engine), + GEM_TRACE("%s fence %llx:%d, global_seqno %d, current %d\n", + engine->name, request->fence.context, request->fence.seqno, - request->global_seqno); + request->global_seqno, + intel_engine_get_seqno(engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); @@ -508,10 +512,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%d -> global_seqno %d\n", - request->engine->name, + GEM_TRACE("%s fence %llx:%d -> global_seqno %d, current %d\n", + engine->name, request->fence.context, request->fence.seqno, - engine->timeline->seqno + 1); + engine->timeline->seqno + 1, + intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); @@ -557,10 +562,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%d <- global_seqno %d\n", - request->engine->name, + GEM_TRACE("%s fence %llx:%d <- global_seqno %d, current %d\n", + engine->name, request->fence.context, request->fence.seqno, - request->global_seqno); + request->global_seqno, + intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 654634254b64..f60b61bf8b3b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -454,10 +454,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%d (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, + intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -999,10 +1000,11 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%d 
(current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, + intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ -- cgit From e4d2006f8f040825fa371e774a5debacdbf20b08 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Apr 2018 16:51:44 +0100 Subject: drm/i915: Split out parking from the idle worker for reuse We will want to park GEM before disengaging the drive^W^W^W unwedging. Since we already do the work for idling, expose the guts as a new function that we can then reuse. v2: Just skip if already parked; makes it more forgiving to use by future callers. v3: Extract mark_busy, rename it to i915_gem_unpark and place it next to i915_gem_park so that we can evaluate it for symmetry more easily. Calling GEM from inside i915_request looks to be a bit of a layering violation, for the moment I am imaging them as being notify_cb. Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Sagar Arun Kamble Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala #v1 Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20180406155144.27791-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 123 ++++++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem.h | 5 ++ drivers/gpu/drm/i915/i915_request.c | 52 +-------------- 3 files changed, 103 insertions(+), 77 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9650a7b10c5f..a69dc19a0bdb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -136,6 +136,100 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) return 0; } +static u32 __i915_gem_park(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + + if (!i915->gt.awake) + return I915_EPOCH_INVALID; + + GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); + + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. + * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. 
+ */ + synchronize_irq(i915->drm.irq); + + intel_engines_park(i915); + i915_gem_timelines_park(i915); + + i915_pmu_gt_parked(i915); + + i915->gt.awake = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); + + intel_runtime_pm_put(i915); + + return i915->gt.epoch; +} + +void i915_gem_park(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + + if (!i915->gt.awake) + return; + + /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ + mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); +} + +void i915_gem_unpark(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915->gt.active_requests); + + if (i915->gt.awake) + return; + + intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -3496,36 +3590,9 @@ i915_gem_idle_work_handler(struct work_struct *work) if (new_requests_since_last_retire(dev_priv)) goto out_unlock; - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. 
- */ - synchronize_irq(dev_priv->drm.irq); - - intel_engines_park(dev_priv); - i915_gem_timelines_park(dev_priv); - - i915_pmu_gt_parked(dev_priv); + epoch = __i915_gem_park(dev_priv); - GEM_BUG_ON(!dev_priv->gt.awake); - dev_priv->gt.awake = false; - epoch = dev_priv->gt.epoch; - GEM_BUG_ON(epoch == I915_EPOCH_INVALID); rearm_hangcheck = false; - - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_idle(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); - - intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 8922344fc21b..deaf78d2ae8b 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -27,6 +27,8 @@ #include +struct drm_i915_private; + #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ pr_err("%s:%d GEM_BUG_ON(%s)\n", \ @@ -61,4 +63,7 @@ #define I915_NUM_ENGINES 8 +void i915_gem_park(struct drm_i915_private *i915); +void i915_gem_unpark(struct drm_i915_private *i915); + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 585242831974..a9d0bde16443 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -255,47 +255,6 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static void mark_busy(struct drm_i915_private *i915) -{ - if (i915->gt.awake) - return; - - GEM_BUG_ON(!i915->gt.active_requests); - - intel_runtime_pm_get_noresume(i915); - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. 
- */ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - - i915->gt.awake = true; - if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ - i915->gt.epoch = 1; - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - static int reserve_engine(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -313,7 +272,7 @@ static int reserve_engine(struct intel_engine_cs *engine) } if (!i915->gt.active_requests++) - mark_busy(i915); + i915_gem_unpark(i915); return 0; } @@ -322,13 +281,8 @@ static void unreserve_engine(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (!--i915->gt.active_requests) { - /* Cancel the mark_busy() from our reserve_engine() */ - GEM_BUG_ON(!i915->gt.awake); - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(100)); - } + if (!--i915->gt.active_requests) + i915_gem_park(i915); GEM_BUG_ON(!engine->timeline->inflight_seqnos); engine->timeline->inflight_seqnos--; -- cgit From d0667e9ce52eb2d5d32db4f16976226e78f88784 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Apr 2018 23:03:54 +0100 Subject: drm/i915: Pass the set of guilty engines to i915_reset() Currently, we rely on inspecting the hangcheck state from within the i915_reset() routines to determine which engines were guilty of the hang. This is problematic for cases where we want to run i915_handle_error() and call i915_reset() independently of hangcheck. Instead of relying on the indirect parameter passing, turn it into an explicit parameter providing the set of stalled engines which then are treated as guilty until proven innocent. While we are removing the implicit stalled parameter, also make the reason into an explicit parameter to i915_reset(). We still need a back-channel for i915_handle_error() to hand over the task to the locked waiter, but let's keep that its own channel rather than incriminate another. This leaves stalled/seqno as being private to hangcheck, with no more nefarious snooping by reset, be it whole-device or per-engine. \o/ The only real issue now is that this makes it crystal clear that we don't actually do any testing of hangcheck per se in drv_selftest/live_hangcheck, merely of resets! 
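Since the stalled set is only a bitmask of engine ids, a caller outside hangcheck can assemble its own set of guilty engines along these lines (simplified sketch; engine_is_stuck() is a hypothetical predicate standing in for whatever hang detection the caller performs):

	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int stalled_mask = 0;

	for_each_engine(engine, i915, id) {
		if (engine_is_stuck(engine)) /* hypothetical check */
			stalled_mask |= ENGINE_MASK(id);
	}

	i915_reset(i915, stalled_mask, "stuck engines");
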
Signed-off-by: Chris Wilson Cc: Michel Thierry Cc: Jeff McGee Cc: Mika Kuoppala Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180406220354.18911-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 13 ++++++---- drivers/gpu/drm/i915/i915_drv.h | 10 +++++--- drivers/gpu/drm/i915/i915_gem.c | 5 ++-- drivers/gpu/drm/i915/i915_gpu_error.h | 3 +++ drivers/gpu/drm/i915/i915_irq.c | 12 ++++++---- drivers/gpu/drm/i915/i915_request.c | 6 +++-- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 30 +++++++++++------------- 7 files changed, 47 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7ce229c6f424..f770be18b2d7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1866,6 +1866,8 @@ static int i915_resume_switcheroo(struct drm_device *dev) /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset + * @stalled_mask: mask of the stalled engines with the guilty requests + * @reason: user error message for why we are resetting * * Reset the chip. Useful if a hang is detected. Marks the device as wedged * on failure. @@ -1880,7 +1882,9 @@ static int i915_resume_switcheroo(struct drm_device *dev) * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915) +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; int ret; @@ -1899,9 +1903,8 @@ void i915_reset(struct drm_i915_private *i915) if (!i915_gem_unset_wedged(i915)) goto wakeup; - if (error->reason) - dev_notice(i915->drm.dev, - "Resetting chip for %s\n", error->reason); + if (reason) + dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; disable_irq(i915->drm.irq); @@ -1944,7 +1947,7 @@ void i915_reset(struct drm_i915_private *i915) goto error; } - i915_gem_reset(i915); + i915_gem_reset(i915, stalled_mask); intel_overlay_reset(i915); /* diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6b3f2f651def..9bca104c409e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2701,8 +2701,11 @@ extern void i915_driver_unload(struct drm_device *dev); extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); -extern void i915_reset(struct drm_i915_private *i915); -extern int i915_reset_engine(struct intel_engine_cs *engine, const char *msg); +extern void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason); +extern int i915_reset_engine(struct intel_engine_cs *engine, + const char *reason); extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); extern int intel_reset_guc(struct drm_i915_private *dev_priv); @@ -3126,7 +3129,8 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); -void i915_gem_reset(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv, + unsigned int stalled_mask); void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 306d7a805eb7..28ab0beff86c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3213,7 +3213,8 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, engine->reset_hw(engine, request); } -void i915_gem_reset(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv, + unsigned int stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3227,7 +3228,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) i915_gem_reset_engine(engine, engine->hangcheck.active_request, - engine->hangcheck.stalled); + stalled_mask & ENGINE_MASK(id)); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) engine->context_unpin(engine, ctx); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ac5760673cc9..c05b6034d718 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -269,6 +269,9 @@ struct i915_gpu_error { /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; + /** Set of stalled engines with guilty requests, in the current reset */ + u32 stalled_mask; + /** Reason for the current *global* reset */ const char *reason; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c2f878ace0ea..b03d18561b55 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2961,7 +2961,8 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) } static void i915_reset_device(struct drm_i915_private *dev_priv, - const char *msg) + u32 engine_mask, + const char *reason) { struct i915_gpu_error *error = &dev_priv->gpu_error; struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; @@ -2979,9 +2980,11 @@ static void i915_reset_device(struct drm_i915_private *dev_priv, i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { intel_prepare_reset(dev_priv); - error->reason = msg; + error->reason = reason; + error->stalled_mask = engine_mask; /* Signal that locked waiters should reset the GPU */ + smp_mb__before_atomic(); set_bit(I915_RESET_HANDOFF, &error->flags); wake_up_all(&error->wait_queue); @@ -2990,7 +2993,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv, */ do { if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv); + i915_reset(dev_priv, engine_mask, reason); mutex_unlock(&dev_priv->drm.struct_mutex); } } while (wait_on_bit_timeout(&error->flags, @@ -2998,6 +3001,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv, TASK_UNINTERRUPTIBLE, 1)); + error->stalled_mask = 0; error->reason = NULL; intel_finish_reset(dev_priv); @@ -3122,7 +3126,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, TASK_UNINTERRUPTIBLE); } - i915_reset_device(dev_priv, msg); + i915_reset_device(dev_priv, engine_mask, msg); for_each_engine(engine, dev_priv, tmp) { clear_bit(I915_RESET_ENGINE + engine->id, diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a9d0bde16443..629f3e860592 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1185,11 +1185,13 @@ static bool __i915_spin_request(const struct i915_request *rq, static bool __i915_wait_request_check_and_reset(struct i915_request *request) { - if (likely(!i915_reset_handoff(&request->i915->gpu_error))) + struct i915_gpu_error *error = &request->i915->gpu_error; + + if 
(likely(!i915_reset_handoff(error))) return false; __set_current_state(TASK_RUNNING); - i915_reset(request->i915); + i915_reset(request->i915, error->stalled_mask, error->reason); return true; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index acfb4dcc9fb5..24f913f26a7b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -437,7 +437,7 @@ static int igt_global_reset(void *arg) mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); - i915_reset(i915); + i915_reset(i915, ALL_ENGINES, NULL); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); @@ -881,17 +881,18 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct i915_request *rq) +static u32 fake_hangcheck(struct i915_request *rq, u32 mask) { - u32 reset_count; + struct i915_gpu_error *error = &rq->i915->gpu_error; + u32 reset_count = i915_reset_count(error); - rq->engine->hangcheck.stalled = true; - rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + error->stalled_mask = mask; - reset_count = i915_reset_count(&rq->i915->gpu_error); + /* set_bit() must be after we have setup the backchannel (mask) */ + smp_mb__before_atomic(); + set_bit(I915_RESET_HANDOFF, &error->flags); - set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags); - wake_up_all(&rq->i915->gpu_error.wait_queue); + wake_up_all(&error->wait_queue); return reset_count; } @@ -939,7 +940,7 @@ static int igt_wait_reset(void *arg) goto out_rq; } - reset_count = fake_hangcheck(rq); + reset_count = fake_hangcheck(rq, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -1075,9 +1076,9 @@ static int igt_reset_queue(void *arg) goto fini; } - reset_count = fake_hangcheck(prev); + reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - i915_reset(i915); + i915_reset(i915, ENGINE_MASK(id), NULL); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); @@ -1150,7 +1151,7 @@ static int igt_handle_error(void *arg) if (!intel_has_reset_engine(i915)) return 0; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; mutex_lock(&i915->drm.struct_mutex); @@ -1186,10 +1187,7 @@ static int igt_handle_error(void *arg) /* Temporarily disable error capture */ error = xchg(&i915->gpu_error.first_error, (void *)-1); - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - - i915_handle_error(i915, intel_engine_flag(engine), 0, NULL); + i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL); xchg(&i915->gpu_error.first_error, error); -- cgit From 0c5c7df360dbcfefac61ebd118c8551acf714d79 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 6 Apr 2018 13:35:14 +0100 Subject: drm/i915/execlists: Log fence context & seqno throughout GEM_TRACE Include fence context and seqno in low level tracing so it is easier to follow flows of individual requests when things go bad. Also added tracing on the reset side of things. v2: Chris Wilson: * Standardize global_seqno and seqno as global. * Include current hws seqno in execlists_cancel_port_requests. v3: * Fix port printk format for all builds. 
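The v3 printk-format note refers to the port index: the pointer difference port - execlists->port has type ptrdiff_t, whose width differs between 32-bit and 64-bit builds, so it is cast before being printed with %u. Roughly (an illustrative rearrangement of the hunk below, not additional driver code):

	/* Cast the ptrdiff_t so %u is correct on 32-bit and 64-bit builds. */
	unsigned int idx = (unsigned int)(port - execlists->port);

	GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
		  rq->engine->name, idx,
		  rq->global_seqno,
		  rq->fence.context, rq->fence.seqno,
		  intel_engine_get_seqno(rq->engine));
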
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson # v2 Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180406123514.5809-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_request.c | 6 +++--- drivers/gpu/drm/i915/intel_lrc.c | 22 +++++++++++++++++----- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 629f3e860592..9ca9c24b4421 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -343,7 +343,7 @@ static void i915_request_retire(struct i915_request *request) struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; - GEM_TRACE("%s fence %llx:%d, global_seqno %d, current %d\n", + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, @@ -466,7 +466,7 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%d -> global_seqno %d, current %d\n", + GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline->seqno + 1, @@ -516,7 +516,7 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%d <- global_seqno %d, current %d\n", + GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3592288e4696..02b25bf2378a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -468,10 +468,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%d (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, + rq->fence.context, rq->fence.seqno, intel_engine_get_seqno(engine), rq_prio(rq)); } else { @@ -742,6 +743,13 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); + GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n", + rq->engine->name, + (unsigned int)(port - execlists->port), + rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(rq->engine)); + GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); @@ -817,7 +825,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + GEM_TRACE("%s current %d\n", + engine->name, intel_engine_get_seqno(engine)); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -1014,10 +1023,12 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%d (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, + rq ? 
rq->fence.context : 0, + rq ? rq->fence.seqno : 0, intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); @@ -1744,8 +1755,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct intel_context *ce; unsigned long flags; - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + GEM_TRACE("%s request global=%x, current=%d\n", + engine->name, request ? request->global_seqno : 0, + intel_engine_get_seqno(engine)); /* See execlists_cancel_requests() for the irq/spinlock split. */ local_irq_save(flags); -- cgit From 0c7112a00272c633a79cad91ea9c1a0f40330f5d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Apr 2018 19:40:51 +0100 Subject: drm/i915: Rename priotree to sched Having moved the priotree struct into i915_scheduler.h, identify it as the scheduling element and rebrand into i915_sched. This becomes more useful as we start attaching more information we require to propagate through the scheduler. v2: Use i915_sched_node for future distinctiveness Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180418184052.7129-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 66 +++++++++++++------------ drivers/gpu/drm/i915/i915_request.h | 6 +-- drivers/gpu/drm/i915/i915_scheduler.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/intel_guc_submission.c | 8 +-- drivers/gpu/drm/i915/intel_lrc.c | 77 +++++++++++++++-------------- 7 files changed, 85 insertions(+), 82 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index effaf982b19b..6b5b9b3ded02 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1278,7 +1278,7 @@ static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->priority = request->priotree.priority; + erq->priority = request->sched.priority; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9ca9c24b4421..dfcc6a0df3fb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -125,22 +125,22 @@ i915_dependency_free(struct drm_i915_private *i915, } static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) +__i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) { INIT_LIST_HEAD(&dep->dfs_link); list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); + list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->flags = flags; } static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) +i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) { struct i915_dependency *dep; @@ -148,16 +148,18 @@ i915_priotree_add_dependency(struct drm_i915_private *i915, if (!dep) return -ENOMEM; - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + 
__i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC); return 0; } static void -i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) { - struct i915_dependency *dep, *next; + struct i915_dependency *dep, *tmp; - GEM_BUG_ON(!list_empty(&pt->link)); + GEM_BUG_ON(!list_empty(&node->link)); /* * Everyone we depended upon (the fences we wait to be signaled) @@ -165,8 +167,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) * However, retirement is run independently on each timeline and * so we may be called out-of-order. */ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); @@ -175,8 +177,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->signal_link); @@ -186,12 +188,12 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } static void -i915_priotree_init(struct i915_priotree *pt) +i915_sched_node_init(struct i915_sched_node *node) { - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) @@ -422,7 +424,7 @@ static void i915_request_retire(struct i915_request *request) } spin_unlock_irq(&request->lock); - i915_priotree_fini(request->i915, &request->priotree); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } @@ -725,7 +727,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); init_waitqueue_head(&rq->execute); - i915_priotree_init(&rq->priotree); + i915_sched_node_init(&rq->sched); INIT_LIST_HEAD(&rq->active_list); rq->i915 = i915; @@ -777,8 +779,8 @@ err_unwind: /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); kmem_cache_free(i915->requests, rq); err_unreserve: @@ -800,9 +802,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); + ret = i915_sched_node_add_dependency(to->i915, + &to->sched, + &from->sched); if (ret < 0) return ret; } @@ -1033,10 +1035,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); if 
(engine->schedule) - __i915_priotree_add_dependency(&request->priotree, - &prev->priotree, - &request->dep, - 0); + __i915_sched_node_add_dependency(&request->sched, + &prev->sched, + &request->dep, + 0); } spin_lock_irq(&timeline->lock); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index e6f7c5f4ec7f..35b8a9856daa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -117,7 +117,7 @@ struct i915_request { * to retirement), i.e. bidirectional dependency information for the * request not tied to individual fences. */ - struct i915_priotree priotree; + struct i915_sched_node sched; struct i915_dependency dep; /** @@ -306,10 +306,10 @@ static inline bool i915_request_started(const struct i915_request *rq) seqno - 1); } -static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) { const struct i915_request *rq = - container_of(pt, const struct i915_request, priotree); + container_of(node, const struct i915_request, sched); return i915_request_completed(rq); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 9d6ea9fa6e59..754243e0f955 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -38,7 +38,7 @@ enum { * is ready, and are able to reorder its portion of the graph to accommodate * dynamic priority changes. */ -struct i915_priotree { +struct i915_sched_node { struct list_head signalers_list; /* those before us, we depend upon */ struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; @@ -46,7 +46,7 @@ struct i915_priotree { }; struct i915_dependency { - struct i915_priotree *signaler; + struct i915_sched_node *signaler; struct list_head signal_link; struct list_head wait_link; struct list_head dfs_link; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1a8370779bbb..b542b1a4dddc 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1123,7 +1123,7 @@ static void print_request(struct drm_printer *m, rq->global_seqno, i915_request_completed(rq) ? "!" 
: "", rq->fence.context, rq->fence.seqno, - rq->priotree.priority, + rq->sched.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); } @@ -1367,7 +1367,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - list_for_each_entry(rq, &p->requests, priotree.link) + list_for_each_entry(rq, &p->requests, sched.link) print_request(m, rq, "\t\tQ "); } spin_unlock_irq(&engine->timeline->lock); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 97121230656c..0755f5cae950 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -659,7 +659,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.priority; } static inline int port_prio(const struct execlist_port *port) @@ -706,11 +706,11 @@ static void guc_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { if (last && rq->ctx != last->ctx) { if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -719,7 +719,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) port++; } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4f728587a756..062ed6e54420 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -177,7 +177,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.priority; } static inline bool need_preempt(const struct intel_engine_cs *engine, @@ -258,7 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, static struct i915_priolist * lookup_priolist(struct intel_engine_cs *engine, - struct i915_priotree *pt, + struct i915_sched_node *node, int prio) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -344,10 +344,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != last_prio) { last_prio = rq_prio(rq); - p = lookup_priolist(engine, &rq->priotree, last_prio); + p = lookup_priolist(engine, &rq->sched, last_prio); } - list_add(&rq->priotree.link, &p->requests); + list_add(&rq->sched.link, &p->requests); } } @@ -654,7 +654,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { /* * Can we combine this request with the current port? 
* It has to be the same context/ringbuffer and not @@ -674,7 +674,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -688,7 +688,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (ctx_single_port_submission(last->ctx) || ctx_single_port_submission(rq->ctx)) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -701,7 +701,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); last = rq; @@ -882,8 +882,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) while (rb) { struct i915_priolist *p = to_priolist(rb); - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { - INIT_LIST_HEAD(&rq->priotree.link); + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { + INIT_LIST_HEAD(&rq->sched.link); dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); @@ -1116,10 +1116,11 @@ static void execlists_submission_tasklet(unsigned long data) } static void queue_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, + struct i915_sched_node *node, int prio) { - list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); + list_add_tail(&node->link, + &lookup_priolist(engine, node, prio)->requests); } static void __submit_queue(struct intel_engine_cs *engine, int prio) @@ -1142,24 +1143,24 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - queue_request(engine, &request->priotree, rq_prio(request)); + queue_request(engine, &request->sched, rq_prio(request)); submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); - GEM_BUG_ON(list_empty(&request->priotree.link)); + GEM_BUG_ON(list_empty(&request->sched.link)); spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static struct i915_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *sched_to_request(struct i915_sched_node *node) { - return container_of(pt, struct i915_request, priotree); + return container_of(node, struct i915_request, sched); } static struct intel_engine_cs * -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = pt_to_request(pt)->engine; + struct intel_engine_cs *engine = sched_to_request(node)->engine; GEM_BUG_ON(!locked); @@ -1183,23 +1184,23 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_request_completed(request)) return; - if (prio <= READ_ONCE(request->priotree.priority)) + if (prio <= READ_ONCE(request->sched.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ lockdep_assert_held(&request->i915->drm.struct_mutex); - stack.signaler = &request->priotree; + stack.signaler = &request->sched; list_add(&stack.dfs_link, &dfs); /* * Recursively bump all dependent priorities to match the new request. 
* * A naive approach would be to use recursion: - * static void update_priorities(struct i915_priotree *pt, prio) { - * list_for_each_entry(dep, &pt->signalers_list, signal_link) + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * queue_request(pt); + * queue_request(node); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1211,7 +1212,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * last element in the list is the request we must execute first. */ list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; /* * Within an engine, there can be no cycle, but we may @@ -1219,13 +1220,13 @@ static void execlists_schedule(struct i915_request *request, int prio) * (redundant dependencies are not eliminated) and across * engines. */ - list_for_each_entry(p, &pt->signalers_list, signal_link) { + list_for_each_entry(p, &node->signalers_list, signal_link) { GEM_BUG_ON(p == dep); /* no cycles! */ - if (i915_priotree_signaled(p->signaler)) + if (i915_sched_node_signaled(p->signaler)) continue; - GEM_BUG_ON(p->signaler->priority < pt->priority); + GEM_BUG_ON(p->signaler->priority < node->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); } @@ -1237,9 +1238,9 @@ static void execlists_schedule(struct i915_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->priotree.link)); - request->priotree.priority = prio; + if (request->sched.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&request->sched.link)); + request->sched.priority = prio; if (stack.dfs_link.next == stack.dfs_link.prev) return; __list_del_entry(&stack.dfs_link); @@ -1250,23 +1251,23 @@ static void execlists_schedule(struct i915_request *request, int prio) /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; INIT_LIST_HEAD(&dep->dfs_link); - engine = pt_lock_engine(pt, engine); + engine = sched_lock_engine(node, engine); - if (prio <= pt->priority) + if (prio <= node->priority) continue; - pt->priority = prio; - if (!list_empty(&pt->link)) { - __list_del_entry(&pt->link); - queue_request(engine, pt, prio); + node->priority = prio; + if (!list_empty(&node->link)) { + __list_del_entry(&node->link); + queue_request(engine, node, prio); } if (prio > engine->execlists.queue_priority && - i915_sw_fence_done(&pt_to_request(pt)->submit)) + i915_sw_fence_done(&sched_to_request(node)->submit)) __submit_queue(engine, prio); } -- cgit From b7268c5eed0ab4f052d614b4b0e3fe8a51c9d5a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Apr 2018 19:40:52 +0100 Subject: drm/i915: Pack params to engine->schedule() into a struct Today we only want to pass along the priority to engine->schedule(), but in the future we want to have much more control over the various aspects of the GPU during a context's execution, for example controlling the frequency allowed. 
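
Illustration, not part of the patch: the struct-packing approach described in the next sentence can be sketched standalone as below, with hypothetical names rather than the i915 API. The point is that a later knob becomes one extra field, not a new parameter threaded through every schedule() signature and call site.

#include <stdio.h>

/* Hypothetical scheduling attributes; future knobs (e.g. a frequency
 * hint) become extra fields here without touching any caller. */
struct sched_attr {
	int priority;
};

struct request {
	int id;
	struct sched_attr attr;
};

/* Takes the whole attribute block instead of a growing argument list. */
static void schedule_request(struct request *rq, const struct sched_attr *attr)
{
	rq->attr = *attr;
	printf("request %d scheduled with prio=%d\n", rq->id, rq->attr.priority);
}

int main(void)
{
	struct request rq = { .id = 1 };
	const struct sched_attr attr = { .priority = 1024 };

	schedule_request(&rq, &attr);
	return 0;
}
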
As we need an ever growing number of parameters for scheduling, move those into a struct for convenience. v2: Move the anonymous struct into its own function for legibility and ye olde gcc. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180418184052.7129-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++-------- drivers/gpu/drm/i915/i915_gem_context.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem_context.h | 13 +------------ drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++---- drivers/gpu/drm/i915/i915_gpu_error.h | 5 +++-- drivers/gpu/drm/i915/i915_request.c | 4 ++-- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/i915_scheduler.h | 17 ++++++++++++++++- drivers/gpu/drm/i915/intel_display.c | 11 ++++++++++- drivers/gpu/drm/i915/intel_engine_cs.c | 18 +++++++++++++++--- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 20 +++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 4 ++-- drivers/gpu/drm/i915/selftests/intel_lrc.c | 8 +++++--- 17 files changed, 91 insertions(+), 55 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 638abe84857c..f3d21849b0cb 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1135,7 +1135,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) return PTR_ERR(s->shadow_ctx); if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) - s->shadow_ctx->priority = INT_MAX; + s->shadow_ctx->sched.priority = INT_MAX; bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8e8667d9b084..028691108125 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -75,6 +75,7 @@ #include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" +#include "i915_scheduler.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -3158,7 +3159,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int priority); + const struct i915_sched_attr *attr); #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4c9d2a6f7d28..795ca83aed7a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -564,7 +564,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, int prio) +static void __fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -577,11 +578,12 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rcu_read_lock(); if (engine->schedule) - engine->schedule(rq, prio); + engine->schedule(rq, attr); rcu_read_unlock(); } -static void fence_set_priority(struct dma_fence *fence, int prio) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -589,16 +591,16 @@ static void 
fence_set_priority(struct dma_fence *fence, int prio) int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], prio); + __fence_set_priority(array->fences[i], attr); } else { - __fence_set_priority(fence, prio); + __fence_set_priority(fence, attr); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio) + const struct i915_sched_attr *attr) { struct dma_fence *excl; @@ -613,7 +615,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], prio); + fence_set_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -623,7 +625,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, prio); + fence_set_priority(excl, attr); dma_fence_put(excl); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9b3834a846e8..74435affe23f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -281,7 +281,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->contexts.list); ctx->i915 = dev_priv; - ctx->priority = I915_PRIORITY_NORMAL; + ctx->sched.priority = I915_PRIORITY_NORMAL; INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -431,7 +431,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); - ctx->priority = prio; + ctx->sched.priority = prio; ctx->ring_size = PAGE_SIZE; GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -753,7 +753,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: - args->value = ctx->priority; + args->value = ctx->sched.priority; break; default: ret = -EINVAL; @@ -826,7 +826,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, !capable(CAP_SYS_NICE)) ret = -EPERM; else - ctx->priority = priority; + ctx->sched.priority = priority; } break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 7854262ddfd9..b12a8a8c5af9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -137,18 +137,7 @@ struct i915_gem_context { */ u32 user_handle; - /** - * @priority: execution and service priority - * - * All clients are equal, but some are more equal than others! - * - * Requests from a context with a greater (more positive) value of - * @priority will be executed before those with a lower @priority - * value, forming a simple QoS. - * - * The &drm_i915_private.kernel_context is assigned the lowest priority. 
- */ - int priority; + struct i915_sched_attr sched; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6b5b9b3ded02..671ffa37614e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -411,7 +411,7 @@ static void error_print_request(struct drm_i915_error_state_buf *m, err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->priority, + erq->context, erq->seqno, erq->sched_attr.priority, jiffies_to_msecs(jiffies - erq->jiffies), erq->head, erq->tail); } @@ -422,7 +422,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m, { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->priority, ctx->ban_score, bannable(ctx), + ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), ctx->guilty, ctx->active); } @@ -1278,7 +1278,7 @@ static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->priority = request->sched.priority; + erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; @@ -1372,7 +1372,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; - e->priority = ctx->priority; + e->sched_attr = ctx->sched; e->ban_score = atomic_read(&ctx->ban_score); e->bannable = i915_gem_context_is_bannable(ctx); e->guilty = atomic_read(&ctx->guilty_count); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index c05b6034d718..5d6fdcbc092c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -20,6 +20,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_params.h" +#include "i915_scheduler.h" struct drm_i915_private; struct intel_overlay_error_state; @@ -122,11 +123,11 @@ struct i915_gpu_state { pid_t pid; u32 handle; u32 hw_id; - int priority; int ban_score; int active; int guilty; bool bannable; + struct i915_sched_attr sched_attr; } context; struct drm_i915_error_object { @@ -147,11 +148,11 @@ struct i915_gpu_state { long jiffies; pid_t pid; u32 context; - int priority; int ban_score; u32 seqno; u32 head; u32 tail; + struct i915_sched_attr sched_attr; } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index dfcc6a0df3fb..b692a9f7c357 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -193,7 +193,7 @@ i915_sched_node_init(struct i915_sched_node *node) INIT_LIST_HEAD(&node->signalers_list); INIT_LIST_HEAD(&node->waiters_list); INIT_LIST_HEAD(&node->link); - node->priority = I915_PRIORITY_INVALID; + node->attr.priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) @@ -1064,7 +1064,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) */ rcu_read_lock(); if (engine->schedule) - engine->schedule(request, request->ctx->priority); + engine->schedule(request, &request->ctx->sched); rcu_read_unlock(); local_bh_disable(); diff 
--git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 35b8a9856daa..8f31ca8272f8 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -30,6 +30,7 @@ #include "i915_gem.h" #include "i915_scheduler.h" #include "i915_sw_fence.h" +#include "i915_scheduler.h" #include diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 754243e0f955..70a42220358d 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -19,6 +19,21 @@ enum { I915_PRIORITY_INVALID = INT_MIN }; +struct i915_sched_attr { + /** + * @priority: execution and service priority + * + * All clients are equal, but some are more equal than others! + * + * Requests from a context with a greater (more positive) value of + * @priority will be executed before those with a lower @priority + * value, forming a simple QoS. + * + * The &drm_i915_private.kernel_context is assigned the lowest priority. + */ + int priority; +}; + /* * "People assume that time is a strict progression of cause to effect, but * actually, from a nonlinear, non-subjective viewpoint, it's more like a big @@ -42,7 +57,7 @@ struct i915_sched_node { struct list_head signalers_list; /* those before us, we depend upon */ struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; - int priority; + struct i915_sched_attr attr; }; struct i915_dependency { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e04050ea3e28..43d54c7231ff 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12761,6 +12761,15 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } +static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_DISPLAY, + }; + + i915_gem_object_wait_priority(obj, 0, &attr); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12837,7 +12846,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); + fb_obj_bump_render_priority(obj); mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index b542b1a4dddc..be608f7111f5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1113,17 +1113,29 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) return which; } +static void print_sched_attr(struct drm_printer *m, + const struct drm_i915_private *i915, + const struct i915_sched_attr *attr) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return; + + drm_printf(m, "prio=%d", attr->priority); +} + static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { const char *name = rq->fence.ops->get_timeline_name(&rq->fence); - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, + drm_printf(m, "%s%x%s [%llx:%x] ", + prefix, rq->global_seqno, i915_request_completed(rq) ? "!" 
: "", - rq->fence.context, rq->fence.seqno, - rq->sched.priority, + rq->fence.context, rq->fence.seqno); + print_sched_attr(m, rq->i915, &rq->sched.attr); + drm_printf(m, " @ %dms: %s\n", jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 0755f5cae950..02da05875aa7 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -659,7 +659,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.priority; + return rq->sched.attr.priority; } static inline int port_prio(const struct execlist_port *port) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 062ed6e54420..029901a8fa38 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -177,7 +177,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.priority; + return rq->sched.attr.priority; } static inline bool need_preempt(const struct intel_engine_cs *engine, @@ -1172,11 +1172,13 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) return engine; } -static void execlists_schedule(struct i915_request *request, int prio) +static void execlists_schedule(struct i915_request *request, + const struct i915_sched_attr *attr) { struct intel_engine_cs *engine; struct i915_dependency *dep, *p; struct i915_dependency stack; + const int prio = attr->priority; LIST_HEAD(dfs); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); @@ -1184,7 +1186,7 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_request_completed(request)) return; - if (prio <= READ_ONCE(request->sched.priority)) + if (prio <= READ_ONCE(request->sched.attr.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ @@ -1226,8 +1228,8 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_sched_node_signaled(p->signaler)) continue; - GEM_BUG_ON(p->signaler->priority < node->priority); - if (prio > READ_ONCE(p->signaler->priority)) + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) list_move_tail(&p->dfs_link, &dfs); } } @@ -1238,9 +1240,9 @@ static void execlists_schedule(struct i915_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. 
*/ - if (request->sched.priority == I915_PRIORITY_INVALID) { + if (request->sched.attr.priority == I915_PRIORITY_INVALID) { GEM_BUG_ON(!list_empty(&request->sched.link)); - request->sched.priority = prio; + request->sched.attr = *attr; if (stack.dfs_link.next == stack.dfs_link.prev) return; __list_del_entry(&stack.dfs_link); @@ -1257,10 +1259,10 @@ static void execlists_schedule(struct i915_request *request, int prio) engine = sched_lock_engine(node, engine); - if (prio <= node->priority) + if (prio <= node->attr.priority) continue; - node->priority = prio; + node->attr.priority = prio; if (!list_empty(&node->link)) { __list_del_entry(&node->link); queue_request(engine, node, prio); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 717041640135..c5e27905b0e1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -14,6 +14,7 @@ #include "intel_gpu_commands.h" struct drm_printer; +struct i915_sched_attr; #define I915_CMD_HASH_ORDER 9 @@ -460,7 +461,8 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct i915_request *request, int priority); + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); /* * Cancel all requests on the hardware, or queued for execution. diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 24f913f26a7b..f7ee54e109ae 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -628,7 +628,7 @@ static int active_engine(void *data) } if (arg->flags & TEST_PRIORITY) - ctx[idx]->priority = + ctx[idx]->sched.priority = i915_prandom_u32_max_state(512, &prng); rq[idx] = i915_request_get(new); @@ -683,7 +683,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, return err; if (flags & TEST_PRIORITY) - h.ctx->priority = 1024; + h.ctx->sched.priority = 1024; } for_each_engine(engine, i915, id) { diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0481e2e01146..ee7e22d18ff8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -335,12 +335,12 @@ static int live_preempt(void *arg) ctx_hi = kernel_context(i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->priority = I915_CONTEXT_MAX_USER_PRIORITY; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->priority = I915_CONTEXT_MIN_USER_PRIORITY; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, i915, id) { struct i915_request *rq; @@ -407,6 +407,7 @@ static int live_late_preempt(void *arg) struct i915_gem_context *ctx_hi, *ctx_lo; struct spinner spin_hi, spin_lo; struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; enum intel_engine_id id; int err = -ENOMEM; @@ -458,7 +459,8 @@ static int live_late_preempt(void *arg) goto err_wedged; } - engine->schedule(rq, I915_PRIORITY_MAX); + attr.priority = I915_PRIORITY_MAX; + engine->schedule(rq, &attr); if (!wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); -- cgit From 52d7f16e5543ca892ae2393a716083d209ce3b36 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Apr 2018 14:15:00 +0100 Subject: drm/i915: Stop tracking timeline->inflight_seqnos In commit 9b6586ae9f6b ("drm/i915: Keep a global seqno 
per-engine"), we moved from a global inflight counter to per-engine counters in the hope that will be easy to run concurrently in future. However, with the advent of the desire to move requests between engines, we do need a global counter to preserve the semantics that no engine wraps in the middle of a submit. (Although this semantic is now only required for gen7 semaphore support, which only supports greater-then comparisons!) v2: Keep a global counter of all requests ever submitted and force the reset when it wraps. References: 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180430131503.5375-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++--- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_timeline.h | 6 ------ drivers/gpu/drm/i915/i915_request.c | 33 ++++++++++++++++---------------- drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++--- 5 files changed, 22 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cb1a804bf72e..747dad2666aa 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1340,10 +1340,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), - engine->timeline->inflight_seqnos); + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 193176bcddf5..dd4d6b918e86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2062,6 +2062,7 @@ struct drm_i915_private { struct list_head timelines; struct i915_gem_timeline global_timeline; u32 active_requests; + u32 request_serial; /** * Is the GPU currently considered idle, or busy executing diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 33e01bf6aa36..6e82119e2cd8 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -37,12 +37,6 @@ struct intel_timeline { u64 fence_context; u32 seqno; - /** - * Count of outstanding requests, from the time they are constructed - * to the moment they are retired. Loosely coupled to hardware. 
- */ - u32 inflight_seqnos; - spinlock_t lock; /** diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b692a9f7c357..b1993d4a1a53 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -241,6 +241,7 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) sizeof(timeline->engine[id].global_sync)); } + i915->gt.request_serial = seqno; return 0; } @@ -257,18 +258,22 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static int reserve_engine(struct intel_engine_cs *engine) +static int reserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; int ret; - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { + /* + * Reservation is fine until we may need to wrap around + * + * By incrementing the serial for every request, we know that no + * individual engine may exceed that serial (as each is reset to 0 + * on any wrap). This protects even the most pessimistic of migrations + * of every request from all engines onto just one. + */ + while (unlikely(++i915->gt.request_serial == 0)) { ret = reset_all_global_seqno(i915, 0); if (ret) { - engine->timeline->inflight_seqnos--; + i915->gt.request_serial--; return ret; } } @@ -279,15 +284,10 @@ static int reserve_engine(struct intel_engine_cs *engine) return 0; } -static void unreserve_engine(struct intel_engine_cs *engine) +static void unreserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - if (!--i915->gt.active_requests) i915_gem_park(i915); - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -362,7 +362,6 @@ static void i915_request_retire(struct i915_request *request) list_del_init(&request->link); spin_unlock_irq(&engine->timeline->lock); - unreserve_engine(request->engine); advance_ring(request); free_capture_list(request); @@ -424,6 +423,8 @@ static void i915_request_retire(struct i915_request *request) } spin_unlock_irq(&request->lock); + unreserve_gt(request->i915); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } @@ -642,7 +643,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) return ERR_CAST(ring); GEM_BUG_ON(!ring); - ret = reserve_engine(engine); + ret = reserve_gt(i915); if (ret) goto err_unpin; @@ -784,7 +785,7 @@ err_unwind: kmem_cache_free(i915->requests, rq); err_unreserve: - unreserve_engine(engine); + unreserve_gt(i915); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ac009f10c948..eba81d55dc3a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1321,12 +1321,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - 
engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); -- cgit From ab82a0635cdf0b91a134aaae34abd4e864595c5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Apr 2018 14:15:01 +0100 Subject: drm/i915: Wrap engine->context_pin() and engine->context_unpin() Make life easier in upcoming patches by moving the context_pin and context_unpin vfuncs into inline helpers. v2: Fixup mock_engine to mark the context as pinned on use. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180430131503.5375-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 20 +++++++++---------- drivers/gpu/drm/i915/i915_debugfs.c | 20 +++++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_context.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem_context.h | 30 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gpu_error.c | 3 ++- drivers/gpu/drm/i915/i915_perf.c | 9 +++++---- drivers/gpu/drm/i915/i915_request.c | 6 +++--- drivers/gpu/drm/i915/intel_engine_cs.c | 13 ++++++------ drivers/gpu/drm/i915/intel_guc_ads.c | 3 ++- drivers/gpu/drm/i915/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/intel_lrc.c | 29 ++++++++++++++++----------- drivers/gpu/drm/i915/intel_lrc.h | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 19 +++++++++--------- drivers/gpu/drm/i915/selftests/mock_engine.c | 13 +++++++++--- 16 files changed, 117 insertions(+), 69 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index a5bac83d53a9..0f949554d118 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -448,7 +448,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) { - u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 35f7cfd7a6b4..ffb45a9ee228 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,7 +58,7 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -130,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -283,7 +283,7 @@ static int shadow_context_status_change(struct notifier_block *nb, static void shadow_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + 
struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -389,7 +389,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. */ - ring = engine->context_pin(engine, shadow_ctx); + ring = intel_context_pin(shadow_ctx, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); @@ -403,7 +403,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); err_scan: @@ -437,7 +437,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -526,7 +526,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) struct intel_vgpu_submission *s = &workload->vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -688,7 +688,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ret = prepare_workload(workload); if (ret) { - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); goto out; } @@ -771,7 +771,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ -898,7 +898,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, s->shadow_ctx); + intel_context_unpin(s->shadow_ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 747dad2666aa..85911bc0b703 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -377,16 +377,19 @@ static void print_batch_pool_stats(struct seq_file *m, print_file_stats(m, "[k]batch pool", stats); } -static int per_file_ctx_stats(int id, void *ptr, void *data) +static int per_file_ctx_stats(int idx, void *ptr, void *data) { struct i915_gem_context *ctx = ptr; - int n; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce = to_intel_context(ctx, engine); - for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { - if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state->obj, data); - if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->vma->obj, data); + if (ce->state) + per_file_stats(0, ce->state->obj, data); + if (ce->ring) + per_file_stats(0, ce->ring->vma->obj, data); } return 0; @@ -1959,7 +1962,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); for_each_engine(engine, dev_priv, id) 
{ - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); seq_printf(m, "%s: ", engine->name); if (ce->state) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6b0c67a4f214..4090bfdda340 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3234,7 +3234,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, stalled_mask & ENGINE_MASK(id)); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); /* * Ostensibily, we always want a context loaded for powersaving, @@ -5291,7 +5291,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { struct i915_vma *state; - state = ctx->engine[id].state; + state = to_intel_context(ctx, engine)->state; if (!state) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 74435affe23f..59d4bd4a7b73 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -117,15 +117,15 @@ static void lut_close(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - int i; + unsigned int n; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); i915_ppgtt_put(ctx->ppgtt); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_context *ce = &ctx->engine[i]; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; if (!ce->state) continue; @@ -521,7 +521,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) if (!engine->last_retired_context) continue; - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); engine->last_retired_context = NULL; } } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index b12a8a8c5af9..ace3b129c189 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -149,7 +149,7 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - } engine[I915_NUM_ENGINES]; + } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; @@ -256,6 +256,34 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_ring * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + engine->context_unpin(engine, ctx); +} + /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 
671ffa37614e..c0127965b578 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1472,7 +1472,8 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - request->ctx->engine[i].state); + to_intel_context(request->ctx, + engine)->state); error->simulated |= i915_gem_context_no_error_capture(request->ctx); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index bfc906cd4e5e..4b1da01168ae 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1234,7 +1234,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ring = engine->context_pin(engine, stream->ctx); + ring = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -1246,7 +1246,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * with gen8+ and execlists */ dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); } return 0; @@ -1271,7 +1271,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + intel_context_unpin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1759,6 +1759,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_context *ctx; int ret; unsigned int wait_flags = I915_WAIT_LOCKED; @@ -1789,7 +1790,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = &ctx->engine[RCS]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 *regs; /* OA settings will be set upon first use */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b1993d4a1a53..9358f2cf0c32 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -409,7 +409,7 @@ static void i915_request_retire(struct i915_request *request) * the subsequent request. */ if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); engine->last_retired_context = request->ctx; spin_lock_irq(&request->lock); @@ -638,7 +638,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. 
*/ - ring = engine->context_pin(engine, ctx); + ring = intel_context_pin(ctx, engine); if (IS_ERR(ring)) return ERR_CAST(ring); GEM_BUG_ON(!ring); @@ -787,7 +787,7 @@ err_unwind: err_unreserve: unreserve_gt(i915); err_unpin: - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index eba81d55dc3a..238c8d3da041 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -685,7 +685,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = engine->context_pin(engine, engine->i915->kernel_context); + ring = intel_context_pin(engine->i915->kernel_context, engine); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -694,8 +694,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * we can interrupt the engine at any time. */ if (engine->i915->preempt_context) { - ring = engine->context_pin(engine, - engine->i915->preempt_context); + ring = intel_context_pin(engine->i915->preempt_context, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto err_unpin_kernel; @@ -719,9 +718,9 @@ err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); + intel_context_unpin(engine->i915->preempt_context, engine); err_unpin_kernel: - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->kernel_context, engine); return ret; } @@ -749,8 +748,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_gem_object_put(engine->default_state); if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->preempt_context, engine); + intel_context_unpin(engine->i915->kernel_context, engine); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index 334cb5202e1c..dcaa3fb71765 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -121,7 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. 
*/ - kernel_ctx_vma = dev_priv->kernel_context->engine[RCS].state; + kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, + dev_priv->engine[RCS])->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 02da05875aa7..6e6ed0f46bd3 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -362,7 +362,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -990,7 +990,8 @@ static void guc_fill_preempt_context(struct intel_guc *guc) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &client->owner->engine[id]; + struct intel_context *ce = + to_intel_context(client->owner, engine); u32 addr = intel_hws_preempt_done_address(engine); u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 58cad2448184..099995619472 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -223,7 +223,7 @@ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -414,7 +414,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; + struct intel_context *ce = to_intel_context(rq->ctx, rq->engine); struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -523,7 +523,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = - &engine->i915->preempt_context->engine[engine->id]; + to_intel_context(engine->i915->preempt_context, engine); unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -1327,7 +1327,7 @@ static struct intel_ring * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; @@ -1380,7 +1380,7 @@ err: static void execlists_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1399,8 +1399,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, static int execlists_request_alloc(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(request->ctx, request->engine); int ret; GEM_BUG_ON(!ce->pin_count); @@ -1854,7 +1854,7 @@ static void reset_common_ring(struct 
intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = request->ctx->engine[engine->id].lrc_reg_state; + regs = to_intel_context(request->ctx, engine)->lrc_reg_state; if (engine->default_state) { void *defaults; @@ -2305,9 +2305,13 @@ static int logical_ring_init(struct intel_engine_cs *engine) } engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) + if (engine->i915->preempt_context) { + struct intel_context *ce = + to_intel_context(engine->i915->preempt_context, engine); + engine->execlists.preempt_complete_status = - upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + upper_32_bits(ce->lrc_desc); + } return 0; @@ -2589,7 +2593,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; @@ -2660,7 +2664,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); u32 *reg; if (!ce->state) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 59d7b86012e9..4ec7d8dd13c8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -108,7 +108,7 @@ static inline uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - return ctx->engine[engine->id].lrc_desc; + return to_intel_context(ctx, engine)->lrc_desc; } #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c06c22c953b3..69ffc0dfe92b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -558,7 +558,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(request->ctx, + engine); struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -1163,9 +1164,9 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct i915_gem_context *ctx) +static int context_pin(struct intel_context *ce) { - struct i915_vma *vma = ctx->engine[RCS].state; + struct i915_vma *vma = ce->state; int ret; /* @@ -1256,7 +1257,7 @@ static struct intel_ring * intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -1278,7 +1279,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if (ce->state) { - ret = context_pin(ctx); + ret = context_pin(ce); if (ret) goto err; @@ -1299,7 +1300,7 @@ err: static void intel_ring_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1427,7 
+1428,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1518,7 +1519,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_ctx->engine[engine->id].state && + if (to_intel_context(to_ctx, engine)->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1566,7 +1567,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 78a89efa1119..b82420c6b810 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -71,14 +71,21 @@ static struct intel_ring * mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_get(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!ce->pin_count++) + i915_gem_context_get(ctx); + return engine->buffer; } static void mock_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_put(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!--ce->pin_count) + i915_gem_context_put(ctx); } static int mock_request_alloc(struct i915_request *request) @@ -217,7 +224,7 @@ void mock_engine_free(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); intel_engine_fini_breadcrumbs(engine); -- cgit From b887d61546245389c0304d8b1371bab9af8106c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Apr 2018 14:15:02 +0100 Subject: drm/i915: Retire requests along rings In the next patch, rings are the central timeline as requests may jump between engines. Therefore in the future as we retire in order along the engine timeline, we may retire out-of-order within a ring (as the ring now occurs along multiple engines), leading to much hilarity in miscomputing the position of ring->head. As an added bonus, retiring along the ring reduces the penalty of having one execlists client do cleanup for another (old legacy submission shares a ring between all clients). The downside is that the slow and irregular (off the critical path) process of cleaning up stale requests after userspace becomes a modicum less efficient. In the long run, it will become apparent that the ordered ring->request_list matches the ring->timeline; a fun challenge for the future will be unifying the two lists to avoid duplication! v2: We need both engine-order and ring-order processing to maintain our knowledge of where individual rings have completed up to as well as knowing what was last executing on any engine.
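A condensed sketch of the ring-order walk described above, trimmed from the i915_request.c hunks below for illustration: requests sit on ring->request_list in execution order, so retirement simply pops completed requests off the head of each ring until it meets one still in flight.

static void ring_retire_requests(struct intel_ring *ring)
{
	struct i915_request *request, *next;

	list_for_each_entry_safe(request, next,
				 &ring->request_list, ring_link) {
		/* Ring order: stop at the first request not yet completed */
		if (!i915_request_completed(request))
			break;

		i915_request_retire(request);
	}
}

The engine-order half is handled separately by __retire_engine_upto(), which i915_request_retire() calls so that engine->timeline->requests and engine->last_retired_context stay in step with whatever last executed on each engine.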
And finally by decoupling retiring the contexts on the engine and the timelines along the rings, we do have to keep a reference to the context on each request (previously it was guaranteed by the context being pinned). v3: Not just a reference to the context, but we need to keep it pinned as we manipulate the rings; i.e. we need a pin for both the manipulation of the engine state during its retirements, and a separate pin for the manipulation of the ring state. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180430131503.5375-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_request.c | 150 ++++++++++++++--------- drivers/gpu/drm/i915/i915_utils.h | 6 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + drivers/gpu/drm/i915/selftests/mock_engine.c | 27 ++-- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 + 8 files changed, 131 insertions(+), 65 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd4d6b918e86..edc33e059191 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2059,8 +2059,9 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct list_head timelines; struct i915_gem_timeline global_timeline; + struct list_head timelines; + struct list_head rings; u32 active_requests; u32 request_serial; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4090bfdda340..f0644d1fbd75 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5600,6 +5600,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) goto err_dependencies; mutex_lock(&dev_priv->drm.struct_mutex); + INIT_LIST_HEAD(&dev_priv->gt.rings); INIT_LIST_HEAD(&dev_priv->gt.timelines); err = i915_gem_timeline_init__global(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9358f2cf0c32..e6535255d445 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -286,6 +286,7 @@ static int reserve_gt(struct drm_i915_private *i915) static void unreserve_gt(struct drm_i915_private *i915) { + GEM_BUG_ON(!i915->gt.active_requests); if (!--i915->gt.active_requests) i915_gem_park(i915); } @@ -298,6 +299,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void advance_ring(struct i915_request *request) { + struct intel_ring *ring = request->ring; unsigned int tail; /* @@ -309,7 +311,8 @@ static void advance_ring(struct i915_request *request) * Note this requires that we are always called in request * completion order. */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { + GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); + if (list_is_last(&request->ring_link, &ring->request_list)) { /* * We may race here with execlists resubmitting this request * as we retire it. 
The resubmission will move the ring->tail @@ -322,9 +325,9 @@ static void advance_ring(struct i915_request *request) } else { tail = request->postfix; } - list_del(&request->ring_link); + list_del_init(&request->ring_link); - request->ring->head = tail; + ring->head = tail; } static void free_capture_list(struct i915_request *request) @@ -340,30 +343,84 @@ static void free_capture_list(struct i915_request *request) } } +static void __retire_engine_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", + __func__, engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(engine)); + + GEM_BUG_ON(!i915_request_completed(rq)); + + local_irq_disable(); + + spin_lock(&engine->timeline->lock); + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline->requests)); + list_del_init(&rq->link); + spin_unlock(&engine->timeline->lock); + + spin_lock(&rq->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + dma_fence_signal_locked(&rq->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + intel_engine_cancel_signaling(rq); + if (rq->waitboost) { + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + } + spin_unlock(&rq->lock); + + local_irq_enable(); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + intel_context_unpin(engine->last_retired_context, engine); + engine->last_retired_context = rq->ctx; +} + +static void __retire_engine_upto(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + struct i915_request *tmp; + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline->requests, + typeof(*tmp), link); + + GEM_BUG_ON(tmp->engine != engine); + __retire_engine_request(engine, tmp); + } while (tmp != rq); +} + static void i915_request_retire(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", - engine->name, + request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - intel_engine_get_seqno(engine)); + intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); trace_i915_request_retire(request); - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - advance_ring(request); - free_capture_list(request); /* @@ -399,29 +456,9 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->ctx->ban_score); + intel_context_unpin(request->ctx, request->engine); - /* - * The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. 
However, since we - * cannot take the required locks at i915_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. - */ - if (engine->last_retired_context) - intel_context_unpin(engine->last_retired_context, engine); - engine->last_retired_context = request->ctx; - - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); + __retire_engine_upto(request->engine, request); unreserve_gt(request->i915); @@ -431,18 +468,24 @@ static void i915_request_retire(struct i915_request *request) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *tmp; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + rq->engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(rq->engine)); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->link)) + if (list_empty(&rq->ring_link)) return; do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); + tmp = list_first_entry(&ring->request_list, + typeof(*tmp), ring_link); i915_request_retire(tmp); } while (tmp != rq); @@ -651,9 +694,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unreserve; - /* Move the oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry_or_null(&engine->timeline->requests, - typeof(*rq), link); + /* Move our oldest request to the slab-cache (if not in use!) 
*/ + rq = list_first_entry_or_null(&ring->request_list, + typeof(*rq), ring_link); if (rq && i915_request_completed(rq)) i915_request_retire(rq); @@ -771,6 +814,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unwind; + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(rq->ctx, engine); + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; @@ -1357,38 +1403,30 @@ complete: return timeout; } -static void engine_retire_requests(struct intel_engine_cs *engine) +static void ring_retire_requests(struct intel_ring *ring) { struct i915_request *request, *next; - u32 seqno = intel_engine_get_seqno(engine); - LIST_HEAD(retire); - spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, - &engine->timeline->requests, link) { - if (!i915_seqno_passed(seqno, request->global_seqno)) + &ring->request_list, ring_link) { + if (!i915_request_completed(request)) break; - list_move_tail(&request->link, &retire); - } - spin_unlock_irq(&engine->timeline->lock); - - list_for_each_entry_safe(request, next, &retire, link) i915_request_retire(request); + } } void i915_retire_requests(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_ring *ring, *next; lockdep_assert_held(&i915->drm.struct_mutex); if (!i915->gt.active_requests) return; - for_each_engine(engine, i915, id) - engine_retire_requests(engine); + list_for_each_entry_safe(ring, next, &i915->gt.rings, link) + ring_retire_requests(ring); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 0695717522ea..00165ad55fb3 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -120,6 +120,12 @@ static inline u64 ptr_to_u64(const void *ptr) #include +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return head->next == list; +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 69ffc0dfe92b..ae8958007df5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1066,7 +1066,6 @@ err: void intel_ring_reset(struct intel_ring *ring, u32 tail) { - GEM_BUG_ON(!list_empty(&ring->request_list)); ring->tail = tail; ring->head = tail; ring->emit = tail; @@ -1125,6 +1124,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) @@ -1150,6 +1150,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) } ring->vma = vma; + list_add(&ring->link, &engine->i915->gt.rings); + return ring; } @@ -1161,6 +1163,8 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); + list_del(&ring->link); + kfree(ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 24af3f1088ba..deb80d01e0bd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -130,6 +130,7 @@ struct intel_ring { void *vaddr; struct list_head request_list; + 
struct list_head link; u32 head; u32 tail; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index b82420c6b810..d95fc481e5c1 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -147,9 +147,18 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) INIT_LIST_HEAD(&ring->request_list); intel_ring_update_space(ring); + list_add(&ring->link, &engine->i915->gt.rings); + return ring; } +static void mock_ring_free(struct intel_ring *ring) +{ + list_del(&ring->link); + + kfree(ring); +} + struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, const char *name, int id) @@ -162,12 +171,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, if (!engine) return NULL; - engine->base.buffer = mock_ring(&engine->base); - if (!engine->base.buffer) { - kfree(engine); - return NULL; - } - /* minimal engine setup for requests */ engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); @@ -192,7 +195,16 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) + goto err_breadcrumbs; + return &engine->base; + +err_breadcrumbs: + intel_engine_fini_breadcrumbs(&engine->base); + kfree(engine); + return NULL; } void mock_engine_flush(struct intel_engine_cs *engine) @@ -226,8 +238,9 @@ void mock_engine_free(struct intel_engine_cs *engine) if (engine->last_retired_context) intel_context_unpin(engine->last_retired_context, engine); + mock_ring_free(engine->buffer); + intel_engine_fini_breadcrumbs(engine); - kfree(engine->buffer); kfree(engine); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e6d4b882599a..ac4bacf8b5b9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,6 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) mock_engine_flush(engine); i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); } static void mock_device_release(struct drm_device *dev) @@ -224,6 +225,7 @@ struct drm_i915_private *mock_gem_device(void) goto err_dependencies; mutex_lock(&i915->drm.struct_mutex); + INIT_LIST_HEAD(&i915->gt.rings); INIT_LIST_HEAD(&i915->gt.timelines); err = i915_gem_timeline_init__global(i915); if (err) { -- cgit From 643b450a594e9cb57fbd2534d1571d244faddd01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Apr 2018 14:15:03 +0100 Subject: drm/i915: Only track live rings for retiring We don't need to track every ring for its lifetime as they are managed by the contexts/engines. What we do want to track are the live rings so that we can sporadically clean up requests if userspace falls behind. We can simply restrict the gt->rings list to being only gt->live_rings. 
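A condensed sketch of the bookkeeping this adds, trimmed from the hunks below (using the active_rings naming settled on in v2): a ring is put on i915->gt.active_rings when its first request is added, and dropped again when its last request is retired, so request retirement only walks rings that can still hold work.

	/* __i915_request_add(): the first request on a ring marks it active */
	list_add_tail(&request->ring_link, &ring->request_list);
	if (list_is_first(&request->ring_link, &ring->request_list))
		list_add(&ring->active_link, &request->i915->gt.active_rings);

	/* advance_ring(): retiring the last request idles the ring again */
	if (list_is_last(&request->ring_link, &ring->request_list)) {
		tail = READ_ONCE(request->tail);
		list_del(&ring->active_link);
	} else {
		tail = request->postfix;
	}

i915_retire_requests() then becomes a list_for_each_entry_safe() walk over i915->gt.active_rings, instead of iterating every engine.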
v2: s/live/active/ for consistency with gt.active_requests Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180430131503.5375-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 6 ++++-- drivers/gpu/drm/i915/i915_request.c | 10 ++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 4 ---- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 5 +++-- 7 files changed, 18 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index edc33e059191..6268a5103dba 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2061,7 +2061,8 @@ struct drm_i915_private { struct i915_gem_timeline global_timeline; struct list_head timelines; - struct list_head rings; + + struct list_head active_rings; u32 active_requests; u32 request_serial; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f0644d1fbd75..fa1d94a4eb5f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -141,6 +141,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) { lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); if (!i915->gt.awake) return I915_EPOCH_INVALID; @@ -5599,9 +5600,10 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - mutex_lock(&dev_priv->drm.struct_mutex); - INIT_LIST_HEAD(&dev_priv->gt.rings); INIT_LIST_HEAD(&dev_priv->gt.timelines); + INIT_LIST_HEAD(&dev_priv->gt.active_rings); + + mutex_lock(&dev_priv->drm.struct_mutex); err = i915_gem_timeline_init__global(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); if (err) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e6535255d445..c8fc4b323e62 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -322,6 +322,7 @@ static void advance_ring(struct i915_request *request) * noops - they are safe to be replayed on a reset. 
*/ tail = READ_ONCE(request->tail); + list_del(&ring->active_link); } else { tail = request->postfix; } @@ -1096,6 +1097,8 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); + if (list_is_first(&request->ring_link, &ring->request_list)) + list_add(&ring->active_link, &request->i915->gt.active_rings); request->emitted_jiffies = jiffies; /* @@ -1418,14 +1421,17 @@ static void ring_retire_requests(struct intel_ring *ring) void i915_retire_requests(struct drm_i915_private *i915) { - struct intel_ring *ring, *next; + struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); if (!i915->gt.active_requests) return; - list_for_each_entry_safe(ring, next, &i915->gt.rings, link) + /* An outstanding request must be on a still active ring somewhere */ + GEM_BUG_ON(list_empty(&i915->gt.active_rings)); + + list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) ring_retire_requests(ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ae8958007df5..007449cfa22b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1150,8 +1150,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) } ring->vma = vma; - list_add(&ring->link, &engine->i915->gt.rings); - return ring; } @@ -1163,8 +1161,6 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); - list_del(&ring->link); - kfree(ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index deb80d01e0bd..fd679cec9ac6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -130,7 +130,7 @@ struct intel_ring { void *vaddr; struct list_head request_list; - struct list_head link; + struct list_head active_link; u32 head; u32 tail; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index d95fc481e5c1..19175ddcb45b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -147,15 +147,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) INIT_LIST_HEAD(&ring->request_list); intel_ring_update_space(ring); - list_add(&ring->link, &engine->i915->gt.rings); - return ring; } static void mock_ring_free(struct intel_ring *ring) { - list_del(&ring->link); - kfree(ring); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index ac4bacf8b5b9..f22a2b35a283 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -224,9 +224,10 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - mutex_lock(&i915->drm.struct_mutex); - INIT_LIST_HEAD(&i915->gt.rings); INIT_LIST_HEAD(&i915->gt.timelines); + INIT_LIST_HEAD(&i915->gt.active_rings); + + mutex_lock(&i915->drm.struct_mutex); err = i915_gem_timeline_init__global(i915); if (err) { mutex_unlock(&i915->drm.struct_mutex); -- cgit From 65fcb8064dd0e54d4674e8e2c6bf6ed7264a29e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 May 2018 17:38:38 +0100 Subject: drm/i915: Move timeline from GTT to ring In the future, we want to move a request between engines. 
To achieve this, we first realise that we have two timelines in effect here. The first runs through the GTT is required for ordering vma access, which is tracked currently by engine. The second is implied by sequential execution of commands inside the ringbuffer. This timeline is one that maps to userspace's expectations when submitting requests (i.e. given the same context, batch A is executed before batch B). As the rings's timelines map to userspace and the GTT timeline an implementation detail, move the timeline from the GTT into the ring itself (per-context in logical-ring-contexts/execlists, or a global per-engine timeline for the shared ringbuffers in legacy submission. The two timelines are still assumed to be equivalent at the moment (no migrating requests between engines yet) and so we can simply move from one to the other without adding extra ordering. v2: Reinforce that one isn't allowed to mix the engine execution timeline with the client timeline from userspace (on the ring). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180502163839.3248-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 13 +----- drivers/gpu/drm/i915/i915_gem.c | 9 ++-- drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++- drivers/gpu/drm/i915/i915_gem_context.h | 2 + drivers/gpu/drm/i915/i915_gem_gtt.c | 3 -- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/i915_gem_timeline.c | 54 ++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_timeline.h | 4 ++ drivers/gpu/drm/i915/i915_request.c | 13 +++--- drivers/gpu/drm/i915/intel_engine_cs.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++ drivers/gpu/drm/i915/selftests/mock_engine.c | 5 ++- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 4 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 - 17 files changed, 115 insertions(+), 41 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6268a5103dba..ffa87aef31e5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2059,7 +2059,8 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct i915_gem_timeline global_timeline; + struct i915_gem_timeline execution_timeline; + struct i915_gem_timeline legacy_timeline; struct list_head timelines; struct list_head active_rings; @@ -3235,16 +3236,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline struct intel_timeline * -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_address_space *vm; - - vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &ctx->i915->ggtt.base; - return &vm->timeline.engine[engine->id]; -} - int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fa1d94a4eb5f..438a2fc5bba0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3110,10 +3110,10 @@ static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; - struct intel_timeline *timeline; + struct intel_timeline *timeline = request->timeline; unsigned long flags; - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); + GEM_BUG_ON(timeline == engine->timeline); spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock(&timeline->lock); @@ -3782,7 +3782,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) ret = wait_for_engines(i915); } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); + ret = wait_for_timeline(&i915->gt.execution_timeline, flags); } return ret; @@ -5652,7 +5652,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_timeline_fini(&dev_priv->gt.global_timeline); + i915_gem_timeline_fini(&dev_priv->gt.legacy_timeline); + i915_gem_timeline_fini(&dev_priv->gt.execution_timeline); WARN_ON(!list_empty(&dev_priv->gt.timelines)); mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 59d4bd4a7b73..1f4987dc6616 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -122,6 +122,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + i915_gem_timeline_free(ctx->timeline); i915_ppgtt_put(ctx->ppgtt); for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { @@ -376,6 +377,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, ctx->desc_template = default_desc_template(dev_priv, ppgtt); } + if (HAS_EXECLISTS(dev_priv)) { + struct i915_gem_timeline *timeline; + + timeline = i915_gem_timeline_create(dev_priv, ctx->name); + if (IS_ERR(timeline)) { + __destroy_hw_context(ctx, file_priv); + return ERR_CAST(timeline); + } + + ctx->timeline = timeline; + } + trace_i915_context_create(ctx); return ctx; @@ -584,7 +597,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { struct intel_timeline *tl; - if (timeline == &engine->i915->gt.global_timeline) + if (timeline == &engine->i915->gt.execution_timeline) continue; tl = &timeline->engine[engine->id]; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index ace3b129c189..ec53ba06f836 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -58,6 +58,8 @@ struct i915_gem_context { /** file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; + struct i915_gem_timeline *timeline; + /** * @ppgtt: unique address space (GTT) * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 21d72f695adb..e9d828324f67 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2111,8 +2111,6 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv, const char *name) { - i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; @@ -2129,7 +2127,6 @@ static void i915_address_space_fini(struct i915_address_space *vm) if (pagevec_count(&vm->free_pages)) vm_free_pages_release(vm, true); - i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6efc017e8bb3..98107925de48 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -257,7 +257,6 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; - struct i915_gem_timeline timeline; struct drm_i915_private *i915; struct device *dma; /* Every address space belongs to a struct file - except for the global diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index e9fd87604067..24f4068cc137 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -95,12 +95,28 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, int i915_gem_timeline_init__global(struct drm_i915_private *i915) { - static struct lock_class_key class; + static struct lock_class_key class1, class2; + int err; + + err = __i915_gem_timeline_init(i915, + &i915->gt.execution_timeline, + "[execution]", &class1, + "i915_execution_timeline"); + if (err) + return err; + + err = __i915_gem_timeline_init(i915, + &i915->gt.legacy_timeline, + "[global]", &class2, + "i915_global_timeline"); + if (err) + goto err_exec_timeline; + + return 0; - return __i915_gem_timeline_init(i915, - &i915->gt.global_timeline, - "[execution]", - &class, "&global_timeline->lock"); +err_exec_timeline: + i915_gem_timeline_fini(&i915->gt.execution_timeline); + return err; } /** @@ -148,6 +164,34 @@ void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) kfree(timeline->name); } +struct i915_gem_timeline * +i915_gem_timeline_create(struct drm_i915_private *i915, const char *name) +{ + struct i915_gem_timeline *timeline; + int err; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + err = i915_gem_timeline_init(i915, timeline, name); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + + return timeline; +} + +void i915_gem_timeline_free(struct i915_gem_timeline *timeline) +{ + if (!timeline) + return; + + i915_gem_timeline_fini(timeline); + kfree(timeline); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_timeline.c" #include "selftests/i915_gem_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 6e82119e2cd8..780ed465c4fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -90,6 +90,10 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915); void i915_gem_timelines_park(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +struct i915_gem_timeline * +i915_gem_timeline_create(struct drm_i915_private *i915, const char *name); +void i915_gem_timeline_free(struct i915_gem_timeline *timeline); + static inline int __intel_timeline_sync_set(struct intel_timeline 
*tl, u64 context, u32 seqno) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c8fc4b323e62..7bb613c00cc3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -758,7 +758,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } } - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + rq->timeline = ring->timeline; GEM_BUG_ON(rq->timeline == engine->timeline); spin_lock_init(&rq->lock); @@ -774,12 +779,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sched_node_init(&rq->sched); - INIT_LIST_HEAD(&rq->active_list); - rq->i915 = i915; - rq->engine = engine; - rq->ctx = ctx; - rq->ring = ring; - /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; rq->signaling.wait.seqno = 0; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 9164e6d665f8..7af5fe85612d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -453,7 +453,8 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) static void intel_engine_init_timeline(struct intel_engine_cs *engine) { - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; + engine->timeline = + &engine->i915->gt.execution_timeline.engine[engine->id]; } static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 57396a2a6ea2..9b2407753ebd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2624,7 +2624,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - ring = intel_engine_create_ring(engine, ctx->ring_size); + ring = intel_engine_create_ring(engine, ctx->timeline, ctx->ring_size); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 007449cfa22b..b73e700c3048 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1117,13 +1117,16 @@ err: } struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_gem_timeline *timeline, + int size) { struct intel_ring *ring; struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + GEM_BUG_ON(&timeline->engine[engine->id] == engine->timeline); lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -1131,6 +1134,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ring->request_list); + ring->timeline = &timeline->engine[engine->id]; ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1327,7 +1331,9 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + ring = intel_engine_create_ring(engine, + &engine->i915->gt.legacy_timeline, + 32 * PAGE_SIZE); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h index fd679cec9ac6..da53aa2973a7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -129,6 +129,7 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; + struct intel_timeline *timeline; struct list_head request_list; struct list_head active_link; @@ -768,7 +769,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define CNL_HWS_CSB_WRITE_INDEX 0x2f struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_gem_timeline *timeline, + int size); int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7ecaed50d0b9..24ac648dc83a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -355,6 +355,18 @@ static int igt_ctx_exec(void *arg) if (first_shared_gtt) { ctx = __create_hw_context(i915, file->driver_priv); + if (!IS_ERR(ctx) && HAS_EXECLISTS(i915)) { + struct i915_gem_timeline *timeline; + + timeline = i915_gem_timeline_create(i915, ctx->name); + if (IS_ERR(timeline)) { + __destroy_hw_context(ctx, file->driver_priv); + ctx = ERR_CAST(timeline); + } else { + ctx->timeline = timeline; + } + } + first_shared_gtt = false; } else { ctx = i915_gem_create_context(i915, file->driver_priv); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 19175ddcb45b..6752498e2c73 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -140,6 +140,8 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; + ring->timeline = &engine->i915->gt.legacy_timeline.engine[engine->id]; + ring->size = sz; ring->effective_size = sz; ring->vaddr = (void *)(ring + 1); @@ -180,8 +182,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - engine->base.timeline = - &i915->gt.global_timeline.engine[engine->base.id]; + intel_engine_init_timeline(&engine->base); intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f22a2b35a283..f11c83e8ff32 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -73,7 +73,9 @@ static void mock_device_release(struct drm_device *dev) mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(i915); - i915_gem_timeline_fini(&i915->gt.global_timeline); + i915_gem_timeline_fini(&i915->gt.legacy_timeline); + i915_gem_timeline_fini(&i915->gt.execution_timeline); + WARN_ON(!list_empty(&i915->gt.timelines)); mutex_unlock(&i915->drm.struct_mutex); destroy_workqueue(i915->wq); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index e96873f96116..36c112088940 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915, INIT_LIST_HEAD(&ppgtt->base.global_link); drm_mm_init(&ppgtt->base.mm, 0, 
ppgtt->base.total); - i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); ppgtt->base.clear_range = nop_clear_range; ppgtt->base.insert_page = mock_insert_page; -- cgit From a89d1f921c15932b4c9a70861d134290f1a14a10 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 May 2018 17:38:39 +0100 Subject: drm/i915: Split i915_gem_timeline into individual timelines We need to move to a more flexible timeline that doesn't assume one fence context per engine, and so allow for a single timeline to be used across a combination of engines. This means that preallocating a fence context per engine is now a hindrance, and so we want to introduce the singular timeline. From the code perspective, this has the notable advantage of clearing up a lot of mirky semantics and some clumsy pointer chasing. By splitting the timeline up into a single entity rather than an array of per-engine timelines, we can realise the goal of the previous patch of tracking the timeline alongside the ring. v2: Tweak wait_for_idle to stop the compiling thinking that ret may be uninitialised. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180502163839.3248-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 129 ++++----- drivers/gpu/drm/i915/i915_gem_context.c | 48 ++-- drivers/gpu/drm/i915/i915_gem_context.h | 2 - drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- drivers/gpu/drm/i915/i915_gem_timeline.c | 198 -------------- drivers/gpu/drm/i915/i915_gem_timeline.h | 121 --------- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 68 +++-- drivers/gpu/drm/i915/i915_request.h | 3 +- drivers/gpu/drm/i915/i915_timeline.c | 105 ++++++++ drivers/gpu/drm/i915/i915_timeline.h | 126 +++++++++ drivers/gpu/drm/i915/intel_engine_cs.c | 27 +- drivers/gpu/drm/i915/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 48 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 25 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 12 - drivers/gpu/drm/i915/selftests/i915_gem_timeline.c | 299 --------------------- drivers/gpu/drm/i915/selftests/i915_timeline.c | 267 ++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_engine.c | 32 ++- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 10 +- drivers/gpu/drm/i915/selftests/mock_timeline.c | 45 +--- drivers/gpu/drm/i915/selftests/mock_timeline.h | 28 +- 26 files changed, 723 insertions(+), 908 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.h create mode 100644 drivers/gpu/drm/i915/i915_timeline.c create mode 100644 drivers/gpu/drm/i915/i915_timeline.h delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_timeline.c create mode 100644 drivers/gpu/drm/i915/selftests/i915_timeline.c (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index dfe01452c8d1..00c13382b008 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -71,11 +71,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git 
a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ffa87aef31e5..11ff84eef52a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,10 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" #include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -2059,8 +2059,6 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); - struct i915_gem_timeline execution_timeline; - struct i915_gem_timeline legacy_timeline; struct list_head timelines; struct list_head active_rings; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 438a2fc5bba0..484354f25f98 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -162,7 +162,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) synchronize_irq(i915->drm.irq); intel_engines_park(i915); - i915_gem_timelines_park(i915); + i915_timelines_park(i915); i915_pmu_gt_parked(i915); @@ -2977,8 +2977,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) continue; @@ -2989,7 +2989,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return active; } @@ -3110,15 +3110,15 @@ static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; - struct intel_timeline *timeline = request->timeline; + struct i915_timeline *timeline = request->timeline; unsigned long flags; - GEM_BUG_ON(timeline == engine->timeline); + GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); spin_lock(&timeline->lock); - list_for_each_entry_continue(request, &engine->timeline->requests, link) + list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->ctx == hung_ctx) skip_request(request); @@ -3126,7 +3126,7 @@ static void engine_skip_context(struct i915_request *request) skip_request(request); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } /* Returns the request if it was guilty of the hang */ @@ -3183,11 +3183,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); request = list_prev_entry(request, link); - if (&request->link == &engine->timeline->requests) + if (&request->link == &engine->timeline.requests) request = NULL; - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } } @@ -3300,10 +3300,10 @@ 
static void nop_complete_submit_request(struct i915_request *request) request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline->lock, flags); + spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline->lock, flags); + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -3372,10 +3372,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); intel_engine_init_global_seqno(engine, intel_engine_last_submit(engine)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); i915_gem_reset_finish_engine(engine); } @@ -3387,8 +3387,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) bool i915_gem_unset_wedged(struct drm_i915_private *i915) { - struct i915_gem_timeline *tl; - int i; + struct i915_timeline *tl; lockdep_assert_held(&i915->drm.struct_mutex); if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) @@ -3407,29 +3406,27 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. */ list_for_each_entry(tl, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct i915_request *rq; + struct i915_request *rq; - rq = i915_gem_active_peek(&tl->engine[i].last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; + rq = i915_gem_active_peek(&tl->last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; - /* - * We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - return false; - } + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). 
+ */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; } i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); @@ -3734,17 +3731,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) +static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) { - int ret, i; - - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); - if (ret) - return ret; - } - - return 0; + return i915_gem_active_wait(&tl->last_request, flags); } static int wait_for_engines(struct drm_i915_private *i915) @@ -3762,30 +3751,37 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { - int ret; - /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; if (flags & I915_WAIT_LOCKED) { - struct i915_gem_timeline *tl; + struct i915_timeline *tl; + int err; lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - ret = wait_for_timeline(tl, flags); - if (ret) - return ret; + err = wait_for_timeline(tl, flags); + if (err) + return err; } i915_retire_requests(i915); - ret = wait_for_engines(i915); + return wait_for_engines(i915); } else { - ret = wait_for_timeline(&i915->gt.execution_timeline, flags); - } + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return ret; + for_each_engine(engine, i915, id) { + err = wait_for_timeline(&engine->timeline, flags); + if (err) + return err; + } + + return 0; + } } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) @@ -4954,7 +4950,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != kernel_context); } } @@ -5603,12 +5599,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->gt.timelines); INIT_LIST_HEAD(&dev_priv->gt.active_rings); - mutex_lock(&dev_priv->drm.struct_mutex); - err = i915_gem_timeline_init__global(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (err) - goto err_priorities; - i915_gem_init__mm(dev_priv); INIT_DELAYED_WORK(&dev_priv->gt.retire_work, @@ -5628,8 +5618,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) return 0; -err_priorities: - kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -5650,12 +5638,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - - mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_timeline_fini(&dev_priv->gt.legacy_timeline); - i915_gem_timeline_fini(&dev_priv->gt.execution_timeline); WARN_ON(!list_empty(&dev_priv->gt.timelines)); - mutex_unlock(&dev_priv->drm.struct_mutex); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 1f4987dc6616..33f8a4b3c981 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -122,7 +122,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - i915_gem_timeline_free(ctx->timeline); i915_ppgtt_put(ctx->ppgtt); for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { @@ -377,18 +376,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, ctx->desc_template = default_desc_template(dev_priv, ppgtt); } - if (HAS_EXECLISTS(dev_priv)) { - struct i915_gem_timeline *timeline; - - timeline = i915_gem_timeline_create(dev_priv, ctx->name); - if (IS_ERR(timeline)) { - __destroy_hw_context(ctx, file_priv); - return ERR_CAST(timeline); - } - - ctx->timeline = timeline; - } - trace_i915_context_create(ctx); return ctx; @@ -590,19 +577,29 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +static struct i915_request * +last_request_on_engine(struct i915_timeline *timeline, + struct intel_engine_cs *engine) { - struct i915_gem_timeline *timeline; + struct i915_request *rq; - list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { - struct intel_timeline *tl; + if (timeline == &engine->timeline) + return NULL; - if (timeline == &engine->i915->gt.execution_timeline) - continue; + rq = i915_gem_active_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); + if (rq && rq->engine == engine) + return rq; + + return NULL; +} - tl = &timeline->engine[engine->id]; - if (i915_gem_active_peek(&tl->last_request, - &engine->i915->drm.struct_mutex)) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + + list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { + if (last_request_on_engine(timeline, engine)) return false; } @@ -612,7 +609,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; enum intel_engine_id id; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -632,11 +629,8 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, - &dev_priv->drm.struct_mutex); + prev = last_request_on_engine(timeline, engine); if (prev) i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index ec53ba06f836..ace3b129c189 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -58,8 +58,6 @@ struct i915_gem_context { /** file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; - struct i915_gem_timeline *timeline; - /** * @ppgtt: unique address space (GTT) * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 98107925de48..1db0dedb4059 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,10 +38,9 @@ #include #include -#include "i915_gem_timeline.h" - #include "i915_request.h" 
#include "i915_selftest.h" +#include "i915_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) #define I915_GTT_PAGE_SIZE_64K BIT(16) diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c deleted file mode 100644 index 24f4068cc137..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "i915_syncmap.h" - -static void __intel_timeline_init(struct intel_timeline *tl, - struct i915_gem_timeline *parent, - u64 context, - struct lock_class_key *lockclass, - const char *lockname) -{ - tl->fence_context = context; - tl->common = parent; - spin_lock_init(&tl->lock); - lockdep_set_class_and_name(&tl->lock, lockclass, lockname); - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - i915_syncmap_init(&tl->sync); -} - -static void __intel_timeline_fini(struct intel_timeline *tl) -{ - GEM_BUG_ON(!list_empty(&tl->requests)); - - i915_syncmap_free(&tl->sync); -} - -static int __i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name, - struct lock_class_key *lockclass, - const char *lockname) -{ - unsigned int i; - u64 fences; - - lockdep_assert_held(&i915->drm.struct_mutex); - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. 
- */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->i915 = i915; - timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); - if (!timeline->name) - return -ENOMEM; - - list_add(&timeline->link, &i915->gt.timelines); - - /* Called during early_init before we know how many engines there are */ - fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_init(&timeline->engine[i], - timeline, fences++, - lockclass, lockname); - - return 0; -} - -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, timeline, name, - &class, "&timeline->lock"); -} - -int i915_gem_timeline_init__global(struct drm_i915_private *i915) -{ - static struct lock_class_key class1, class2; - int err; - - err = __i915_gem_timeline_init(i915, - &i915->gt.execution_timeline, - "[execution]", &class1, - "i915_execution_timeline"); - if (err) - return err; - - err = __i915_gem_timeline_init(i915, - &i915->gt.legacy_timeline, - "[global]", &class2, - "i915_global_timeline"); - if (err) - goto err_exec_timeline; - - return 0; - -err_exec_timeline: - i915_gem_timeline_fini(&i915->gt.execution_timeline); - return err; -} - -/** - * i915_gem_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void i915_gem_timelines_park(struct drm_i915_private *i915) -{ - struct i915_gem_timeline *timeline; - int i; - - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. 
- */ - i915_syncmap_free(&tl->sync); - } - } -} - -void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) -{ - int i; - - lockdep_assert_held(&timeline->i915->drm.struct_mutex); - - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_fini(&timeline->engine[i]); - - list_del(&timeline->link); - kfree(timeline->name); -} - -struct i915_gem_timeline * -i915_gem_timeline_create(struct drm_i915_private *i915, const char *name) -{ - struct i915_gem_timeline *timeline; - int err; - - timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); - if (!timeline) - return ERR_PTR(-ENOMEM); - - err = i915_gem_timeline_init(i915, timeline, name); - if (err) { - kfree(timeline); - return ERR_PTR(err); - } - - return timeline; -} - -void i915_gem_timeline_free(struct i915_gem_timeline *timeline) -{ - if (!timeline) - return; - - i915_gem_timeline_fini(timeline); - kfree(timeline); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_gem_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h deleted file mode 100644 index 780ed465c4fc..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef I915_GEM_TIMELINE_H -#define I915_GEM_TIMELINE_H - -#include - -#include "i915_request.h" -#include "i915_syncmap.h" -#include "i915_utils.h" - -struct i915_gem_timeline; - -struct intel_timeline { - u64 fence_context; - u32 seqno; - - spinlock_t lock; - - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head requests; - - /* Contains an RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_gem_active_get_request_rcu(), or hold the - * struct_mutex. - */ - struct i915_gem_active last_request; - - /** - * We track the most recent seqno that we wait on in every context so - * that we only have to emit a new await and dependency on a more - * recent sync point. As the contexts may be executed out-of-order, we - * have to track each individually and can not rely on an absolute - * global_seqno. When we know that all tracked fences are completed - * (i.e. 
when the driver is idle), we know that the syncmap is - * redundant and we can discard it without loss of generality. - */ - struct i915_syncmap *sync; - /** - * Separately to the inter-context seqno map above, we track the last - * barrier (e.g. semaphore wait) to the global engine timelines. Note - * that this tracks global_seqno rather than the context.seqno, and - * so it is subject to the limitations of hw wraparound and that we - * may need to revoke global_seqno (on pre-emption). - */ - u32 global_sync[I915_NUM_ENGINES]; - - struct i915_gem_timeline *common; -}; - -struct i915_gem_timeline { - struct list_head link; - - struct drm_i915_private *i915; - const char *name; - - struct intel_timeline engine[I915_NUM_ENGINES]; -}; - -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *tl, - const char *name); -int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_park(struct drm_i915_private *i915); -void i915_gem_timeline_fini(struct i915_gem_timeline *tl); - -struct i915_gem_timeline * -i915_gem_timeline_create(struct drm_i915_private *i915, const char *name); -void i915_gem_timeline_free(struct i915_gem_timeline *timeline); - -static inline int __intel_timeline_sync_set(struct intel_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_set(&tl->sync, context, seqno); -} - -static inline int intel_timeline_sync_set(struct intel_timeline *tl, - const struct dma_fence *fence) -{ - return __intel_timeline_sync_set(tl, fence->context, fence->seqno); -} - -static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_is_later(&tl->sync, context, seqno); -} - -static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, - const struct dma_fence *fence) -{ - return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); -} - -#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 944939947d30..df234dc23274 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1310,7 +1310,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) + list_for_each_entry_from(request, &engine->timeline.requests, link) count++; if (!count) return; @@ -1323,7 +1323,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) { + list_for_each_entry_from(request, &engine->timeline.requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4b1da01168ae..d9341415df40 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1695,7 +1695,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; struct i915_request *rq; int ret; @@ -1716,15 +1716,11 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline 
*tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, + prev = i915_gem_active_raw(&timeline->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - GFP_KERNEL); + i915_request_await_dma_fence(rq, &prev->fence); } i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7bb613c00cc3..5acf869f3ca3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->timeline->common->name; + return to_request(fence)->timeline->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -199,6 +199,7 @@ i915_sched_node_init(struct i915_sched_node *node) static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; + struct i915_timeline *timeline; enum intel_engine_id id; int ret; @@ -213,16 +214,13 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; - GEM_TRACE("%s seqno %d (current %d) -> %d\n", engine->name, - tl->seqno, + engine->timeline.seqno, intel_engine_get_seqno(engine), seqno); - if (!i915_seqno_passed(seqno, tl->seqno)) { + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); @@ -230,18 +228,18 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* Check we are idle before we fiddle with hw state! 
*/ GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); /* Finally reset hw state */ intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); + engine->timeline.seqno = seqno; } + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); + i915->gt.request_serial = seqno; + return 0; } @@ -357,10 +355,10 @@ static void __retire_engine_request(struct intel_engine_cs *engine, local_irq_disable(); - spin_lock(&engine->timeline->lock); - GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline->requests)); + spin_lock(&engine->timeline.lock); + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); list_del_init(&rq->link); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); spin_lock(&rq->lock); if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) @@ -397,7 +395,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine, return; do { - tmp = list_first_entry(&engine->timeline->requests, + tmp = list_first_entry(&engine->timeline.requests, typeof(*tmp), link); GEM_BUG_ON(tmp->engine != engine); @@ -492,16 +490,16 @@ void i915_request_retire_upto(struct i915_request *rq) } while (tmp != rq); } -static u32 timeline_get_seqno(struct intel_timeline *tl) +static u32 timeline_get_seqno(struct i915_timeline *tl) { return ++tl->seqno; } static void move_to_timeline(struct i915_request *request, - struct intel_timeline *timeline) + struct i915_timeline *timeline) { - GEM_BUG_ON(request->timeline == request->engine->timeline); - lockdep_assert_held(&request->engine->timeline->lock); + GEM_BUG_ON(request->timeline == &request->engine->timeline); + lockdep_assert_held(&request->engine->timeline.lock); spin_lock(&request->timeline->lock); list_move_tail(&request->link, &timeline->requests); @@ -516,15 +514,15 @@ void __i915_request_submit(struct i915_request *request) GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline->seqno + 1, + engine->timeline.seqno + 1, intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(engine->timeline); + seqno = timeline_get_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -539,7 +537,7 @@ void __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ - move_to_timeline(request, engine->timeline); + move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); @@ -552,11 +550,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -570,17 +568,17 @@ void __i915_request_unsubmit(struct i915_request *request) intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); /* * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), request->global_seqno)); - engine->timeline->seqno--; + engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -607,11 +605,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static int __i915_sw_fence_call @@ -764,7 +762,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ctx = ctx; rq->ring = ring; rq->timeline = ring->timeline; - GEM_BUG_ON(rq->timeline == engine->timeline); + GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -929,7 +927,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(rq->timeline, fence)) + i915_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -943,7 +941,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - intel_timeline_sync_set(rq->timeline, fence); + i915_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -1020,7 +1018,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; - struct intel_timeline *timeline = request->timeline; + struct i915_timeline *timeline = request->timeline; struct i915_request *prev; u32 *cs; int err; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 8f31ca8272f8..eddbd4245cb3 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -37,6 +37,7 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; +struct i915_timeline; struct intel_wait { struct rb_node node; @@ -95,7 +96,7 @@ struct i915_request { struct i915_gem_context *ctx; struct intel_engine_cs *engine; struct intel_ring *ring; - struct intel_timeline *timeline; + struct i915_timeline *timeline; struct intel_signal_node signaling; /* diff --git a/drivers/gpu/drm/i915/i915_timeline.c 
b/drivers/gpu/drm/i915/i915_timeline.c new file mode 100644 index 000000000000..4667cc08c416 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "i915_drv.h" + +#include "i915_timeline.h" +#include "i915_syncmap.h" + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->name = name; + + list_add(&timeline->link, &i915->gt.timelines); + + /* Called during early_init before we know how many engines there are */ + + timeline->fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&timeline->lock); + + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); +} + +/** + * i915_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_timelines_park(struct drm_i915_private *i915) +{ + struct i915_timeline *timeline; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. 
+ */ + i915_syncmap_free(&timeline->sync); + } +} + +void i915_timeline_fini(struct i915_timeline *timeline) +{ + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + list_del(&timeline->link); +} + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name) +{ + struct i915_timeline *timeline; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + i915_timeline_init(i915, timeline, name); + kref_init(&timeline->kref); + + return timeline; +} + +void __i915_timeline_free(struct kref *kref) +{ + struct i915_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + i915_timeline_fini(timeline); + kfree(timeline); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h new file mode 100644 index 000000000000..dc2a4632faa7 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -0,0 +1,126 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H + +#include +#include + +#include "i915_request.h" +#include "i915_syncmap.h" +#include "i915_utils.h" + +struct i915_timeline { + u64 fence_context; + u32 seqno; + + spinlock_t lock; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_gem_active_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_gem_active last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + /** + * Separately to the inter-context seqno map above, we track the last + * barrier (e.g. 
semaphore wait) to the global engine timelines. Note + * that this tracks global_seqno rather than the context.seqno, and + * so it is subject to the limitations of hw wraparound and that we + * may need to revoke global_seqno (on pre-emption). + */ + u32 global_sync[I915_NUM_ENGINES]; + + struct list_head link; + const char *name; + + struct kref kref; +}; + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name); +void i915_timeline_fini(struct i915_timeline *tl); + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name); + +static inline struct i915_timeline * +i915_timeline_get(struct i915_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __i915_timeline_free(struct kref *kref); +static inline void i915_timeline_put(struct i915_timeline *timeline) +{ + kref_put(&timeline->kref, __i915_timeline_free); +} + +static inline int __i915_timeline_sync_set(struct i915_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int i915_timeline_sync_set(struct i915_timeline *tl, + const struct dma_fence *fence) +{ + return __i915_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, + const struct dma_fence *fence) +{ + return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + +void i915_timelines_park(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 7af5fe85612d..a90769b9954e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -451,12 +451,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } -static void intel_engine_init_timeline(struct intel_engine_cs *engine) -{ - engine->timeline = - &engine->i915->gt.execution_timeline.engine[engine->id]; -} - static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { i915_gem_batch_pool_init(&engine->batch_pool, engine); @@ -508,8 +502,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { + i915_timeline_init(engine->i915, &engine->timeline, engine->name); + intel_engine_init_execlist(engine); - intel_engine_init_timeline(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); @@ -751,6 +746,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->i915->preempt_context) intel_context_unpin(engine->i915->preempt_context, engine); intel_context_unpin(engine->i915->kernel_context, engine); + + i915_timeline_fini(&engine->timeline); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) @@ -1003,7 +1000,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. 
*/ - rq = __i915_gem_active_peek(&engine->timeline->last_request); + rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) return rq->ctx == kernel_context; else @@ -1335,14 +1332,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline->requests, + rq = list_first_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline->requests, + rq = list_last_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); @@ -1374,11 +1371,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); last = NULL; count = 0; - list_for_each_entry(rq, &engine->timeline->requests, link) { + list_for_each_entry(rq, &engine->timeline.requests, link) { if (count++ < MAX_REQUESTS_TO_SHOW - 1) print_request(m, rq, "\t\tE "); else @@ -1416,7 +1413,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request(m, last, "\t\tQ "); } - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index c6bb5bebddfc..62828e39ee26 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -679,7 +679,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -750,7 +750,7 @@ done: GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); unlock: - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static void guc_submission_tasklet(unsigned long data) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9b2407753ebd..e04798e98db2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -331,10 +331,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, + &engine->timeline.requests, link) { if (i915_request_completed(rq)) return; @@ -358,9 +358,9 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static inline void @@ -584,7 +584,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. 
*/ - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -744,7 +744,7 @@ done: GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); unlock: - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); if (submit) { execlists_user_begin(execlists, execlists->port); @@ -894,10 +894,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists_cancel_port_requests(execlists); reset_irq(engine); - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline->requests, link) { + list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -929,7 +929,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); } @@ -1167,7 +1167,7 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); queue_request(engine, &request->sched, rq_prio(request)); submit_queue(engine, rq_prio(request)); @@ -1175,7 +1175,7 @@ static void execlists_submit_request(struct i915_request *request) GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->sched.link)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static struct i915_request *sched_to_request(struct i915_sched_node *node) @@ -1191,8 +1191,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) GEM_BUG_ON(!locked); if (engine != locked) { - spin_unlock(&locked->timeline->lock); - spin_lock(&engine->timeline->lock); + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); } return engine; @@ -1275,7 +1275,7 @@ static void execlists_schedule(struct i915_request *request, } engine = request->engine; - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { @@ -1299,7 +1299,7 @@ static void execlists_schedule(struct i915_request *request, __submit_queue(engine, prio); } - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) @@ -1828,9 +1828,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, reset_irq(engine); /* Push back any incomplete requests for replay after the reset. 
*/ - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); @@ -2599,6 +2599,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; + struct i915_timeline *timeline; int ret; if (ce->state) @@ -2614,8 +2615,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { - DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); - return PTR_ERR(ctx_obj); + ret = PTR_ERR(ctx_obj); + goto error_deref_obj; } vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); @@ -2624,7 +2625,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - ring = intel_engine_create_ring(engine, ctx->timeline, ctx->ring_size); + timeline = i915_timeline_create(ctx->i915, ctx->name); + if (IS_ERR(timeline)) { + ret = PTR_ERR(timeline); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, timeline, ctx->ring_size); + i915_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b73e700c3048..8f19349a6055 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -697,17 +697,17 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* Mark all submitted requests as skipped. 
*/ - list_for_each_entry(request, &engine->timeline->requests, link) { + list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1118,7 +1118,7 @@ err: struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_gem_timeline *timeline, + struct i915_timeline *timeline, int size) { struct intel_ring *ring; @@ -1126,7 +1126,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - GEM_BUG_ON(&timeline->engine[engine->id] == engine->timeline); + GEM_BUG_ON(timeline == &engine->timeline); lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -1134,7 +1134,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ring->request_list); - ring->timeline = &timeline->engine[engine->id]; + ring->timeline = i915_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1165,6 +1165,7 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); + i915_timeline_put(ring->timeline); kfree(ring); } @@ -1323,6 +1324,7 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct intel_ring *ring; + struct i915_timeline *timeline; int err; intel_engine_setup_common(engine); @@ -1331,9 +1333,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - ring = intel_engine_create_ring(engine, - &engine->i915->gt.legacy_timeline, - 32 * PAGE_SIZE); + timeline = i915_timeline_create(engine->i915, engine->name); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index da53aa2973a7..010750e8ee44 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -6,12 +6,12 @@ #include #include "i915_gem_batch_pool.h" -#include "i915_gem_timeline.h" #include "i915_reg.h" #include "i915_pmu.h" #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #include "intel_gpu_commands.h" struct drm_printer; @@ -129,7 +129,7 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct intel_timeline *timeline; + struct i915_timeline *timeline; struct list_head request_list; struct list_head active_link; @@ -338,7 +338,8 @@ struct intel_engine_cs { u32 mmio_base; struct intel_ring *buffer; - struct intel_timeline *timeline; + + struct i915_timeline timeline; struct drm_i915_gem_object *default_state; @@ -770,7 +771,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_gem_timeline *timeline, + struct i915_timeline *timeline, int size); int intel_ring_pin(struct intel_ring *ring, struct 
drm_i915_private *i915, @@ -889,7 +890,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->seqno); + return READ_ONCE(engine->timeline.seqno); } void intel_engine_get_instdone(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 24ac648dc83a..7ecaed50d0b9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -355,18 +355,6 @@ static int igt_ctx_exec(void *arg) if (first_shared_gtt) { ctx = __create_hw_context(i915, file->driver_priv); - if (!IS_ERR(ctx) && HAS_EXECLISTS(i915)) { - struct i915_gem_timeline *timeline; - - timeline = i915_gem_timeline_create(i915, ctx->name); - if (IS_ERR(timeline)) { - __destroy_hw_context(ctx, file->driver_priv); - ctx = ERR_CAST(timeline); - } else { - ctx->timeline = timeline; - } - } - first_shared_gtt = false; } else { ctx = i915_gem_create_context(i915, file->driver_priv); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c deleted file mode 100644 index 3000e6a7d82d..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "../i915_selftest.h" -#include "i915_random.h" - -#include "mock_gem_device.h" -#include "mock_timeline.h" - -struct __igt_sync { - const char *name; - u32 seqno; - bool expected; - bool set; -}; - -static int __igt_sync(struct intel_timeline *tl, - u64 ctx, - const struct __igt_sync *p, - const char *name) -{ - int ret; - - if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { - pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", - name, p->name, ctx, p->seqno, yesno(p->expected)); - return -EINVAL; - } - - if (p->set) { - ret = __intel_timeline_sync_set(tl, ctx, p->seqno); - if (ret) - return ret; - } - - return 0; -} - -static int igt_sync(void *arg) -{ - const struct __igt_sync pass[] = { - { "unset", 0, false, false }, - { "new", 0, false, true }, - { "0a", 0, true, true }, - { "1a", 1, false, true }, - { "1b", 1, true, true }, - { "0b", 0, true, false }, - { "2a", 2, false, true }, - { "4", 4, false, true }, - { "INT_MAX", INT_MAX, false, true }, - { "INT_MAX-1", INT_MAX-1, true, false }, - { "INT_MAX+1", (u32)INT_MAX+1, false, true }, - { "INT_MAX", INT_MAX, true, false }, - { "UINT_MAX", UINT_MAX, false, true }, - { "wrap", 0, false, true }, - { "unwrap", UINT_MAX, true, false }, - {}, - }, *p; - struct intel_timeline *tl; - int order, offset; - int ret = -ENODEV; - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - - for (p = pass; p->name; p++) { - for (order = 1; order < 64; order++) { - for (offset = -1; offset <= (order > 1); offset++) { - u64 ctx = BIT_ULL(order) + offset; - - ret = __igt_sync(tl, ctx, p, "1"); - if (ret) - goto out; - } - } - } - mock_timeline_destroy(tl); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - - for (order = 1; order < 64; order++) { - for (offset = -1; offset <= (order > 1); offset++) { - u64 ctx = BIT_ULL(order) + offset; - - for (p = pass; p->name; p++) { - ret = __igt_sync(tl, ctx, p, "2"); - if (ret) - goto out; - } - } - } - -out: - mock_timeline_destroy(tl); - return ret; -} - -static unsigned int random_engine(struct rnd_state *rnd) -{ - return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); -} - -static int bench_sync(void *arg) -{ - struct rnd_state prng; - struct intel_timeline *tl; - unsigned long end_time, count; - u64 prng32_1M; - ktime_t kt; - int order, last_order; - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - - /* Lookups from cache are very fast and so the random number generation - * and the loop itself becomes a significant factor in the per-iteration - * timings. We try to compensate the results by measuring the overhead - * of the prng and subtract it from the reported results. 
- */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u32 x; - - /* Make sure the compiler doesn't optimise away the prng call */ - WRITE_ONCE(x, prandom_u32_state(&prng)); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_debug("%s: %lu random evaluations, %lluns/prng\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); - - /* Benchmark (only) setting random context ids */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u64 id = i915_prandom_u64_state(&prng); - - __intel_timeline_sync_set(tl, id, 0); - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu random insertions, %lluns/insert\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - /* Benchmark looking up the exact same context ids as we just set */ - prandom_seed_state(&prng, i915_selftest.random_seed); - end_time = count; - kt = ktime_get(); - while (end_time--) { - u64 id = i915_prandom_u64_state(&prng); - - if (!__intel_timeline_sync_is_later(tl, id, 0)) { - mock_timeline_destroy(tl); - pr_err("Lookup of %llu failed\n", id); - return -EINVAL; - } - } - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu random lookups, %lluns/lookup\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - mock_timeline_destroy(tl); - cond_resched(); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - - /* Benchmark setting the first N (in order) contexts */ - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - __intel_timeline_sync_set(tl, count++, 0); - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu in-order insertions, %lluns/insert\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - /* Benchmark looking up the exact same context ids as we just set */ - end_time = count; - kt = ktime_get(); - while (end_time--) { - if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { - pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_destroy(tl); - return -EINVAL; - } - } - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu in-order lookups, %lluns/lookup\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - mock_timeline_destroy(tl); - cond_resched(); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - - /* Benchmark searching for a random context id and maybe changing it */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u32 id = random_engine(&prng); - u32 seqno = prandom_u32_state(&prng); - - if (!__intel_timeline_sync_is_later(tl, id, seqno)) - __intel_timeline_sync_set(tl, id, seqno); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); - cond_resched(); - - /* Benchmark searching for a known context id and changing the seqno */ - for (last_order = 1, order = 1; order < 32; - ({ int tmp = last_order; last_order = order; order += 
tmp; })) { - unsigned int mask = BIT(order) - 1; - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - /* Without assuming too many details of the underlying - * implementation, try to identify its phase-changes - * (if any)! - */ - u64 id = (u64)(count & mask) << order; - - __intel_timeline_sync_is_later(tl, id, 0); - __intel_timeline_sync_set(tl, id, 0); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", - __func__, count, order, - (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); - cond_resched(); - } - - return 0; -} - -int i915_gem_timeline_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_sync), - SUBTEST(bench_sync), - }; - - return i915_subtests(tests, NULL); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c new file mode 100644 index 000000000000..19f1c6a5c8fb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -0,0 +1,267 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_gem_device.h" +#include "mock_timeline.h" + +struct __igt_sync { + const char *name; + u32 seqno; + bool expected; + bool set; +}; + +static int __igt_sync(struct i915_timeline *tl, + u64 ctx, + const struct __igt_sync *p, + const char *name) +{ + int ret; + + if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", + name, p->name, ctx, p->seqno, yesno(p->expected)); + return -EINVAL; + } + + if (p->set) { + ret = __i915_timeline_sync_set(tl, ctx, p->seqno); + if (ret) + return ret; + } + + return 0; +} + +static int igt_sync(void *arg) +{ + const struct __igt_sync pass[] = { + { "unset", 0, false, false }, + { "new", 0, false, true }, + { "0a", 0, true, true }, + { "1a", 1, false, true }, + { "1b", 1, true, true }, + { "0b", 0, true, false }, + { "2a", 2, false, true }, + { "4", 4, false, true }, + { "INT_MAX", INT_MAX, false, true }, + { "INT_MAX-1", INT_MAX-1, true, false }, + { "INT_MAX+1", (u32)INT_MAX+1, false, true }, + { "INT_MAX", INT_MAX, true, false }, + { "UINT_MAX", UINT_MAX, false, true }, + { "wrap", 0, false, true }, + { "unwrap", UINT_MAX, true, false }, + {}, + }, *p; + struct i915_timeline tl; + int order, offset; + int ret = -ENODEV; + + mock_timeline_init(&tl, 0); + for (p = pass; p->name; p++) { + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + ret = __igt_sync(&tl, ctx, p, "1"); + if (ret) + goto out; + } + } + } + mock_timeline_fini(&tl); + + mock_timeline_init(&tl, 0); + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + for (p = pass; p->name; p++) { + ret = __igt_sync(&tl, ctx, p, "2"); + if (ret) + goto out; + } + } + } + +out: + mock_timeline_fini(&tl); + return ret; +} + +static unsigned int random_engine(struct rnd_state *rnd) +{ + return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); +} + +static int bench_sync(void *arg) +{ + struct rnd_state prng; + struct i915_timeline tl; + unsigned long end_time, count; + u64 prng32_1M; + ktime_t kt; + int order, last_order; + + mock_timeline_init(&tl, 0); + + 
/* Lookups from cache are very fast and so the random number generation + * and the loop itself becomes a significant factor in the per-iteration + * timings. We try to compensate the results by measuring the overhead + * of the prng and subtract it from the reported results. + */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __i915_timeline_sync_set(&tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__i915_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __i915_timeline_sync_set(&tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_fini(&tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__i915_timeline_sync_is_later(&tl, id, seqno)) + __i915_timeline_sync_set(&tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + 
mock_timeline_fini(&tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order < 32; + ({ int tmp = last_order; last_order = order; order += tmp; })) { + unsigned int mask = BIT(order) - 1; + + mock_timeline_init(&tl, 0); + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __i915_timeline_sync_is_later(&tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + } + + return 0; +} + +int i915_gem_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 6752498e2c73..26bf29d97007 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -25,6 +25,11 @@ #include "mock_engine.h" #include "mock_request.h" +struct mock_ring { + struct intel_ring base; + struct i915_timeline timeline; +}; + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -132,7 +137,7 @@ static void mock_submit_request(struct i915_request *request) static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct intel_ring *ring; + struct mock_ring *ring; BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); @@ -140,20 +145,24 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->timeline = &engine->i915->gt.legacy_timeline.engine[engine->id]; + i915_timeline_init(engine->i915, &ring->timeline, engine->name); - ring->size = sz; - ring->effective_size = sz; - ring->vaddr = (void *)(ring + 1); + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; - INIT_LIST_HEAD(&ring->request_list); - intel_ring_update_space(ring); + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); - return ring; + return &ring->base; } -static void mock_ring_free(struct intel_ring *ring) +static void mock_ring_free(struct intel_ring *base) { + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); kfree(ring); } @@ -182,8 +191,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - intel_engine_init_timeline(&engine->base); - + i915_timeline_init(i915, &engine->base.timeline, engine->base.name); intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ @@ -200,6 +208,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); + i915_timeline_fini(&engine->base.timeline); kfree(engine); return NULL; } @@ -238,6 +247,7 @@ void mock_engine_free(struct intel_engine_cs 
*engine) mock_ring_free(engine->buffer); intel_engine_fini_breadcrumbs(engine); + i915_timeline_fini(&engine->timeline); kfree(engine); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f11c83e8ff32..a662c0450e77 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -73,10 +73,8 @@ static void mock_device_release(struct drm_device *dev) mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(i915); - i915_gem_timeline_fini(&i915->gt.legacy_timeline); - i915_gem_timeline_fini(&i915->gt.execution_timeline); - WARN_ON(!list_empty(&i915->gt.timelines)); mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -230,12 +228,6 @@ struct drm_i915_private *mock_gem_device(void) INIT_LIST_HEAD(&i915->gt.active_rings); mutex_lock(&i915->drm.struct_mutex); - err = i915_gem_timeline_init__global(i915); - if (err) { - mutex_unlock(&i915->drm.struct_mutex); - goto err_priorities; - } - mock_init_ggtt(i915); mutex_unlock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index 47b1f47c5812..dcf3b16f5a07 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -1,45 +1,28 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
+ * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ +#include "../i915_timeline.h" + #include "mock_timeline.h" -struct intel_timeline *mock_timeline(u64 context) +void mock_timeline_init(struct i915_timeline *timeline, u64 context) { - static struct lock_class_key class; - struct intel_timeline *tl; + timeline->fence_context = context; + + spin_lock_init(&timeline->lock); - tl = kzalloc(sizeof(*tl), GFP_KERNEL); - if (!tl) - return NULL; + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); - __intel_timeline_init(tl, NULL, context, &class, "mock"); + i915_syncmap_init(&timeline->sync); - return tl; + INIT_LIST_HEAD(&timeline->link); } -void mock_timeline_destroy(struct intel_timeline *tl) +void mock_timeline_fini(struct i915_timeline *timeline) { - __intel_timeline_fini(tl); - kfree(tl); + i915_timeline_fini(timeline); } diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h index c27ff4639b8b..b6deaa61110d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -1,33 +1,15 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #ifndef __MOCK_TIMELINE__ #define __MOCK_TIMELINE__ -#include "../i915_gem_timeline.h" +struct i915_timeline; -struct intel_timeline *mock_timeline(u64 context); -void mock_timeline_destroy(struct intel_timeline *tl); +void mock_timeline_init(struct i915_timeline *timeline, u64 context); +void mock_timeline_fini(struct i915_timeline *timeline); #endif /* !__MOCK_TIMELINE__ */ -- cgit From ea491b23b2ffba069537a8216060d4d3400931a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 May 2018 23:03:12 +0100 Subject: drm/i915: Reset the hangcheck timestamp before repeating a seqno In the unusual circumstance where we reuse a seqno (for example, in igt), make sure that we reset the hangcheck timestamp before it sees the same seqno again. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=106215 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180502220313.6459-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/i915/intel_hangcheck.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 5acf869f3ca3..63bb61089be5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -223,6 +223,7 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 309e38b00e95..d47e346bd49e 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -452,6 +452,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + engine->hangcheck.action_timestamp = jiffies; } void intel_hangcheck_init(struct drm_i915_private *i915) -- cgit From 7c572e1bdf8dea0c84ce8da01a84cdaa26d8e138 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 May 2018 20:51:15 +0100 Subject: drm/i915: Keep one request in our ring_list Don't pre-emptively retire the oldest request in our ring's list if it is the only request. We keep various bits of state alive using the active reference from the request and would rather transfer that state over to a new request rather than the more involved process of retiring and reacquiring it. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180503195115.22309-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 63bb61089be5..d68739b94dac 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -695,9 +695,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) goto err_unreserve; /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry_or_null(&ring->request_list, - typeof(*rq), ring_link); - if (rq && i915_request_completed(rq)) + rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ring->request_list) && + i915_request_completed(rq)) i915_request_retire(rq); /* -- cgit From 43c8c44105e30d912746a6dbd10c59ef42c230f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 May 2018 11:11:47 +0100 Subject: drm/i915: Remove assertion of active_rings must be non-empty if active_requests "An outstanding request must still be on an active ring somewhere" is only true if we haven't just been interrupted by the shrinker in the middle of allocating the request itself. (At the start of i915_request_alloc() we pin the context and prepare the GT for activity, marking it as active, and then try to allocate the request. 
If this allocation invokes the shrinker, we try to reclaim some space by calling i915_retire_requests() which may then be confused by the pre-reservation of active_requests.) <3>[ 125.472695] i915_retire_requests:1429 GEM_BUG_ON(list_empty(&i915->gt.active_rings)) <2>[ 125.472792] kernel BUG at drivers/gpu/drm/i915/i915_request.c:1429! <4>[ 125.472822] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI <4>[ 125.498764] Modules linked in: snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel btusb btrtl btbcm btintel cdc_ether snd_hda_codec_realtek bluetooth i915 snd_hda_codec_generic usbnet r8152 mii ecdh_generic lpc_ich mei_me snd_hda_intel snd_hda_codec mei snd_hwdep snd_hda_core snd_pcm prime_numbers <4>[ 125.498923] CPU: 0 PID: 1115 Comm: gem_exec_create Tainted: G U 4.17.0-rc3-gc49cbe0d1eb8-kasan_32+ #1 <4>[ 125.498955] Hardware name: GOOGLE Peppy/Peppy, BIOS MrChromebox 02/04/2018 <4>[ 125.499074] RIP: 0010:i915_retire_requests+0x3f2/0x590 [i915] <4>[ 125.499095] RSP: 0018:ffff88004e5dec40 EFLAGS: 00010282 <4>[ 125.499117] RAX: 0000000000000010 RBX: ffff8800458f0000 RCX: 0000000000000000 <4>[ 125.499140] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff880060c2f6f0 <4>[ 125.499164] RBP: ffff88004e5dee30 R08: ffffed000c185ee6 R09: ffffed000c185ee6 <4>[ 125.499187] R10: 0000000000000001 R11: ffffed000c185ee5 R12: ffff8800553da160 <4>[ 125.499210] R13: dffffc0000000000 R14: 0000000000000000 R15: ffff8800458faed0 <4>[ 125.499235] FS: 00007fe18f052980(0000) GS:ffff880065400000(0000) knlGS:0000000000000000 <4>[ 125.499262] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 125.499282] CR2: 00007f01df11efb8 CR3: 00000000518d4001 CR4: 00000000000606f0 <4>[ 125.499304] Call Trace: <4>[ 125.499417] i915_gem_shrink+0x576/0xb50 [i915] <4>[ 125.499532] ? i915_gem_shrinker_count+0x2f0/0x2f0 [i915] <4>[ 125.499561] ? trace_hardirqs_on_thunk+0x1a/0x1c <4>[ 125.499671] ? i915_gem_shrinker_count+0x1d6/0x2f0 [i915] <4>[ 125.499782] ? i915_gem_shrinker_scan+0xc4/0x320 [i915] <4>[ 125.499889] i915_gem_shrinker_scan+0xc4/0x320 [i915] <4>[ 125.499997] ? i915_gem_shrinker_vmap+0x3a0/0x3a0 [i915] <4>[ 125.500021] ? do_raw_spin_unlock+0x4f/0x240 <4>[ 125.500042] ? _raw_spin_unlock+0x29/0x40 <4>[ 125.500149] ? i915_gem_shrinker_count+0x1d6/0x2f0 [i915] <4>[ 125.500177] shrink_slab.part.18+0x23e/0x8f0 <4>[ 125.500202] ? unregister_shrinker+0x1f0/0x1f0 <4>[ 125.500226] ? mem_cgroup_iter+0x379/0xcc0 <4>[ 125.500249] shrink_node+0xa7e/0x1180 <4>[ 125.500276] ? shrink_node_memcg+0x11f0/0x11f0 <4>[ 125.500297] ? __delayacct_freepages_start+0x38/0x80 <4>[ 125.500319] ? __is_insn_slot_addr+0xe3/0x1a0 <4>[ 125.500342] ? recalibrate_cpu_khz+0x10/0x10 <4>[ 125.500361] ? ktime_get+0xb2/0x140 <4>[ 125.500382] do_try_to_free_pages+0x2d3/0xe40 <4>[ 125.500407] ? allow_direct_reclaim.part.23+0x1e0/0x1e0 <4>[ 125.500429] ? shrink_node+0x1180/0x1180 <4>[ 125.500450] ? __read_once_size_nocheck.constprop.4+0x10/0x10 <4>[ 125.500476] try_to_free_pages+0x1af/0x560 <4>[ 125.500497] ? do_try_to_free_pages+0xe40/0xe40 <4>[ 125.500525] __alloc_pages_nodemask+0xadc/0x2130 <4>[ 125.500553] ? gfp_pfmemalloc_allowed+0x150/0x150 <4>[ 125.500654] ? i915_gem_do_execbuffer+0x219d/0x32e0 [i915] <4>[ 125.500678] ? debug_check_no_locks_freed+0x2a0/0x2a0 <4>[ 125.500701] ? __debug_object_init+0x322/0xd90 <4>[ 125.500722] ? debug_check_no_locks_freed+0x2a0/0x2a0 <4>[ 125.500827] ? i915_gem_do_execbuffer+0xdc2/0x32e0 [i915] <4>[ 125.500942] ? 
i915_request_alloc+0x5b5/0x13f0 [i915] <4>[ 125.500964] ? page_frag_free+0x170/0x170 <4>[ 125.500984] ? debug_check_no_locks_freed+0x2a0/0x2a0 <4>[ 125.501008] new_slab+0x21d/0x5c0 <4>[ 125.501029] ___slab_alloc.constprop.35+0x322/0x3e0 <4>[ 125.501052] ? reservation_object_reserve_shared+0x10b/0x250 <4>[ 125.501074] ? __ww_mutex_lock.constprop.3+0x1104/0x2cf0 <4>[ 125.501097] ? _raw_spin_unlock_irqrestore+0x39/0x60 <4>[ 125.501120] ? fs_reclaim_acquire+0x10/0x10 <4>[ 125.501138] ? lock_acquire+0x138/0x3c0 <4>[ 125.501156] ? lock_acquire+0x3c0/0x3c0 <4>[ 125.501176] ? reservation_object_reserve_shared+0x10b/0x250 <4>[ 125.501198] ? __slab_alloc.isra.27.constprop.34+0x3d/0x70 <4>[ 125.501219] __slab_alloc.isra.27.constprop.34+0x3d/0x70 <4>[ 125.501243] ? reservation_object_reserve_shared+0x10b/0x250 <4>[ 125.501265] __kmalloc_track_caller+0x313/0x350 <4>[ 125.501287] krealloc+0x62/0xb0 <4>[ 125.501305] reservation_object_reserve_shared+0x10b/0x250 <4>[ 125.501411] i915_gem_do_execbuffer+0x2040/0x32e0 [i915] <4>[ 125.501522] ? eb_relocate_slow+0xad0/0xad0 [i915] <4>[ 125.501544] ? debug_check_no_locks_freed+0x2a0/0x2a0 <4>[ 125.501646] ? i915_gem_execbuffer2_ioctl+0x108/0x770 [i915] <4>[ 125.501755] ? i915_gem_execbuffer2_ioctl+0x108/0x770 [i915] <4>[ 125.501779] ? drm_dev_get+0x20/0x20 <4>[ 125.501803] ? __might_fault+0xea/0x1a0 <4>[ 125.501902] ? i915_gem_execbuffer2_ioctl+0x108/0x770 [i915] <4>[ 125.502012] ? i915_gem_execbuffer_ioctl+0xb90/0xb90 [i915] <4>[ 125.502116] ? i915_gem_execbuffer_ioctl+0xb90/0xb90 [i915] <4>[ 125.502218] i915_gem_execbuffer2_ioctl+0x3c5/0x770 [i915] <4>[ 125.502243] ? drm_dev_enter+0xe0/0xe0 <4>[ 125.502260] ? lock_acquire+0x138/0x3c0 <4>[ 125.502362] ? i915_gem_execbuffer_ioctl+0xb90/0xb90 [i915] <4>[ 125.502470] ? i915_gem_object_create.part.28+0x570/0x570 [i915] <4>[ 125.502575] ? i915_gem_execbuffer_ioctl+0xb90/0xb90 [i915] <4>[ 125.502680] ? i915_gem_execbuffer_ioctl+0xb90/0xb90 [i915] <4>[ 125.502702] drm_ioctl_kernel+0x151/0x200 <4>[ 125.502721] ? drm_ioctl_permit+0x2a0/0x2a0 <4>[ 125.502746] drm_ioctl+0x63a/0x920 <4>[ 125.502844] ? i915_gem_execbuffer_ioctl+0xb90/0xb90 [i915] <4>[ 125.502868] ? drm_getstats+0x20/0x20 <4>[ 125.502886] ? trace_hardirqs_on_thunk+0x1a/0x1c <4>[ 125.502919] do_vfs_ioctl+0x173/0xe90 <4>[ 125.502936] ? trace_hardirqs_on_thunk+0x1a/0x1c <4>[ 125.502957] ? ioctl_preallocate+0x170/0x170 <4>[ 125.502978] ? trace_hardirqs_on_thunk+0x1a/0x1c <4>[ 125.503002] ? 
retint_kernel+0x2d/0x2d <4>[ 125.503024] ksys_ioctl+0x35/0x60 <4>[ 125.503043] __x64_sys_ioctl+0x6a/0xb0 <4>[ 125.503061] do_syscall_64+0x97/0x400 <4>[ 125.503081] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 125.503101] RIP: 0033:0x7fe18e4f65d7 <4>[ 125.503116] RSP: 002b:00007ffe2ffc06a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 <4>[ 125.503145] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe18e4f65d7 <4>[ 125.503168] RDX: 00007ffe2ffc07f0 RSI: 0000000040406469 RDI: 0000000000000003 <4>[ 125.503191] RBP: 00007ffe2ffc07f0 R08: 0000000000000004 R09: 00007ffe2ffcf080 <4>[ 125.503215] R10: 000000000002c7de R11: 0000000000000246 R12: 0000000040406469 <4>[ 125.503238] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 <4>[ 125.503268] Code: e8 18 a0 c9 da 48 8b 35 25 3a 47 00 49 c7 c0 a0 3b 88 c0 b9 95 05 00 00 48 c7 c2 e0 49 88 c0 48 c7 c7 8d 3b 5d c0 e8 ee 7e db da <0f> 0b 48 89 ef e8 a4 26 f5 da e9 51 fe ff ff e8 8a 26 f5 da e9 <1>[ 125.503548] RIP: i915_retire_requests+0x3f2/0x590 [i915] RSP: ffff88004e5dec40 Fixes: 643b450a594e ("drm/i915: Only track live rings for retiring") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180504101147.26286-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d68739b94dac..e4cf76ec14a6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1426,9 +1426,6 @@ void i915_retire_requests(struct drm_i915_private *i915) if (!i915->gt.active_requests) return; - /* An outstanding request must be on a still active ring somewhere */ - GEM_BUG_ON(list_empty(&i915->gt.active_rings)); - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) ring_retire_requests(ring); } -- cgit From 71ace7ca2545d7cd7522988c16ad6c94e6169366 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 May 2018 14:57:26 +0100 Subject: drm/i915: Disable tasklet scheduling across initial scheduling During request submission, we call the engine->schedule() function so that we may reorder the active requests as required for inheriting the new request's priority. This may schedule several tasklets to run on the local CPU, but we will need to schedule the tasklets again for the new request. Delay all the local tasklets until the end, so that we only have to process the queue just once. v2: Beware PREEMPT_RCU, as then local_bh_disable() is then not a superset of rcu_read_lock(). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180507135731.10587-2-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_request.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e4cf76ec14a6..f336942229cf 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1110,12 +1110,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. 
*/ - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) engine->schedule(request, &request->ctx->sched); rcu_read_unlock(); - - local_bh_disable(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ -- cgit From 0adb90d330bb5f0d7fba511af5af3fc1ba93fb7a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 May 2018 16:35:14 +0100 Subject: drm/i915: Annotate timeline lock nesting CI noticed <4>[ 23.430701] ============================================ <4>[ 23.430706] WARNING: possible recursive locking detected <4>[ 23.430713] 4.17.0-rc4-CI-CI_DRM_4156+ #1 Not tainted <4>[ 23.430720] -------------------------------------------- <4>[ 23.430725] systemd-udevd/169 is trying to acquire lock: <4>[ 23.430732] (ptrval) (&(&timeline->lock)->rlock){....}, at: move_to_timeline+0x48/0x12c [i915] <4>[ 23.430888] but task is already holding lock: <4>[ 23.430894] (ptrval) (&(&timeline->lock)->rlock){....}, at: i915_request_submit+0x1a/0x40 [i915] <4>[ 23.430995] other info that might help us debug this: <4>[ 23.431002] Possible unsafe locking scenario: <4>[ 23.431007] CPU0 <4>[ 23.431010] ---- <4>[ 23.431013] lock(&(&timeline->lock)->rlock); <4>[ 23.431021] lock(&(&timeline->lock)->rlock); <4>[ 23.431028] *** DEADLOCK *** <4>[ 23.431036] May be due to missing lock nesting notation <4>[ 23.431044] 5 locks held by systemd-udevd/169: <4>[ 23.431049] #0: (ptrval) (&dev->mutex){....}, at: __driver_attach+0x42/0xe0 <4>[ 23.431065] #1: (ptrval) (&dev->mutex){....}, at: __driver_attach+0x50/0xe0 <4>[ 23.431078] #2: (ptrval) (&dev->struct_mutex){+.+.}, at: i915_gem_init+0xca/0x630 [i915] <4>[ 23.431174] #3: (ptrval) (rcu_read_lock){....}, at: submit_notify+0x35/0x124 [i915] <4>[ 23.431271] #4: (ptrval) (&(&timeline->lock)->rlock){....}, at: i915_request_submit+0x1a/0x40 [i915] <4>[ 23.431369] stack backtrace: <4>[ 23.431377] CPU: 0 PID: 169 Comm: systemd-udevd Not tainted 4.17.0-rc4-CI-CI_DRM_4156+ #1 <4>[ 23.431385] Hardware name: Dell Inc. OptiPlex GX280 /0G8310, BIOS A04 02/09/2005 <4>[ 23.431394] Call Trace: <4>[ 23.431403] dump_stack+0x67/0x9b <4>[ 23.431411] __lock_acquire+0xc67/0x1b50 <4>[ 23.431421] ? ring_buffer_lock_reserve+0x154/0x3f0 <4>[ 23.431429] ? lock_acquire+0xa6/0x210 <4>[ 23.431435] lock_acquire+0xa6/0x210 <4>[ 23.431530] ? move_to_timeline+0x48/0x12c [i915] <4>[ 23.431540] _raw_spin_lock+0x2a/0x40 <4>[ 23.431634] ? move_to_timeline+0x48/0x12c [i915] <4>[ 23.431730] move_to_timeline+0x48/0x12c [i915] <4>[ 23.431826] __i915_request_submit+0xfa/0x280 [i915] <4>[ 23.431923] i915_request_submit+0x25/0x40 [i915] <4>[ 23.432024] i9xx_submit_request+0x11/0x140 [i915] <4>[ 23.432120] submit_notify+0x8d/0x124 [i915] <4>[ 23.432202] __i915_sw_fence_complete+0x81/0x250 [i915] <4>[ 23.432300] __i915_request_add+0x31c/0x7c0 [i915] <4>[ 23.432395] i915_gem_init+0x621/0x630 [i915] <4>[ 23.432476] i915_driver_load+0xbee/0x10b0 [i915] <4>[ 23.432485] ? trace_hardirqs_on_caller+0xe0/0x1b0 <4>[ 23.432566] i915_pci_probe+0x29/0x90 [i915] <4>[ 23.432574] pci_device_probe+0xa1/0x130 <4>[ 23.432582] driver_probe_device+0x306/0x480 <4>[ 23.432589] __driver_attach+0xb7/0xe0 <4>[ 23.432596] ? driver_probe_device+0x480/0x480 <4>[ 23.432602] ? driver_probe_device+0x480/0x480 <4>[ 23.432609] bus_for_each_dev+0x74/0xc0 <4>[ 23.432616] bus_add_driver+0x15f/0x250 <4>[ 23.432623] ? 0xffffffffa02d7000 <4>[ 23.432629] driver_register+0x52/0xc0 <4>[ 23.432635] ? 
0xffffffffa02d7000 <4>[ 23.432642] do_one_initcall+0x58/0x370 <4>[ 23.432653] ? do_init_module+0x1d/0x1ea <4>[ 23.432660] ? rcu_read_lock_sched_held+0x6f/0x80 <4>[ 23.432667] ? kmem_cache_alloc_trace+0x282/0x2e0 <4>[ 23.432675] do_init_module+0x56/0x1ea <4>[ 23.432682] load_module+0x2435/0x2b20 <4>[ 23.432694] ? __se_sys_finit_module+0xd3/0xf0 <4>[ 23.432701] __se_sys_finit_module+0xd3/0xf0 <4>[ 23.432710] do_syscall_64+0x55/0x190 <4>[ 23.432717] entry_SYSCALL_64_after_hwframe+0x49/0xbe <4>[ 23.432724] RIP: 0033:0x7fa780782839 <4>[ 23.432729] RSP: 002b:00007ffcea73e668 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 <4>[ 23.432738] RAX: ffffffffffffffda RBX: 0000561a472a4b30 RCX: 00007fa780782839 <4>[ 23.432745] RDX: 0000000000000000 RSI: 00007fa7804610e5 RDI: 000000000000000e <4>[ 23.432752] RBP: 00007fa7804610e5 R08: 0000000000000000 R09: 00007ffcea73e780 <4>[ 23.432758] R10: 000000000000000e R11: 0000000000000246 R12: 0000000000000000 <4>[ 23.432765] R13: 0000561a47296450 R14: 0000000000020000 R15: 0000561a472a4b30 but did not report it as an issue as it only occurred during the first module on boot. This is due to the removal of the distinct global timeline, and its separate lock class. So instead mark up the expected nesting. An alternative would be to define a separate lock class for the engine, but since we only expect to have a single point of nesting, we can avoid having multiple lock classes for the struct. Fixes: a89d1f921c15 ("drm/i915: Split i915_gem_timeline into individual timelines") Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Tested-by: Michel Thierry Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180508153514.20251-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f336942229cf..8928894dd9c7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -502,7 +502,7 @@ static void move_to_timeline(struct i915_request *request, GEM_BUG_ON(request->timeline == &request->engine->timeline); lockdep_assert_held(&request->engine->timeline.lock); - spin_lock(&request->timeline->lock); + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); } -- cgit
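
For reference, a minimal sketch of the lockdep nesting pattern the final patch above relies on, assuming two timeline spinlocks that share a single lock class; the names example_timeline and example_move_request are illustrative only and do not appear in the i915 code:

/*
 * Illustrative only: two timelines whose spinlocks share one lock class.
 * Holding one lock while taking the other looks like recursive locking
 * to lockdep unless the inner acquisition is annotated as nested.
 */
struct example_timeline {
	spinlock_t lock;
	struct list_head requests;
};

static void example_move_request(struct list_head *rq_link,
				 struct example_timeline *from,
				 struct example_timeline *to)
{
	/* Caller already holds the outer (engine) timeline lock. */
	lockdep_assert_held(&to->lock);

	/* Same lock class as 'to': tell lockdep this nesting is expected. */
	spin_lock_nested(&from->lock, SINGLE_DEPTH_NESTING);
	list_move_tail(rq_link, &to->requests);
	spin_unlock(&from->lock);
}

As the commit message notes, a separate lock class per engine timeline would also avoid the false "possible recursive locking" report, but with only a single expected nesting point the SINGLE_DEPTH_NESTING annotation keeps one lock class for the struct.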