From 639f2f24895fb37dd67dfecabd2c74019ed64140 Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Fri, 13 Dec 2019 07:51:52 -0800 Subject: drm/i915: Introduce new macros for tracing New macros ENGINE_TRACE(), CE_TRACE(), RQ_TRACE() and GT_TRACE() are introduced to tag the device name and engine name in context and request tracing in i915. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191213155152.69182-2-venkata.s.dhanalakota@intel.com --- drivers/gpu/drm/i915/i915_request.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 96991d64759c..a561b8efe869 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -49,6 +49,13 @@ struct i915_capture_list { struct i915_vma *vma; }; +#define RQ_TRACE(rq, fmt, ...) do { \ + const struct i915_request *rq__ = (rq); \ + ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d" fmt, \ + rq__->fence.context, rq__->fence.seqno, \ + hwsp_seqno(rq__), ##__VA_ARGS__); \ +} while (0) + enum { /* * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. -- cgit From 85bedbf191e82aac0d7f05623bccfeccdcd91cea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Dec 2019 01:16:59 +0000 Subject: drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp As we stash a pointer to the HWSP cacheline on the request, when reading it we only need to confirm that the cacheline is still valid by checking that the request and timeline are still intact.
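As a usage sketch for the tracing macros introduced in the first patch above: the wrapping function below is hypothetical (not part of either patch), but it shows how a call site would look.

static void example_notify(struct i915_request *rq)
{
	/*
	 * Expands via ENGINE_TRACE(), so the emitted line is tagged with
	 * the device and engine name, then this request's fence
	 * context:seqno and the seqno currently visible in the HWSP,
	 * ahead of the caller's own format string.
	 */
	RQ_TRACE(rq, "submitted\n");
}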
v2: Protect hwsp_cacheline with RCU Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191217011659.3092130-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_timeline.c | 64 ++++++++++---------------- drivers/gpu/drm/i915/gt/intel_timeline_types.h | 12 ++++- drivers/gpu/drm/i915/i915_request.c | 4 +- drivers/gpu/drm/i915/i915_request.h | 5 +- 4 files changed, 39 insertions(+), 46 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index d71aafb66d6e..ee5dc4fbdeb9 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -15,6 +15,9 @@ #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS + struct intel_timeline_hwsp { struct intel_gt *gt; struct intel_gt_timelines *gt_timelines; @@ -23,14 +26,6 @@ struct intel_timeline_hwsp { u64 free_bitmap; }; -struct intel_timeline_cacheline { - struct i915_active active; - struct intel_timeline_hwsp *hwsp; - void *vaddr; -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS -}; - static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); i915_active_fini(&cl->active); - kfree(cl); + kfree_rcu(cl, rcu); } __i915_active_call @@ -514,46 +509,35 @@ int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline *tl; + struct intel_timeline_cacheline *cl; int err; + GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline)); + rcu_read_lock(); - tl = rcu_dereference(from->timeline); - if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref)) - tl = NULL; + cl = rcu_dereference(from->hwsp_cacheline); + if (unlikely(!i915_active_acquire_if_busy(&cl->active))) + goto unlock; /* seqno wrapped and completed!
*/ + if (unlikely(i915_request_completed(from))) + goto release; rcu_read_unlock(); - if (!tl) /* already completed */ - return 1; - GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); - - err = -EAGAIN; - if (mutex_trylock(&tl->mutex)) { - struct intel_timeline_cacheline *cl = from->hwsp_cacheline; - - if (i915_request_completed(from)) { - err = 1; - goto unlock; - } + err = cacheline_ref(cl, to); + if (err) + goto out; - err = cacheline_ref(cl, to); - if (err) - goto unlock; + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; - if (likely(cl == tl->hwsp_cacheline)) { - *hwsp = tl->hwsp_offset; - } else { /* across a seqno wrap, recover the original offset */ - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * - CACHELINE_BYTES; - } +out: + i915_active_release(&cl->active); + return err; +release: + i915_active_release(&cl->active); unlock: - mutex_unlock(&tl->mutex); - } - intel_timeline_put(tl); - - return err; + rcu_read_unlock(); + return 1; } void intel_timeline_unpin(struct intel_timeline *tl) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index aaf15cbe1ce1..24d040f14e89 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -10,14 +10,15 @@ #include #include #include +#include #include #include "i915_active_types.h" struct drm_i915_private; struct i915_vma; -struct intel_timeline_cacheline; struct i915_syncmap; +struct intel_timeline_hwsp; struct intel_timeline { u64 fence_context; @@ -87,4 +88,13 @@ struct intel_timeline { struct rcu_head rcu; }; +struct intel_timeline_cacheline { + struct i915_active active; + + struct intel_timeline_hwsp *hwsp; + void *vaddr; + + struct rcu_head rcu; +}; + #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a59b803aef92..269470d3527a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -655,9 +655,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->execution_mask = ce->engine->mask; rq->flags = 0; - rcu_assign_pointer(rq->timeline, tl); + RCU_INIT_POINTER(rq->timeline, tl); + RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - rq->hwsp_cacheline = tl->hwsp_cacheline; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a561b8efe869..aa38290eea3d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -30,6 +30,7 @@ #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" +#include "gt/intel_timeline_types.h" #include "i915_gem.h" #include "i915_scheduler.h" @@ -41,8 +42,6 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; -struct intel_timeline; -struct intel_timeline_cacheline; struct i915_capture_list { struct i915_capture_list *next; @@ -183,7 +182,7 @@ struct i915_request { * inside the timeline's HWSP vma, but it is only valid while this * request has not completed and guarded by the timeline mutex. 
*/ - struct intel_timeline_cacheline *hwsp_cacheline; + struct intel_timeline_cacheline __rcu *hwsp_cacheline; /** Position in the ring of the start of the request */ u32 head; -- cgit From b81e4d9b594188a8babf06442e1d6f28de78c68f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Dec 2019 12:43:53 +0000 Subject: drm/i915/gt: Track engine round-trip times Knowing the round trip time of an engine is useful for tracking the health of the system as well as providing a metric for the baseline responsiveness of the engine. We can use the latter metric for automatically tuning our waits in selftests and when idling so we don't confuse a slower system with a dead one. Upon idling the engine, we send one last pulse to switch the context away from precious user state to the volatile kernel context. We know the engine is idle at this point, and the pulse is non-preemptible, so this provides us with a good measurement of the round trip time. It also provides us with faster engine parking for ringbuffer submission, which is a welcome bonus (e.g. softer-rc6). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Stuart Summers Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191219105043.4169050-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20191219124353.8607-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 +++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 22 +++++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 11 +++++++++++ drivers/gpu/drm/i915/i915_request.h | 4 ++++ 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3d1d48bf90cf..6dd18f93d45c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -334,6 +334,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ewma__engine_latency_init(&engine->latency); seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1481,6 +1482,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tAwake? 
%d\n", atomic_read(&engine->wakeref.count)); drm_printf(m, "\tBarriers?: %s\n", yesno(!llist_empty(&engine->barrier_tasks))); + drm_printf(m, "\tLatency: %luus\n", + ewma__engine_latency_read(&engine->latency)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index bcbda8e52d41..8fb7b34fc5a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,6 +73,15 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct i915_request *rq = to_request(fence); + + ewma__engine_latency_add(&rq->engine->latency, + ktime_us_delta(rq->fence.timestamp, + rq->duration.emitted)); +} + static void __queue_and_release_pm(struct i915_request *rq, struct intel_timeline *tl, @@ -163,7 +172,18 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Install ourselves as a preemption barrier */ rq->sched.attr.priority = I915_PRIORITY_BARRIER; - __i915_request_commit(rq); + if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */ + /* + * Use an interrupt for precise measurement of duration, + * otherwise we rely on someone else retiring all the requests + * which may delay the signaling (i.e. we will likely wait + * until the background request retirement running every + * second or two). + */ + BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq)); + dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration); + rq->duration.emitted = ktime_get(); + } /* Expose ourselves to the world */ __queue_and_release_pm(rq, ce->timeline, engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 17f1f1441efc..7f227da09d66 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -7,6 +7,7 @@ #ifndef __INTEL_ENGINE_TYPES__ #define __INTEL_ENGINE_TYPES__ +#include #include #include #include @@ -119,6 +120,9 @@ enum intel_engine_id { #define INVALID_ENGINE ((enum intel_engine_id)-1) }; +/* A simple estimator for the round-trip latency of an engine */ +DECLARE_EWMA(_engine_latency, 6, 4) struct st_preempt_hang { struct completion completion; unsigned int count; @@ -316,6 +320,13 @@ struct intel_engine_cs { struct intel_timeline *timeline; } legacy; + /* + * We track the average duration of the idle pulse on parking the + * engine to keep an estimate of how fast the engine is + * under ideal conditions.
+ */ + struct ewma__engine_latency latency; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index aa38290eea3d..c18c0bcd0193 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -150,6 +150,10 @@ struct i915_request { union { wait_queue_entry_t submitq; struct i915_sw_dma_fence_cb dmaq; + struct i915_request_duration_cb { + struct dma_fence_cb cb; + ktime_t emitted; + } duration; }; struct list_head execute_cb; struct i915_sw_fence semaphore; -- cgit From 9f3ccd40acf4a348aab4eda140cdb4d2f1f773b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Dec 2019 10:12:29 +0000 Subject: drm/i915: Drop GEM context as a direct link from i915_request Keep the intel_context as being the primary state for i915_request, with the GEM context a backpointer from the low level state for the rarer cases we need client information. Our goal is to remove such references to clients from the backend, and leave the HW submission agnostic to client interfaces and self-contained. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191220101230.256839-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 15 +++----- drivers/gpu/drm/i915/gem/i915_gem_context.h | 38 ------------------ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 7 +--- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 8 ++-- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 4 +- drivers/gpu/drm/i915/gt/intel_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_context.h | 42 ++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 7 +++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +-- drivers/gpu/drm/i915/gt/intel_lrc.c | 47 +++++++++++------------ drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++-------- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 8 ++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 20 +++++----- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 +-- drivers/gpu/drm/i915/gvt/scheduler.c | 27 ++++++------- drivers/gpu/drm/i915/i915_gem.c | 11 +++--- drivers/gpu/drm/i915/i915_gpu_error.c | 11 ++++-- drivers/gpu/drm/i915/i915_perf.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 21 ++++++---- drivers/gpu/drm/i915/i915_request.h | 3 +- drivers/gpu/drm/i915/i915_scheduler.c | 2 +- 21 files changed, 165 insertions(+), 161 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 6618c0c6506c..42585a20a9ae 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -69,6 +69,7 @@ #include +#include "gt/intel_context.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" @@ -423,15 +424,6 @@ static void kill_context(struct i915_gem_context *ctx) struct i915_gem_engines_iter it; struct intel_context *ce; - /* - * If we are already banned, it was due to a guilty request causing - * a reset and the entire context being evicted from the GPU. 
- */ - if (i915_gem_context_is_banned(ctx)) - return; - - i915_gem_context_set_banned(ctx); - /* * Map the user's engine back to the actual engines; one virtual * engine will be mapped to multiple engines, and using ctx->engine[] @@ -442,6 +434,9 @@ static void kill_context(struct i915_gem_context *ctx) for_each_gem_engine(ce, __context_engines_static(ctx), it) { struct intel_engine_cs *engine; + if (intel_context_set_banned(ce)) + continue; + /* * Check the current active state of this context; if we * are currently executing on the GPU we need to evict @@ -1093,7 +1088,7 @@ static void set_ppgtt_barrier(void *data) static int emit_ppgtt_update(struct i915_request *rq, void *data) { - struct i915_address_space *vm = rq->hw_context->vm; + struct i915_address_space *vm = rq->context->vm; struct intel_engine_cs *engine = rq->engine; u32 base = engine->mmio_base; u32 *cs; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18e50a769a6e..69932899803e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -91,26 +91,6 @@ static inline void i915_gem_context_clear_persistence(struct i915_gem_context *c clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags); } -static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) -{ - __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - static inline bool i915_gem_context_user_engines(const struct i915_gem_context *ctx) { @@ -129,24 +109,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -static inline bool -i915_gem_context_nopreempt(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_NOPREEMPT, &ctx->flags); -} - -static inline void -i915_gem_context_set_nopreempt(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_NOPREEMPT, &ctx->flags); -} - -static inline void -i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx) -{ - clear_bit(CONTEXT_NOPREEMPT, &ctx->flags); -} - static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 69df5459c350..017ca803ab47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -134,11 +134,8 @@ struct i915_gem_context { * @flags: small set of booleans */ unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 -#define CONTEXT_USER_ENGINES 3 -#define CONTEXT_NOPREEMPT 4 +#define CONTEXT_CLOSED 0 +#define CONTEXT_USER_ENGINES 1 struct mutex mutex; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7d07131aa3f7..cbd2bcade3c8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -730,9 +730,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) unsigned 
int i, batch; int err; - if (unlikely(i915_gem_context_is_banned(eb->gem_context))) - return -EIO; - INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); @@ -2175,7 +2172,7 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } - if (i915_gem_context_nopreempt(eb->gem_context)) + if (intel_context_nopreempt(eb->context)) eb->request->flags |= I915_REQUEST_NOPREEMPT; return 0; @@ -2261,6 +2258,9 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) if (err) return err; + if (unlikely(intel_context_is_banned(ce))) + return -EIO; + /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 2979f0fd9270..0ba524a414c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -281,7 +281,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; struct list_head *pos; spin_lock(&b->irq_lock); @@ -338,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) */ spin_lock(&b->irq_lock); if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; list_del(&rq->signal_link); if (list_empty(&ce->signals)) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index b1e346d2d35f..ae0dc40031df 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -302,7 +302,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, int err; /* Only suitable for use in remotely modifying this context */ - GEM_BUG_ON(rq->hw_context == ce); + GEM_BUG_ON(rq->context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ /* Queue this switch after current activity by this context. 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index b39eb1fcfbca..65389cb8a565 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -7,7 +7,9 @@ #ifndef __INTEL_CONTEXT_H__ #define __INTEL_CONTEXT_H__ +#include #include +#include #include "i915_active.h" #include "intel_context_types.h" @@ -160,4 +162,44 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz) return u64_to_ptr(struct intel_ring, sz); } +static inline bool intel_context_is_banned(const struct intel_context *ce) +{ + return test_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool intel_context_set_banned(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_BANNED, &ce->flags); +} + +static inline bool +intel_context_force_single_submission(const struct intel_context *ce) +{ + return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline void +intel_context_set_single_submission(struct intel_context *ce) +{ + __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags); +} + +static inline bool +intel_context_nopreempt(const struct intel_context *ce) +{ + return test_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_set_nopreempt(struct intel_context *ce) +{ + set_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + +static inline void +intel_context_clear_nopreempt(struct intel_context *ce) +{ + clear_bit(CONTEXT_NOPREEMPT, &ce->flags); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index d1204cc899a3..597448f6e98b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -54,8 +54,11 @@ struct intel_context { struct intel_timeline *timeline; unsigned long flags; -#define CONTEXT_ALLOC_BIT 0 -#define CONTEXT_VALID_BIT 1 +#define CONTEXT_ALLOC_BIT 0 +#define CONTEXT_VALID_BIT 1 +#define CONTEXT_BANNED 2 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 3 +#define CONTEXT_NOPREEMPT 4 u32 *lrc_reg_state; u64 lrc_desc; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 6dd18f93d45c..e091b3366eae 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1523,9 +1523,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request_ring(m, rq); - if (rq->hw_context->lrc_reg_state) { + if (rq->context->lrc_reg_state) { drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } spin_unlock_irqrestore(&engine->active.lock, flags); @@ -1586,7 +1586,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) for (port = execlists->pending; (rq = *port); port++) { /* Exclude any contexts already counted in active */ - if (!intel_context_inflight_count(rq->hw_context)) + if (!intel_context_inflight_count(rq->context)) engine->stats.active++; } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b7da6d7e7e2a..ec93c47c4bdd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -880,7 +880,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { - struct intel_engine_cs *owner = rq->hw_context->engine; + struct intel_engine_cs *owner = rq->context->engine; /* * Decouple the virtual breadcrumb 
before moving it @@ -1051,7 +1051,7 @@ static void restore_default_state(struct intel_context *ce, static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; u32 head; /* @@ -1092,11 +1092,11 @@ static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { struct intel_engine_cs * const engine = rq->engine; - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; intel_context_get(ce); - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + if (unlikely(intel_context_is_banned(ce))) reset_active(rq, engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) @@ -1124,7 +1124,7 @@ __execlists_schedule_in(struct i915_request *rq) static inline struct i915_request * execlists_schedule_in(struct i915_request *rq, int idx) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; struct intel_engine_cs *old; GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); @@ -1155,7 +1155,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; /* * NB process_csb() is not under the engine->active.lock and hence @@ -1193,7 +1193,7 @@ __execlists_schedule_out(struct i915_request *rq, static inline void execlists_schedule_out(struct i915_request *rq) { - struct intel_context * const ce = rq->hw_context; + struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; trace_i915_request_out(rq); @@ -1210,7 +1210,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; u64 desc = ce->lrc_desc; u32 tail; @@ -1311,13 +1311,13 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, GEM_BUG_ON(!kref_read(&rq->fence.refcount)); GEM_BUG_ON(!i915_request_is_active(rq)); - if (ce == rq->hw_context) { + if (ce == rq->context) { GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", ce->timeline->fence_context, port - execlists->pending); return false; } - ce = rq->hw_context; + ce = rq->context; /* Hold tightly onto the lock to prevent concurrent retires! 
*/ if (!spin_trylock_irqsave(&rq->lock, flags)) @@ -1326,8 +1326,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, if (i915_request_completed(rq)) goto unlock; - if (i915_active_is_idle(&ce->active) && - !i915_gem_context_is_kernel(ce->gem_context)) { + if (i915_active_is_idle(&ce->active) && ce->gem_context) { GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", ce->timeline->fence_context, port - execlists->pending); @@ -1399,7 +1398,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - i915_gem_context_force_single_submission(ce->gem_context)); + intel_context_force_single_submission(ce)); } static bool can_merge_ctx(const struct intel_context *prev, @@ -1435,7 +1434,7 @@ static bool can_merge_rq(const struct i915_request *prev, (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) return false; - if (!can_merge_ctx(prev->hw_context, next->hw_context)) + if (!can_merge_ctx(prev->context, next->context)) return false; return true; @@ -1622,7 +1621,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) return 0; /* Force a fast reset for terminated contexts (ignoring sysfs!) */ - if (unlikely(i915_gem_context_is_banned(rq->gem_context))) + if (unlikely(intel_context_is_banned(rq->context))) return 1; return READ_ONCE(engine->props.preempt_timeout_ms); @@ -1730,7 +1729,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * tendency to ignore us rewinding the TAIL to the * end of an earlier request. */ - last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; + last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && timer_expired(&engine->execlists.timer)) { @@ -1802,7 +1801,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq != ve->request); GEM_BUG_ON(rq->engine != &ve->base); - GEM_BUG_ON(rq->hw_context != &ve->context); + GEM_BUG_ON(rq->context != &ve->context); if (rq_prio(rq) >= queue_prio(execlists)) { if (!virtual_matches(ve, rq, engine)) { @@ -1921,7 +1920,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * same LRCA, i.e. we must submit 2 different * contexts if we submit 2 ELSP. */ - if (last->hw_context == rq->hw_context) + if (last->context == rq->context) goto done; if (i915_request_has_sentinel(last)) @@ -1934,8 +1933,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. 
*/ - if (ctx_single_port_submission(last->hw_context) || - ctx_single_port_submission(rq->hw_context)) + if (ctx_single_port_submission(last->context) || + ctx_single_port_submission(rq->context)) goto done; merge = false; @@ -1949,8 +1948,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(last && - !can_merge_ctx(last->hw_context, - rq->hw_context)); + !can_merge_ctx(last->context, + rq->context)); submit = true; last = rq; @@ -2564,7 +2563,7 @@ static int execlists_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); /* * Flush enough space to reduce the likelihood of waiting after @@ -3071,7 +3070,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) /* We still have requests in-flight; the engine should be active */ GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - ce = rq->hw_context; + ce = rq->context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); if (i915_request_completed(rq)) { diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f3d1e921fba6..3d293be64fc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -41,27 +41,30 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) static void engine_skip_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *hung_ctx = rq->gem_context; + struct intel_context *hung_ctx = rq->context; if (!i915_request_is_active(rq)) return; lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->gem_context == hung_ctx) + if (rq->context == hung_ctx) i915_request_skip(rq, -EIO); } -static void client_mark_guilty(struct drm_i915_file_private *file_priv, - const struct i915_gem_context *ctx) +static void client_mark_guilty(struct i915_request *rq, bool banned) { - unsigned int score; + struct i915_gem_context *ctx = rq->context->gem_context; + struct drm_i915_file_private *file_priv = ctx->file_priv; unsigned long prev_hang; + unsigned int score; - if (i915_gem_context_is_banned(ctx)) + if (IS_ERR_OR_NULL(file_priv)) + return; + + score = 0; + if (banned) score = I915_CLIENT_SCORE_CONTEXT_BAN; - else - score = 0; prev_hang = xchg(&file_priv->hang_timestamp, jiffies); if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) @@ -76,14 +79,15 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv, } } -static bool context_mark_guilty(struct i915_gem_context *ctx) +static bool mark_guilty(struct i915_request *rq) { + struct i915_gem_context *ctx = rq->context->gem_context; unsigned long prev_hang; bool banned; int i; if (i915_gem_context_is_closed(ctx)) { - i915_gem_context_set_banned(ctx); + intel_context_set_banned(rq->context); return true; } @@ -110,18 +114,17 @@ static bool context_mark_guilty(struct i915_gem_context *ctx) if (banned) { DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - i915_gem_context_set_banned(ctx); + intel_context_set_banned(rq->context); } - if (!IS_ERR_OR_NULL(ctx->file_priv)) - client_mark_guilty(ctx->file_priv, ctx); + client_mark_guilty(rq, banned); return banned; } -static void context_mark_innocent(struct i915_gem_context *ctx) +static void mark_innocent(struct i915_request *rq) { - atomic_inc(&ctx->active_count); + 
atomic_inc(&rq->context->gem_context->active_count); } void __i915_request_reset(struct i915_request *rq, bool guilty) @@ -137,11 +140,11 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_skip(rq, -EIO); - if (context_mark_guilty(rq->gem_context)) + if (mark_guilty(rq)) engine_skip_context(rq); } else { dma_fence_set_error(&rq->fence, -EAGAIN); - context_mark_innocent(rq->gem_context); + mark_innocent(rq); } rcu_read_unlock(); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index b61658601c86..a5d30d1468f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1480,7 +1480,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; + *cs++ = i915_ggtt_offset(rq->context->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1550,7 +1550,7 @@ static int remap_l3_slice(struct i915_request *rq, int slice) static int remap_l3(struct i915_request *rq) { - struct i915_gem_context *ctx = rq->gem_context; + struct i915_gem_context *ctx = rq->context->gem_context; int i, err; if (!ctx->remap_slice) @@ -1597,7 +1597,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) static int switch_context(struct i915_request *rq) { - struct intel_context *ce = rq->hw_context; + struct intel_context *ce = rq->context; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); @@ -1631,7 +1631,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); + GEM_BUG_ON(!intel_context_is_pinned(request->context)); GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ac8b9116d307..619e34813ade 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1195,13 +1195,13 @@ static int __cancel_active0(struct live_preempt_cancel *arg) __func__, arg->engine->name)) return -EIO; - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_ARB_CHECK); if (IS_ERR(rq)) return PTR_ERR(rq); + clear_bit(CONTEXT_BANNED, &rq->context->flags); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -1209,7 +1209,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) goto out; } - i915_gem_context_set_banned(arg->a.ctx); + intel_context_set_banned(rq->context); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -1244,13 +1244,13 @@ static int __cancel_active1(struct live_preempt_cancel *arg) __func__, arg->engine->name)) return -EIO; - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq[0] = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_NOOP); /* no preemption */ if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -1258,7 +1258,6 @@ static int __cancel_active1(struct live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); rq[1] = spinner_create_request(&arg->b.spin, arg->b.ctx, arg->engine, 
MI_ARB_CHECK); @@ -1267,13 +1266,14 @@ static int __cancel_active1(struct live_preempt_cancel *arg) goto out; } + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); if (err) goto out; - i915_gem_context_set_banned(arg->b.ctx); + intel_context_set_banned(rq[1]->context); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -1316,13 +1316,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) __func__, arg->engine->name)) return -EIO; - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq[0] = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_ARB_CHECK); if (IS_ERR(rq[0])) return PTR_ERR(rq[0]); + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); i915_request_get(rq[0]); i915_request_add(rq[0]); if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { @@ -1330,13 +1330,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) goto out; } - clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); if (IS_ERR(rq[1])) { err = PTR_ERR(rq[1]); goto out; } + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); i915_request_get(rq[1]); err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); i915_request_add(rq[1]); @@ -1357,7 +1357,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (err) goto out; - i915_gem_context_set_banned(arg->a.ctx); + intel_context_set_banned(rq[2]->context); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -1404,13 +1404,13 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) return 0; GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); rq = spinner_create_request(&arg->a.spin, arg->a.ctx, arg->engine, MI_NOOP); /* preemption disabled */ if (IS_ERR(rq)) return PTR_ERR(rq); + clear_bit(CONTEXT_BANNED, &rq->context->flags); i915_request_get(rq); i915_request_add(rq); if (!igt_wait_for_spinner(&arg->a.spin, rq)) { @@ -1418,7 +1418,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) goto out; } - i915_gem_context_set_banned(arg->a.ctx); + intel_context_set_banned(rq->context); err = intel_engine_pulse(arg->engine); /* force reset */ if (err) goto out; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 44a7d2e736a7..007636221a71 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); + u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); guc_wq_item_append(guc, engine->guc_id, ctx_desc, @@ -315,7 +315,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (last && rq->hw_context != last->hw_context) { + if (last && rq->context != last->context) { if (port == last_port) goto done; @@ -420,7 +420,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled) stalled = false; __i915_request_reset(rq, stalled); - intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); + intel_lr_context_reset(engine, rq->context, rq->head, stalled); out_unlock: 
spin_unlock_irqrestore(&engine->active.lock, flags); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 5b2a7d072ec9..228c66534e21 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -59,7 +59,7 @@ static void set_context_pdp_root_pointer( static void update_shadow_pdps(struct intel_vgpu_workload *workload) { struct drm_i915_gem_object *ctx_obj = - workload->req->hw_context->state->obj; + workload->req->context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -130,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - workload->req->hw_context->state->obj; + workload->req->context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -205,9 +205,9 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } -static inline bool is_gvt_request(struct i915_request *req) +static inline bool is_gvt_request(struct i915_request *rq) { - return i915_gem_context_force_single_submission(req->gem_context); + return intel_context_force_single_submission(rq->context); } static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) @@ -307,7 +307,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) u32 *cs; int err; - if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context)) + if (IS_GEN(req->i915, 9) && is_inhibit_context(req->context)) intel_vgpu_restore_inhibit_context(vgpu, req); /* @@ -363,11 +363,10 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) } static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, - struct i915_gem_context *ctx) + struct intel_context *ce) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_ppgtt *ppgtt = - i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -380,8 +379,6 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } - - i915_vm_put(&ppgtt->vm); } static int @@ -529,7 +526,7 @@ static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); struct i915_request *rq = workload->req; struct execlist_ring_context *shadow_ring_context = - (struct execlist_ring_context *)rq->hw_context->lrc_reg_state; + (struct execlist_ring_context *)rq->context->lrc_reg_state; shadow_ring_context->bb_per_ctx_ptr.val = (shadow_ring_context->bb_per_ctx_ptr.val & @@ -628,7 +625,7 @@ static int prepare_workload(struct intel_vgpu_workload *workload) update_shadow_pdps(workload); - set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context); + set_context_ppgtt_from_shadow(workload, s->shadow[ring]); ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { @@ -787,7 +784,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_request *rq = workload->req; struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; - struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj; + struct drm_i915_gem_object *ctx_obj = rq->context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ 
-1232,8 +1229,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(ctx)) return PTR_ERR(ctx); - i915_gem_context_set_force_single_submission(ctx); - ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); i915_context_ppgtt_root_save(s, ppgtt); @@ -1249,6 +1244,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) goto out_shadow_ctx; } + intel_context_set_single_submission(ce); + if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */ const unsigned int ring_size = 512 * SZ_4K; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f19c678ebefc..6e7010089e23 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1168,19 +1168,18 @@ err_rq: if (!rq) continue; - GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, - &rq->hw_context->flags)); - state = rq->hw_context->state; + GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); + state = rq->context->state; if (!state) continue; /* Serialise with retirement on another CPU */ - err = __intel_context_flush_retire(rq->hw_context); + err = __intel_context_flush_retire(rq->context); if (err) goto out; /* We want to be able to unbind the state from the GGTT */ - GEM_BUG_ON(intel_context_is_pinned(rq->hw_context)); + GEM_BUG_ON(intel_context_is_pinned(rq->context)); /* * As we will hold a reference to the logical state, it will @@ -1230,7 +1229,7 @@ out: if (!rq) continue; - ce = rq->hw_context; + ce = rq->context; i915_request_put(rq); intel_context_put(ce); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8374d50c0770..7e2cb063110c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1221,7 +1221,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, static void record_request(const struct i915_request *request, struct drm_i915_error_request *erq) { - const struct i915_gem_context *ctx = request->gem_context; + const struct i915_gem_context *ctx = request->context->gem_context; erq->flags = request->fence.flags; erq->context = request->fence.context; @@ -1231,7 +1231,7 @@ static void record_request(const struct i915_request *request, erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; - erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0; + erq->pid = ctx && ctx->pid ? 
pid_nr(ctx->pid) : 0; } static void engine_record_requests(struct intel_engine_cs *engine, @@ -1298,7 +1298,10 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine, static bool record_context(struct drm_i915_error_context *e, const struct i915_request *rq) { - const struct i915_gem_context *ctx = rq->gem_context; + const struct i915_gem_context *ctx = rq->context->gem_context; + + if (!ctx) + return false; if (ctx->pid) { struct task_struct *task; @@ -1452,7 +1455,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress) capture = request_record_user_bo(request, ee, capture); capture = capture_vma(capture, - request->hw_context->state, + request->context->state, &ee->ctx); capture = capture_vma(capture, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 3c163a9d69a9..9d4733927a2c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3112,7 +3112,7 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) stream->ops->enable(stream); if (stream->hold_preemption) - i915_gem_context_set_nopreempt(stream->ctx); + intel_context_set_nopreempt(stream->pinned_ctx); } /** @@ -3138,7 +3138,7 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) stream->enabled = false; if (stream->hold_preemption) - i915_gem_context_clear_nopreempt(stream->ctx); + intel_context_clear_nopreempt(stream->pinned_ctx); if (stream->ops->disable) stream->ops->disable(stream); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2118284b796e..218d20d4e414 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -62,6 +62,8 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { + const struct i915_gem_context *ctx; + /* * The timeline struct (as part of the ppgtt underneath a context) * may be freed when the request is no longer in use by the GPU. 
@@ -74,7 +76,11 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->gem_context->name ?: "[" DRIVER_NAME "]"; + ctx = to_request(fence)->context->gem_context; + if (!ctx) + return "[" DRIVER_NAME "]"; + + return ctx->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -269,8 +275,8 @@ bool i915_request_retire(struct i915_request *rq) remove_from_client(rq); list_del(&rq->link); - intel_context_exit(rq->hw_context); - intel_context_unpin(rq->hw_context); + intel_context_exit(rq->context); + intel_context_unpin(rq->context); free_capture_list(rq); i915_sched_node_fini(&rq->sched); @@ -369,7 +375,7 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; - if (i915_gem_context_is_banned(request->gem_context)) + if (intel_context_is_banned(request->context)) i915_request_skip(request, -EIO); /* @@ -648,8 +654,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) goto err_free; rq->i915 = ce->engine->i915; - rq->hw_context = ce; - rq->gem_context = ce->gem_context; + rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; @@ -917,7 +922,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) &from->submit, I915_FENCE_GFP); } else if (intel_engine_has_semaphores(to->engine) && - to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { + to->context->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); } else { ret = i915_sw_fence_await_dma_fence(&to->submit, @@ -1298,7 +1303,7 @@ void __i915_request_queue(struct i915_request *rq, void i915_request_add(struct i915_request *rq) { - struct i915_sched_attr attr = rq->gem_context->sched; + struct i915_sched_attr attr = rq->context->gem_context->sched; struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *prev; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c18c0bcd0193..0e4fe3205ce7 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -115,9 +115,8 @@ struct i915_request { * i915_request_free() will then decrement the refcount on the * context. */ - struct i915_gem_context *gem_context; struct intel_engine_cs *engine; - struct intel_context *hw_context; + struct intel_context *context; struct intel_ring *ring; struct intel_timeline __rcu *timeline; struct list_head signal_link; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2bc2aa46a1b9..bf87c70bfdd9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -213,7 +213,7 @@ static void kick_submission(struct intel_engine_cs *engine, * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ - if (inflight->hw_context == rq->hw_context) + if (inflight->context == rq->context) goto unlock; engine->execlists.queue_priority_hint = prio; -- cgit From 6a8679c048eb104dbcc6aa43a0baa7450de46503 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 22 Dec 2019 23:35:58 +0000 Subject: drm/i915: Mark the GEM context link as RCU protected The only protection for intel_context.gem_context is granted by RCU, so annotate it as an RCU-protected pointer and carefully dereference it in the few occasions we need to use it.
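The access pattern that the __rcu annotation enforces can be sketched as follows; the wrapper function is hypothetical, but its body mirrors the mark_guilty() and record_context() changes in this patch.

static struct i915_gem_context *
example_get_gem_context(struct intel_context *ce)
{
	struct i915_gem_context *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(ce->gem_context);
	/* The context may be freed at any time; take our own reference. */
	if (ctx && !kref_get_unless_zero(&ctx->ref))
		ctx = NULL;
	rcu_read_unlock();

	return ctx; /* caller must i915_gem_context_put() a non-NULL result */
}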
Fixes: 9f3ccd40acf4 ("drm/i915: Drop GEM context as a direct link from i915_request") Signed-off-by: Chris Wilson Cc: Andi Shyti Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191222233558.2201901-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 ++-- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 26 ++++++++++++---- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 40 ++++++++++++++++--------- drivers/gpu/drm/i915/i915_request.c | 6 ++-- drivers/gpu/drm/i915/i915_request.h | 8 +++++ 7 files changed, 62 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 6167e68bbb25..dc90b044a217 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -212,8 +212,8 @@ context_get_vm_rcu(struct i915_gem_context *ctx) static void intel_context_set_gem(struct intel_context *ce, struct i915_gem_context *ctx) { - GEM_BUG_ON(ce->gem_context); - ce->gem_context = ctx; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); + RCU_INIT_POINTER(ce->gem_context, ctx); if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) ce->ring = __intel_context_ring_size(SZ_16K); @@ -244,6 +244,7 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count) if (!e->engines[count]) continue; + RCU_INIT_POINTER(e->engines[count]->gem_context, NULL); intel_context_put(e->engines[count]); } kfree(e); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 7dd03ad9826c..9527a659546c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -44,7 +44,7 @@ struct intel_context { #define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) struct i915_address_space *vm; - struct i915_gem_context *gem_context; + struct i915_gem_context __rcu *gem_context; struct list_head signal_link; struct list_head signals; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index ab8213b90517..1c51296646e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -85,20 +85,27 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; - ctx = rq->context->gem_context; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); if (!ctx) return false; if (i915_gem_context_is_closed(ctx)) { intel_context_set_banned(rq->context); - return true; + banned = true; + goto out; } atomic_inc(&ctx->guilty_count); /* Cool contexts are too cool to be banned! (Used for reset testing.) 
*/ - if (!i915_gem_context_is_bannable(ctx)) - return false; + if (!i915_gem_context_is_bannable(ctx)) { + banned = false; + goto out; + } dev_notice(ctx->i915->drm.dev, "%s context reset due to GPU hang\n", @@ -122,13 +129,20 @@ static bool mark_guilty(struct i915_request *rq) client_mark_guilty(ctx, banned); +out: + i915_gem_context_put(ctx); return banned; } static void mark_innocent(struct i915_request *rq) { - if (rq->context->gem_context) - atomic_inc(&rq->context->gem_context->active_count); + struct i915_gem_context *ctx; + + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx) + atomic_inc(&ctx->active_count); + rcu_read_unlock(); } void __i915_request_reset(struct i915_request *rq, bool guilty) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 118170eb51b4..81f872f9ef03 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1550,7 +1550,7 @@ static int remap_l3_slice(struct i915_request *rq, int slice) static int remap_l3(struct i915_request *rq) { - struct i915_gem_context *ctx = rq->context->gem_context; + struct i915_gem_context *ctx = i915_request_gem_context(rq); int i, err; if (!ctx || !ctx->remap_slice) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7e2cb063110c..fda0977d2059 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1221,7 +1221,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, static void record_request(const struct i915_request *request, struct drm_i915_error_request *erq) { - const struct i915_gem_context *ctx = request->context->gem_context; + const struct i915_gem_context *ctx; erq->flags = request->fence.flags; erq->context = request->fence.context; @@ -1231,7 +1231,13 @@ static void record_request(const struct i915_request *request, erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; - erq->pid = ctx && ctx->pid ? 
pid_nr(ctx->pid) : 0; + + erq->pid = 0; + rcu_read_lock(); + ctx = rcu_dereference(request->context->gem_context); + if (ctx) + erq->pid = pid_nr(ctx->pid); + rcu_read_unlock(); } static void engine_record_requests(struct intel_engine_cs *engine, @@ -1298,28 +1304,34 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine, static bool record_context(struct drm_i915_error_context *e, const struct i915_request *rq) { - const struct i915_gem_context *ctx = rq->context->gem_context; + struct i915_gem_context *ctx; + struct task_struct *task; + bool capture; + rcu_read_lock(); + ctx = rcu_dereference(rq->context->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); if (!ctx) return false; - if (ctx->pid) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(ctx->pid, PIDTYPE_PID); - if (task) { - strcpy(e->comm, task->comm); - e->pid = task->pid; - } - rcu_read_unlock(); + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; } + rcu_read_unlock(); e->sched_attr = ctx->sched; e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); - return i915_gem_context_no_error_capture(ctx); + capture = i915_gem_context_no_error_capture(ctx); + + i915_gem_context_put(ctx); + return capture; } struct capture_vma { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 14a5a99284fa..44a0d1a950c5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -76,7 +76,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - ctx = to_request(fence)->context->gem_context; + ctx = i915_request_gem_context(to_request(fence)); if (!ctx) return "[" DRIVER_NAME "]"; @@ -1312,8 +1312,8 @@ void i915_request_add(struct i915_request *rq) prev = __i915_request_commit(rq); - if (rq->context->gem_context) - attr = rq->context->gem_context->sched; + if (rcu_access_pointer(rq->context->gem_context)) + attr = i915_request_gem_context(rq)->sched; /* * Boost actual workloads past semaphores! diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 0e4fe3205ce7..565322640378 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,6 +28,7 @@ #include <linux/dma-fence.h> #include <linux/lockdep.h> +#include "gem/i915_gem_context_types.h" #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" #include "gt/intel_timeline_types.h" @@ -463,6 +464,13 @@ i915_request_timeline(struct i915_request *rq) lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); } +static inline struct i915_gem_context * +i915_request_gem_context(struct i915_request *rq) +{ + /* Valid only while the request is being constructed (or retired). */ + return rcu_dereference_protected(rq->context->gem_context, true); +} + static inline struct intel_timeline * i915_request_active_timeline(struct i915_request *rq) { -- cgit From 41d329e287fb4888591491be4b4198f14b211cf6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Dec 2019 20:44:10 +0000 Subject: drm/i915: Add spaces before compound GEM_TRACE Add a space between the prefixed format and the user's format so that the two are not mistakenly combined into one long word.
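To see why the trailing space matters: adjacent C string literals are pasted together at compile time, so a prefix ending in "%d" runs straight into the caller's first word. A minimal userspace sketch (printf and the TRACE_* names below are illustrative stand-ins, not driver code):

#include <stdio.h>

#define TRACE_NOSPACE(fmt, ...) printf("current %d" fmt, 7, ##__VA_ARGS__)
#define TRACE_SPACE(fmt, ...)   printf("current %d " fmt, 7, ##__VA_ARGS__)

int main(void)
{
	TRACE_NOSPACE("submit\n"); /* prints "current 7submit" - words fused */
	TRACE_SPACE("submit\n");   /* prints "current 7 submit" */
	return 0;
}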
Fixes: 639f2f24895f ("drm/i915: Introduce new macros for tracing") Signed-off-by: Chris Wilson Cc: Venkata Sandeep Dhanalakota Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20191223204411.2355304-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_context.h | 2 +- drivers/gpu/drm/i915/i915_request.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 1d4a1b1357cf..0f5ae4ff3b10 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -19,7 +19,7 @@ #define CE_TRACE(ce, fmt, ...) do { \ const struct intel_context *ce__ = (ce); \ - ENGINE_TRACE(ce__->engine, "context:%llx" fmt, \ + ENGINE_TRACE(ce__->engine, "context:%llx " fmt, \ ce__->timeline->fence_context, \ ##__VA_ARGS__); \ } while (0) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 565322640378..9784421a3b4d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -51,7 +51,7 @@ struct i915_capture_list { #define RQ_TRACE(rq, fmt, ...) do { \ const struct i915_request *rq__ = (rq); \ - ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d" fmt, \ + ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \ rq__->fence.context, rq__->fence.seqno, \ hwsp_seqno(rq__), ##__VA_ARGS__); \ } while (0) -- cgit From e1c31fb5dde3af91df34d98ca041c746504309d6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Jan 2020 11:42:31 +0000 Subject: drm/i915: Merge i915_request.flags with i915_request.fence.flags As we already have a flags field buried within i915_request, reuse it! 
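dma_fence.flags is an unsigned long manipulated with the kernel's atomic bitops, and each I915_FENCE_FLAG_* enumerator names a bit index, not a mask. A rough userspace model of the reuse (set_flag()/has_flag() are simplified, hypothetical stand-ins for set_bit()/test_bit()):

#include <stdbool.h>
#include <stdio.h>

enum { FLAG_ACTIVE, FLAG_NOPREEMPT, FLAG_SENTINEL, FLAG_BOOST }; /* bit indices */

struct fence { unsigned long flags; };
struct request { struct fence fence; }; /* the flags live in the embedded fence */

static void set_flag(struct request *rq, unsigned int bit)
{
	rq->fence.flags |= 1UL << bit; /* kernel code would use atomic set_bit() */
}

static bool has_flag(const struct request *rq, unsigned int bit)
{
	return rq->fence.flags & (1UL << bit); /* cf. test_bit() */
}

int main(void)
{
	struct request rq = { .fence = { .flags = 0 } };

	set_flag(&rq, FLAG_NOPREEMPT);
	printf("nopreempt=%d sentinel=%d\n",
	       has_flag(&rq, FLAG_NOPREEMPT), has_flag(&rq, FLAG_SENTINEL));
	return 0;
}

One consequence, visible in can_merge_rq() below, is that combining such flags into a mask requires BIT(): the enumerators are indices, and only BIT(index) yields a mask.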
Signed-off-by: Chris Wilson Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20200106114234.2529613-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +-- drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 1 - drivers/gpu/drm/i915/i915_request.h | 43 +++++++++++++++++++----- 7 files changed, 41 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cbd2bcade3c8..d5a0f5ae4a8b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2173,7 +2173,7 @@ static int eb_submit(struct i915_execbuffer *eb) } if (intel_context_nopreempt(eb->context)) - eb->request->flags |= I915_REQUEST_NOPREEMPT; + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 742628e40201..6c6fd185457c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -199,7 +199,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine) goto out_unlock; } - rq->flags |= I915_REQUEST_SENTINEL; + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); idle_pulse(engine, rq); __i915_request_commit(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 170b5a0139a3..28c05e7a1510 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1538,8 +1538,8 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely((prev->flags ^ next->flags) & - (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + if (unlikely((prev->fence.flags ^ next->fence.flags) & + (BIT(I915_FENCE_FLAG_NOPREEMPT) | BIT(I915_FENCE_FLAG_SENTINEL)))) return false; if (!can_merge_ctx(prev->context, next->context)) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f232036c3c7a..d2a3d935d186 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -777,7 +777,7 @@ void intel_rps_boost(struct i915_request *rq) spin_lock_irqsave(&rq->lock, flags); if (!i915_request_has_waitboost(rq) && !dma_fence_is_signaled_locked(&rq->fence)) { - rq->flags |= I915_REQUEST_WAITBOOST; + set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); if (!atomic_fetch_inc(&rps->num_waiters) && READ_ONCE(rps->cur_freq) < rps->boost_freq) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d96604baab94..15cda024e3e4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1153,7 +1153,7 @@ static int live_nopreempt(void *arg) } /* Low priority client, but unpreemptable!
*/ - rq_a->flags |= I915_REQUEST_NOPREEMPT; + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 44a0d1a950c5..be185886e4fc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -658,7 +658,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; - rq->flags = 0; RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 9784421a3b4d..031433691a06 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -77,6 +77,38 @@ enum { * a request is on the various signal_list. */ I915_FENCE_FLAG_SIGNAL, + + /* + * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted + * + * The execution of some requests should not be interrupted. This is + * a sensitive operation as it makes the request super important, + * blocking other higher priority work. Abuse of this flag will + * lead to quality of service issues. + */ + I915_FENCE_FLAG_NOPREEMPT, + + /* + * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue + * + * A high priority sentinel request may be submitted to clear the + * submission queue. As it will be the only request in-flight, upon + * execution all other active requests will have been preempted and + * unsubmitted. This preemptive pulse is used to re-evaluate the + * in-flight requests, particularly in cases where an active context + * is banned and those active requests need to be cancelled. + */ + I915_FENCE_FLAG_SENTINEL, + + /* + * I915_FENCE_FLAG_BOOST - upclock the gpu for this request + * + * Some requests are more important than others! In particular, a + * request that the user is waiting on is typically required for + * interactive latency, which we want to minimise by upclocking + * the GPU. Here we track such boost requests on a per-request basis. + */ + I915_FENCE_FLAG_BOOST, }; /** @@ -225,11 +257,6 @@ struct i915_request { /** Time at which this request was emitted, in jiffies.
*/ unsigned long emitted_jiffies; - unsigned long flags; -#define I915_REQUEST_WAITBOOST BIT(0) -#define I915_REQUEST_NOPREEMPT BIT(1) -#define I915_REQUEST_SENTINEL BIT(2) - /** timeline->request entry for this request */ struct list_head link; @@ -442,18 +469,18 @@ static inline void i915_request_mark_complete(struct i915_request *rq) static inline bool i915_request_has_waitboost(const struct i915_request *rq) { - return rq->flags & I915_REQUEST_WAITBOOST; + return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); } static inline bool i915_request_has_nopreempt(const struct i915_request *rq) { /* Preemption should only be disabled very rarely */ - return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); + return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags)); } static inline bool i915_request_has_sentinel(const struct i915_request *rq) { - return unlikely(rq->flags & I915_REQUEST_SENTINEL); + return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } static inline struct intel_timeline * -- cgit From 9e2750fc80b5cc606365201132d49fed00570dd1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jan 2020 18:47:52 +0000 Subject: drm/i915: Keep track of request among the scheduling lists If we keep track of when the i915_request.sched.link is on the HW runlist or in the priority queue, we can simplify our interactions with the request (such as during rescheduling). This also simplifies the next patch where we introduce a new in-between list, for requests that are ready but neither on the run list nor in the queue. v2: Update i915_sched_node.link explanation for current usage where it is a link on both the queue and on the runlists. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200116184754.2860848-1-chris@chris-wilson.co.uk (cherry picked from commit 672c368f9398042b629740cc9816e8e939eff2db) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 13 ++++++++----- drivers/gpu/drm/i915/i915_request.c | 4 +++- drivers/gpu/drm/i915/i915_request.h | 17 +++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.c | 22 ++++++++++------------ 4 files changed, 38 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d879e5e926af..f1f49c4aa7af 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -2431,11 +2433,12 @@ static void execlists_preempt(struct timer_list *timer) } static void queue_request(struct intel_engine_cs *engine, - struct i915_sched_node *node, - int prio) + struct i915_request *rq) { - GEM_BUG_ON(!list_empty(&node->link)); - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -2471,7 +2474,7 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences.
*/ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, &request->sched, rq_prio(request)); + queue_request(engine, request); GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index be185886e4fc..9ed0d3bc7249 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -408,8 +408,10 @@ bool __i915_request_submit(struct i915_request *request) xfer: /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { list_move_tail(&request->sched.link, &engine->active.requests); + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + } if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 031433691a06..6f5bbfa95513 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -70,6 +70,18 @@ enum { */ I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, + + /* + * I915_FENCE_FLAG_PQUEUE - this request is ready for execution + * + * Using the scheduler, when a request is ready for execution it is put + * into the priority queue, and removed from that queue when transferred + * to the HW runlists. We want to track its membership within the + * priority queue so that we can easily check before rescheduling. + * + * See i915_request_in_priority_queue() + */ + I915_FENCE_FLAG_PQUEUE, + /* * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * @@ -361,6 +373,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); } +static inline bool i915_request_in_priority_queue(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index bf87c70bfdd9..5d96cfba40f8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node, node->attr.priority = prio; - if (list_empty(&node->link)) { - /* - * If the request is not in the priolist queue because - * it is not yet runnable, then it doesn't contribute - * to our preemption decisions. On the other hand, - * if the request is on the HW, it too is not in the - * queue; but in that case we may still need to reorder - * the inflight requests. - */ + /* + * Once the request is ready, it will be placed into the + * priority lists and then onto the HW runlist. Before the + * request is ready, it does not contribute to our preemption + * decisions and we can safely ignore it, as it, and any + * preemption it requires, will be dealt with upon submission.
+ * See engine->submit_request() + */ + if (list_empty(&node->link)) continue; - if (!intel_engine_is_virtual(engine) && - !i915_request_is_active(node_to_request(node))) { + if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = i915_sched_lookup_priolist(engine, -- cgit From c3f1ed90e6ffbf4e22010522351779f920e53d0d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jan 2020 18:47:53 +0000 Subject: drm/i915/gt: Allow temporary suspension of inflight requests In order to support out-of-line error capture, we need to remove the active request from HW and put it to one side while a worker compresses and stores all the details associated with that request. (As that compression may take an arbitrary user-controlled amount of time, we want to let the engine continue running on other workloads while the hanging request is dumped.) Not only do we need to remove the active request, but we also have to remove its context and all requests that were dependent on it (in flight, queued, and awaiting future submission). Finally, once the capture is complete, we need to be able to resubmit the request and its dependents and allow them to execute. v2: Replace stack recursion with a simple list. v3: Check all the parents, not just the first, when searching for a stuck ancestor! References: https://gitlab.freedesktop.org/drm/intel/issues/738 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200116184754.2860848-2-chris@chris-wilson.co.uk (cherry picked from commit 32ff621fd74496f0c33644125fb69ff175859b1f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 167 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/gt/selftest_lrc.c | 102 +++++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 43 +++++++ 5 files changed, 320 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f451ef376548..06ff7695fa29 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -671,6 +671,7 @@ void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) { INIT_LIST_HEAD(&engine->active.requests); + INIT_LIST_HEAD(&engine->active.hold); spin_lock_init(&engine->active.lock); lockdep_set_subclass(&engine->active.lock, subclass); @@ -1422,6 +1423,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq) } } +static unsigned long list_count(struct list_head *list) +{ + struct list_head *pos; + unsigned long count = 0; + + list_for_each(pos, list) + count++; + + return count; +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...)
@@ -1491,6 +1503,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } + drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold)); spin_unlock_irqrestore(&engine->active.lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 00287515e7af..77e68c7643de 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -295,6 +295,7 @@ struct intel_engine_cs { struct { spinlock_t lock; struct list_head requests; + struct list_head hold; /* ready requests, but on hold */ } active; struct llist_head barrier_tasks; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f1f49c4aa7af..93b35cb72aa6 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1635,8 +1635,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) !i915_request_completed(rq)); GEM_BUG_ON(i915_request_is_active(w)); - if (list_empty(&w->sched.link)) - continue; /* Not yet submitted; unready */ + if (!i915_request_is_ready(w)) + continue; if (rq_prio(w) < rq_prio(rq)) continue; @@ -2354,6 +2354,145 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static void __execlists_hold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + RQ_TRACE(rq, "on hold\n"); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + if (!i915_request_is_ready(w)) + continue; + + if (i915_request_completed(w)) + continue; + + if (i915_request_on_hold(w)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +__maybe_unused +static void execlists_hold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Transfer this request onto the hold queue to prevent it + * being resubmitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. + */ + GEM_BUG_ON(i915_request_on_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + __execlists_hold(rq); + + spin_unlock_irq(&engine->active.lock); +} + +static bool hold_request(const struct i915_request *rq) +{ + struct i915_dependency *p; + + /* + * If one of our ancestors is on hold, we must also be on hold, + * otherwise we will bypass it and execute before it.
+ */ + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + if (i915_request_on_hold(s)) + return true; + } + + return false; +} + +static void __execlists_unhold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + GEM_BUG_ON(!i915_request_on_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + RQ_TRACE(rq, "hold release\n"); + + /* Also release any children on this engine that are ready */ + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (w->engine != rq->engine) + continue; + + if (!i915_request_on_hold(w)) + continue; + + /* Check that no other parents are also on hold */ + if (hold_request(w)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +__maybe_unused +static void execlists_unhold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. + */ + __execlists_unhold(rq); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->active.lock); +} + static noinline void preempt_reset(struct intel_engine_cs *engine) { const unsigned int bit = I915_RESET_ENGINE + engine->id; @@ -2466,6 +2605,13 @@ static void submit_queue(struct intel_engine_cs *engine, __submit_queue_imm(engine); } +static bool ancestor_on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_on_hold(rq)); + return !list_empty(&engine->active.hold) && hold_request(rq); +} + static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -2474,12 +2620,17 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences.
*/ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, request); + if (unlikely(ancestor_on_hold(engine, request))) { + list_add_tail(&request->sched.link, &engine->active.hold); + i915_request_set_hold(request); + } else { + queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, request); + submit_queue(engine, request); + } spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -3328,6 +3479,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) i915_priolist_free(p); } + /* On-hold requests will be flushed to timeline upon their release */ + list_for_each_entry(rq, &engine->active.hold, sched.link) + mark_eio(rq); + /* Cancel all attached virtual engines */ while ((rb = rb_first_cached(&execlists->virtual))) { struct virtual_engine *ve = diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 15cda024e3e4..b208c2176bbd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -285,6 +285,107 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int live_hold_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendants are not executed while the capture is in progress. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long heartbeat; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine, &heartbeat); + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + /* We have our request executing, now remove it and reset */ + + if (test_and_set_bit(I915_RESET_ENGINE + id, + &gt->reset.flags)) { + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, + &gt->reset.flags); + + /* Check that we do not resubmit the held request */ + i915_request_get(rq); + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + i915_request_put(rq); + err = -EIO; + goto out; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err =
-ETIME; + } + i915_request_put(rq); + +out: + engine_heartbeat_enable(engine, heartbeat); + intel_context_put(ce); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -3315,6 +3417,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_hold_reset), SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 6f5bbfa95513..f57eadcf3583 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -90,6 +90,13 @@ enum { */ I915_FENCE_FLAG_SIGNAL, + + /* + * I915_FENCE_FLAG_HOLD - this request is currently on hold + * + * This request has been suspended, pending an ongoing investigation. + */ + I915_FENCE_FLAG_HOLD, + /* * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted * @@ -471,6 +478,27 @@ static inline bool i915_request_is_running(const struct i915_request *rq) return __i915_request_has_started(rq); } +/** + * i915_request_is_ready - check if the request is ready for execution + * @rq: the request + * + * Upon construction, the request is instructed to wait upon various + * signals before it is ready to be executed by the HW. That is, we do + * not want to start execution and read data before it is written. In practice, + * this is controlled with a mixture of interrupts and semaphores. Once + * the submit fence is completed, the backend scheduler will place the + * request into its queue and from there submit it for execution. So we + * can detect when a request is eligible for execution (and is under control + * of the scheduler) by querying where it is in any of the scheduler's lists. + * + * Returns true if the request is ready for execution (it may be inflight), + * false otherwise. + */ +static inline bool i915_request_is_ready(const struct i915_request *rq) +{ + return !list_empty(&rq->sched.link); +} + static inline bool i915_request_completed(const struct i915_request *rq) { if (i915_request_signaled(rq)) @@ -500,6 +528,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } +static inline bool i915_request_on_hold(const struct i915_request *rq) +{ + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); +} + +static inline void i915_request_set_hold(struct i915_request *rq) +{ + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + +static inline void i915_request_clear_hold(struct i915_request *rq) +{ + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + static inline struct intel_timeline * i915_request_timeline(struct i915_request *rq) { -- cgit
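A closing note on the hold machinery above: __execlists_hold() and __execlists_unhold() deliberately avoid stack recursion (v2 of the patch) by draining a flat worklist, marking each request and then queuing its not-yet-marked waiters on the same engine. A self-contained sketch of that traversal pattern, with hypothetical types standing in for the driver's request and dependency lists:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	const char *name;
	bool hold;
	struct node *waiters[4]; /* nodes that depend on this one */
	struct node *next;       /* worklist link */
};

/* Put @rq and every transitive waiter on hold without recursing. */
static void hold_all(struct node *rq)
{
	struct node *head = rq;

	rq->hold = true;
	rq->next = NULL;
	while (head) {
		struct node *n = head;

		head = n->next;
		printf("%s on hold\n", n->name);

		for (size_t i = 0; i < 4 && n->waiters[i]; i++) {
			struct node *w = n->waiters[i];

			if (w->hold) /* test the waiter, not @n */
				continue;
			w->hold = true;
			w->next = head;
			head = w;
		}
	}
}

int main(void)
{
	struct node c = { .name = "c" };
	struct node b = { .name = "b", .waiters = { &c } };
	struct node a = { .name = "a", .waiters = { &b, &c } };

	hold_all(&a); /* each of a, b, c is reported exactly once */
	return 0;
}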