diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.c | 504 |
1 files changed, 370 insertions, 134 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 31455eceeb0c..2dfaddb8811e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -176,8 +176,6 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ -#define WA_TAIL_DWORDS 2 -#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) struct virtual_engine { struct intel_engine_cs base; @@ -247,7 +245,7 @@ static void mark_eio(struct i915_request *rq) GEM_BUG_ON(i915_request_signaled(rq)); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); } @@ -295,7 +293,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority; + return READ_ONCE(rq->sched.attr.priority); } static int effective_prio(const struct i915_request *rq) @@ -458,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ -static u64 +static u32 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - u64 desc; + u32 desc; desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) @@ -472,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. - */ - if (INTEL_GEN(engine->i915) >= 11) { - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } - - return desc; + return i915_ggtt_offset(ce->state) | desc; } static inline unsigned int dword_in_page(void *addr) @@ -1006,7 +990,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } - rq->engine = owner; + WRITE_ONCE(rq->engine, owner); owner->submit_request(rq); active = NULL; } @@ -1194,7 +1178,49 @@ static void reset_active(struct i915_request *rq, __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; +} + +static u32 intel_context_get_runtime(const struct intel_context *ce) +{ + /* + * We can use either ppHWSP[16] which is recorded before the context + * switch (and so excludes the cost of context switches) or use the + * value from the context image itself, which is saved/restored earlier + * and so includes the cost of the save. + */ + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); +} + +static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) +{ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + ce->runtime.num_underflow += dt < 0; + ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); +#endif +} + +static void intel_context_update_runtime(struct intel_context *ce) +{ + u32 old; + s32 dt; + + if (intel_context_is_barrier(ce)) + return; + + old = ce->runtime.last; + ce->runtime.last = intel_context_get_runtime(ce); + dt = ce->runtime.last - old; + + if (unlikely(dt <= 0)) { + CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", + old, ce->runtime.last, dt); + st_update_runtime_underflow(ce, dt); + return; + } + + ewma_runtime_add(&ce->runtime.avg, dt); + ce->runtime.total += dt; } static inline struct intel_engine_cs * @@ -1213,16 +1239,21 @@ __execlists_schedule_in(struct i915_request *rq) if (ce->tag) { /* Use a fixed tag for OA and friends */ - ce->lrc_desc |= (u64)ce->tag << 32; + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); + ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc &= ~GENMASK_ULL(47, 37); - ce->lrc_desc |= - (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << - GEN11_SW_CTX_ID_SHIFT; - BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + unsigned int tag = ffs(engine->context_tag); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); } + ce->lrc.ccid |= engine->execlists.ccid; + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1262,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) static inline void __execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) + struct intel_engine_cs * const engine, + unsigned int ccid) { struct intel_context * const ce = rq->context; @@ -1276,10 +1308,19 @@ __execlists_schedule_out(struct i915_request *rq, * If we have just completed this context, the engine may now be * idle and we want to re-enter powersaving. */ - if (list_is_last(&rq->link, &ce->timeline->requests) && + if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + set_bit(ccid - 1, &engine->context_tag); + } + + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put_async(engine->gt); @@ -1304,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; + u32 ccid; trace_i915_request_out(rq); + ccid = rq->context->lrc.ccid; old = READ_ONCE(ce->inflight); do cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; while (!try_cmpxchg(&ce->inflight, &old, cur)); if (!cur) - __execlists_schedule_out(rq, old); + __execlists_schedule_out(rq, old, ccid); i915_request_put(rq); } @@ -1320,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; u32 tail, prev; /* @@ -1359,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1395,15 +1438,26 @@ trace_ports(const struct intel_engine_execlists *execlists, ports[1] ? ports[1]->fence.seqno : 0); } +static inline bool +reset_in_progress(const struct intel_engine_execlists *execlists) +{ + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); +} + static __maybe_unused bool assert_pending_valid(const struct intel_engine_execlists *execlists, const char *msg) { struct i915_request * const *port, *rq; struct intel_context *ce = NULL; + bool sentinel = false; trace_ports(execlists, msg, execlists->pending); + /* We may be messing around with the lists during reset, lalala */ + if (reset_in_progress(execlists)) + return true; + if (!execlists->pending[0]) { GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; @@ -1430,6 +1484,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } ce = rq->context; + /* + * Sentinels are supposed to be lonely so they flush the + * current exection off the HW. Check that they are the + * only request in the pending submission. + */ + if (sentinel) { + GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + + sentinel = i915_request_has_sentinel(rq); + if (sentinel && port != execlists->pending) { + GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + /* Hold tightly onto the lock to prevent concurrent retires! */ if (!spin_trylock_irqsave(&rq->lock, flags)) continue; @@ -1525,6 +1599,11 @@ static bool can_merge_ctx(const struct intel_context *prev, return true; } +static unsigned long i915_request_flags(const struct i915_request *rq) +{ + return READ_ONCE(rq->fence.flags); +} + static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { @@ -1542,7 +1621,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely((prev->fence.flags ^ next->fence.flags) & + if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & (BIT(I915_FENCE_FLAG_NOPREEMPT) | BIT(I915_FENCE_FLAG_SENTINEL)))) return false; @@ -1550,6 +1629,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (!can_merge_ctx(prev->context, next->context)) return false; + GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); return true; } @@ -1585,7 +1665,7 @@ static bool virtual_matches(const struct virtual_engine *ve, } static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct intel_engine_cs *engine) + struct i915_request *rq) { struct intel_engine_cs *old = ve->siblings[0]; @@ -1593,9 +1673,19 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_lock(&old->breadcrumbs.irq_lock); if (!list_empty(&ve->context.signal_link)) { - list_move_tail(&ve->context.signal_link, - &engine->breadcrumbs.signalers); - intel_engine_signal_breadcrumbs(engine); + list_del_init(&ve->context.signal_link); + + /* + * We cannot acquire the new engine->breadcrumbs.irq_lock + * (as we are holding a breadcrumbs.irq_lock already), + * so attach this request to the signaler on submission. + * The queued irq_work will occur when we finally drop + * the engine->active.lock after dequeue. + */ + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); + + /* Also transfer the pending irq_work for the old breadcrumb. */ + intel_engine_signal_breadcrumbs(rq->engine); } spin_unlock(&old->breadcrumbs.irq_lock); } @@ -1605,6 +1695,11 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, &(rq__)->sched.waiters_list, \ wait_link) +#define for_each_signaler(p__, rq__) \ + list_for_each_entry_rcu(p__, \ + &(rq__)->sched.signalers_list, \ + signal_link) + static void defer_request(struct i915_request *rq, struct list_head * const pl) { LIST_HEAD(list); @@ -1626,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; @@ -1661,7 +1759,8 @@ static void defer_active(struct intel_engine_cs *engine) } static bool -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +need_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int hint; @@ -1675,6 +1774,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) return hint >= effective_prio(rq); } +static bool +timeslice_yield(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice (i.e. + * until it is scheduled out and replaced by a new submission, + * possibly even its own lite-restore). The HW only sends an interrupt + * on the first miss, and we do know if that semaphore has been + * signaled, or even if it is now stuck on another semaphore. Play + * safe, yield if it might be stuck -- it will be given a fresh + * timeslice in the near future. + */ + return rq->context->lrc.ccid == READ_ONCE(el->yield); +} + +static bool +timeslice_expired(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + return timer_expired(&el->timer) || timeslice_yield(el, rq); +} + static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { @@ -1690,15 +1815,15 @@ timeslice(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.timeslice_duration_ms); } -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) +static unsigned long active_timeslice(const struct intel_engine_cs *engine) { - const struct i915_request *rq = *engine->execlists.active; + const struct intel_engine_execlists *execlists = &engine->execlists; + const struct i915_request *rq = *execlists->active; if (!rq || i915_request_completed(rq)) return 0; - if (engine->execlists.switch_priority_hint < effective_prio(rq)) + if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) return 0; return timeslice(engine); @@ -1715,8 +1840,11 @@ static void set_timeslice(struct intel_engine_cs *engine) static void start_timeslice(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; + int prio = queue_prio(execlists); - execlists->switch_priority_hint = execlists->queue_priority_hint; + WRITE_ONCE(execlists->switch_priority_hint, prio); + if (prio == INT_MIN) + return; if (timer_pending(&execlists->timer)) return; @@ -1849,13 +1977,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = NULL; } else if (need_timeslice(engine, last) && - timer_expired(&engine->execlists.timer)) { + timeslice_expired(execlists, last)) { ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d\n", + "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + execlists->queue_priority_hint, + yesno(timeslice_yield(execlists, last))); ring_set_paused(engine, 1); defer_active(engine); @@ -1938,13 +2067,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) "", yesno(engine != ve->siblings[0])); - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; + WRITE_ONCE(ve->request, NULL); + WRITE_ONCE(ve->base.execlists.queue_priority_hint, + INT_MIN); rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); GEM_BUG_ON(!(rq->execution_mask & engine->mask)); - rq->engine = engine; + WRITE_ONCE(rq->engine, engine); if (engine != ve->siblings[0]) { u32 *regs = ve->context.lrc_reg_state; @@ -1957,7 +2087,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, engine); + virtual_xfer_breadcrumbs(ve, rq); /* * Move the bound engine to the top of the list @@ -2064,6 +2194,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(last && !can_merge_ctx(last->context, rq->context)); + GEM_BUG_ON(last && + i915_seqno_passed(last->fence.seqno, + rq->fence.seqno)); submit = true; last = rq; @@ -2112,6 +2245,7 @@ done: } clear_ports(port + 1, last_port - port); + WRITE_ONCE(execlists->yield, -1); execlists_submit_ports(engine); set_preempt_timeout(engine, *active); } else { @@ -2134,6 +2268,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) execlists_schedule_out(*port); clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); + smp_wmb(); /* complete the seqlock for execlists_active() */ WRITE_ONCE(execlists->active, execlists->inflight); } @@ -2144,12 +2279,6 @@ invalidate_csb_entries(const u32 *first, const u32 *last) clflush((void *)last); } -static inline bool -reset_in_progress(const struct intel_engine_execlists *execlists) -{ - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); -} - /* * Starting with Gen12, the status has a new format: * @@ -2240,7 +2369,6 @@ static void process_csb(struct intel_engine_cs *engine) */ head = execlists->csb_head; tail = READ_ONCE(*execlists->csb_write); - ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); if (unlikely(head == tail)) return; @@ -2254,6 +2382,7 @@ static void process_csb(struct intel_engine_cs *engine) */ rmb(); + ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); do { bool promote; @@ -2288,11 +2417,13 @@ static void process_csb(struct intel_engine_cs *engine) if (promote) { struct i915_request * const *old = execlists->active; + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + + ring_set_paused(engine, 0); + /* Point active to the new ELSP; prevent overwriting */ WRITE_ONCE(execlists->active, execlists->pending); - - if (!inject_preempt_hang(execlists)) - ring_set_paused(engine, 0); + smp_wmb(); /* notify execlists_active() */ /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", old); @@ -2300,12 +2431,12 @@ static void process_csb(struct intel_engine_cs *engine) execlists_schedule_out(*old++); /* switch pending to inflight */ - GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - WRITE_ONCE(execlists->active, - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending))); + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending)); + smp_wmb(); /* complete the seqlock */ + WRITE_ONCE(execlists->active, execlists->inflight); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -2320,8 +2451,37 @@ static void process_csb(struct intel_engine_cs *engine) * coherent (visible from the CPU) before the * user interrupt and CSB is processed. */ - GEM_BUG_ON(!i915_request_completed(*execlists->active) && - !reset_in_progress(execlists)); + if (GEM_SHOW_DEBUG() && + !i915_request_completed(*execlists->active) && + !reset_in_progress(execlists)) { + struct i915_request *rq __maybe_unused = + *execlists->active; + const u32 *regs __maybe_unused = + rq->context->lrc_reg_state; + + ENGINE_TRACE(engine, + "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", + ENGINE_READ(engine, RING_START), + ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, + ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_MI_MODE)); + ENGINE_TRACE(engine, + "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", + i915_ggtt_offset(rq->ring->vma), + rq->head, rq->tail, + rq->fence.context, + lower_32_bits(rq->fence.seqno), + hwsp_seqno(rq)); + ENGINE_TRACE(engine, + "ctx:{start:%08x, head:%04x, tail:%04x}, ", + regs[CTX_RING_START], + regs[CTX_RING_HEAD], + regs[CTX_RING_TAIL]); + + GEM_BUG_ON("context completed before request"); + } + execlists_schedule_out(*execlists->active++); GEM_BUG_ON(execlists->active - execlists->inflight > @@ -2349,7 +2509,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) { + if (!READ_ONCE(engine->execlists.pending[0])) { rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); rcu_read_unlock(); @@ -2366,12 +2526,12 @@ static void __execlists_hold(struct i915_request *rq) if (i915_request_is_active(rq)) __i915_request_unsubmit(rq); - RQ_TRACE(rq, "on hold\n"); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); list_move_tail(&rq->sched.link, &rq->engine->active.hold); i915_request_set_hold(rq); + RQ_TRACE(rq, "on hold\n"); - list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + for_each_waiter(p, rq) { struct i915_request *w = container_of(p->waiter, typeof(*w), sched); @@ -2385,7 +2545,7 @@ static void __execlists_hold(struct i915_request *rq) if (i915_request_completed(w)) continue; - if (i915_request_on_hold(rq)) + if (i915_request_on_hold(w)) continue; list_move_tail(&w->sched.link, &list); @@ -2443,6 +2603,7 @@ static bool execlists_hold(struct intel_engine_cs *engine, GEM_BUG_ON(i915_request_on_hold(rq)); GEM_BUG_ON(rq->engine != engine); __execlists_hold(rq); + GEM_BUG_ON(list_empty(&engine->active.hold)); unlock: spin_unlock_irq(&engine->active.lock); @@ -2452,23 +2613,27 @@ unlock: static bool hold_request(const struct i915_request *rq) { struct i915_dependency *p; + bool result = false; /* * If one of our ancestors is on hold, we must also be on hold, * otherwise we will bypass it and execute before it. */ - list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + rcu_read_lock(); + for_each_signaler(p, rq) { const struct i915_request *s = container_of(p->signaler, typeof(*s), sched); if (s->engine != rq->engine) continue; - if (i915_request_on_hold(s)) - return true; + result = i915_request_on_hold(s); + if (result) + break; } + rcu_read_unlock(); - return false; + return result; } static void __execlists_unhold(struct i915_request *rq) @@ -2478,6 +2643,8 @@ static void __execlists_unhold(struct i915_request *rq) do { struct i915_dependency *p; + RQ_TRACE(rq, "hold release\n"); + GEM_BUG_ON(!i915_request_on_hold(rq)); GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); @@ -2486,21 +2653,24 @@ static void __execlists_unhold(struct i915_request *rq) i915_sched_lookup_priolist(rq->engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - RQ_TRACE(rq, "hold release\n"); /* Also release any children on this engine that are ready */ - list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + for_each_waiter(p, rq) { struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + /* Propagate any change in error status */ + if (rq->fence.error) + i915_request_set_error_once(w, rq->fence.error); + if (w->engine != rq->engine) continue; - if (!i915_request_on_hold(rq)) + if (!i915_request_on_hold(w)) continue; /* Check that no other parents are also on hold */ - if (hold_request(rq)) + if (hold_request(w)) continue; list_move_tail(&w->sched.link, &list); @@ -2615,13 +2785,13 @@ static bool execlists_capture(struct intel_engine_cs *engine) if (!cap) return true; + spin_lock_irq(&engine->active.lock); cap->rq = execlists_active(&engine->execlists); - GEM_BUG_ON(!cap->rq); - - rcu_read_lock(); - cap->rq = active_request(cap->rq->context->timeline, cap->rq); - cap->rq = i915_request_get_rcu(cap->rq); - rcu_read_unlock(); + if (cap->rq) { + cap->rq = active_request(cap->rq->context->timeline, cap->rq); + cap->rq = i915_request_get_rcu(cap->rq); + } + spin_unlock_irq(&engine->active.lock); if (!cap->rq) goto err_free; @@ -2660,27 +2830,25 @@ err_free: return false; } -static noinline void preempt_reset(struct intel_engine_cs *engine) +static void execlists_reset(struct intel_engine_cs *engine, const char *msg) { const unsigned int bit = I915_RESET_ENGINE + engine->id; unsigned long *lock = &engine->gt->reset.flags; - if (i915_modparams.reset < 3) + if (!intel_has_reset_engine(engine->gt)) return; if (test_and_set_bit(bit, lock)) return; + ENGINE_TRACE(engine, "reset for %s\n", msg); + /* Mark this tasklet as disabled to avoid waiting for it to complete */ tasklet_disable_nosync(&engine->execlists.tasklet); - ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", - READ_ONCE(engine->props.preempt_timeout_ms), - jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); - ring_set_paused(engine, 1); /* Freeze the current request in place */ if (execlists_capture(engine)) - intel_engine_reset(engine, "preemption time out"); + intel_engine_reset(engine, msg); else ring_set_paused(engine, 0); @@ -2711,6 +2879,13 @@ static void execlists_submission_tasklet(unsigned long data) bool timeout = preempt_timeout(engine); process_csb(engine); + + if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { + engine->execlists.error_interrupt = 0; + if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */ + execlists_reset(engine, "CS error"); + } + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { unsigned long flags; @@ -2719,8 +2894,8 @@ static void execlists_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->active.lock, flags); /* Recheck after serialising with direct-submission */ - if (timeout && preempt_timeout(engine)) - preempt_reset(engine); + if (unlikely(timeout && preempt_timeout(engine))) + execlists_reset(engine, "preemption time out"); } } @@ -2793,6 +2968,7 @@ static void execlists_submit_request(struct i915_request *request) spin_lock_irqsave(&engine->active.lock, flags); if (unlikely(ancestor_on_hold(engine, request))) { + RQ_TRACE(request, "ancestor on hold\n"); list_add_tail(&request->sched.link, &engine->active.hold); i915_request_set_hold(request); } else { @@ -2874,6 +3050,7 @@ __execlists_update_reg_state(const struct intel_context *ce, regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); regs[CTX_RING_HEAD] = head; regs[CTX_RING_TAIL] = ring->tail; + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; /* RPCS */ if (engine->class == RENDER_CLASS) { @@ -2899,7 +3076,7 @@ __execlists_context_pin(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; + ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine, ce->ring->tail); @@ -2921,22 +3098,6 @@ static void execlists_context_reset(struct intel_context *ce) CE_TRACE(ce, "reset\n"); GEM_BUG_ON(!intel_context_is_pinned(ce)); - /* - * Because we emit WA_TAIL_DWORDS there may be a disparity - * between our bookkeeping in ce->ring->head and ce->ring->tail and - * that stored in context. As we only write new commands from - * ce->ring->tail onwards, everything before that is junk. If the GPU - * starts reading from its RING_HEAD from the context, it may try to - * execute that junk and die. - * - * The contexts that are stilled pinned on resume belong to the - * kernel, and are local to each engine. All other contexts will - * have their head/tail sanitized upon pinning before use, so they - * will never see garbage, - * - * So to avoid that we reset the context images upon resume. For - * simplicity, we just zero everything out. - */ intel_ring_reset(ce->ring, ce->ring->emit); /* Scrub away the garbage */ @@ -2944,7 +3105,7 @@ static void execlists_context_reset(struct intel_context *ce) ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -2964,7 +3125,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); + if (!i915_request_timeline(rq)->has_initial_breadcrumb) + return 0; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -3257,7 +3419,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) goto err; @@ -3346,6 +3508,49 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static void enable_error_interrupt(struct intel_engine_cs *engine) +{ + u32 status; + + engine->execlists.error_interrupt = 0; + ENGINE_WRITE(engine, RING_EMR, ~0u); + ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ + + status = ENGINE_READ(engine, RING_ESR); + if (unlikely(status)) { + dev_err(engine->i915->drm.dev, + "engine '%s' resumed still in error: %08x\n", + engine->name, status); + __intel_gt_reset(engine->gt, engine->mask); + } + + /* + * On current gen8+, we have 2 signals to play with + * + * - I915_ERROR_INSTUCTION (bit 0) + * + * Generate an error if the command parser encounters an invalid + * instruction + * + * This is a fatal error. + * + * - CP_PRIV (bit 2) + * + * Generate an error on privilege violation (where the CP replaces + * the instruction with a no-op). This also fires for writes into + * read-only scratch pages. + * + * This is a non-fatal error, parsing continues. + * + * * there are a few others defined for odd HW that we do not use + * + * Since CP_PRIV fires for cases where we have chosen to ignore the + * error (as the HW is validating and suppressing the mistakes), we + * only unmask the instruction error bit. + */ + ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); +} + static void enable_execlists(struct intel_engine_cs *engine) { u32 mode; @@ -3367,7 +3572,9 @@ static void enable_execlists(struct intel_engine_cs *engine) i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); - engine->context_tag = 0; + enable_error_interrupt(engine); + + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); } static bool unexpected_starting_state(struct intel_engine_cs *engine) @@ -3384,9 +3591,6 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) static int execlists_resume(struct intel_engine_cs *engine) { - intel_engine_apply_workarounds(engine); - intel_engine_apply_whitelist(engine); - intel_mocs_init_engine(engine); intel_engine_reset_breadcrumbs(engine); @@ -3517,9 +3721,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; - /* We still have requests in-flight; the engine should be active */ - GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - ce = rq->context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); @@ -3529,8 +3730,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) goto out_replay; } + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + /* Context has requests still in-flight; it should not be idle! */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); head = intel_ring_wrap(ce->ring, rq->head); GEM_BUG_ON(head == ce->ring->tail); @@ -3581,7 +3786,7 @@ out_replay: head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine, head); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -3604,7 +3809,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) static void nop_submission_tasklet(unsigned long data) { + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + /* The driver is wedged; don't process any more events. */ + WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) @@ -4194,8 +4402,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) @@ -4273,6 +4484,8 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; + engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) @@ -4340,6 +4553,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } + reset_csb_pointers(engine); /* Finally, take ownership and responsibility for cleanup! */ @@ -4514,8 +4732,13 @@ populate_lr_context(struct intel_context *ce, inhibit = false; } - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ + /* Clear the ppHWSP (inc. per-context counters) */ + memset(vaddr, 0, PAGE_SIZE); + + /* + * The second page of the context object contains some registers which + * must be set up prior to the first execution. + */ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, ce, engine, ring, inhibit); @@ -4553,8 +4776,17 @@ static int __execlists_context_alloc(struct intel_context *ce, if (!ce->timeline) { struct intel_timeline *tl; + struct i915_vma *hwsp; + + /* + * Use the static global HWSP for the kernel context, and + * a dynamically allocated cacheline for everyone else. + */ + hwsp = NULL; + if (unlikely(intel_context_is_barrier(ce))) + hwsp = engine->status_page.vma; - tl = intel_timeline_create(engine->gt, NULL); + tl = intel_timeline_create(engine->gt, hwsp); if (IS_ERR(tl)) { ret = PTR_ERR(tl); goto error_deref_obj; @@ -4723,7 +4955,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = rq->execution_mask; if (unlikely(!mask)) { /* Invalid selection, submit to a random engine in error */ - i915_request_skip(rq, -ENODEV); + i915_request_set_error_once(rq, -ENODEV); mask = ve->siblings[0]->mask; } @@ -4737,7 +4969,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) static void virtual_submission_tasklet(unsigned long data) { struct virtual_engine * const ve = (struct virtual_engine *)data; - const int prio = ve->base.execlists.queue_priority_hint; + const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -5133,11 +5365,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tE "); } - last = NULL; - count = 0; + if (execlists->switch_priority_hint != INT_MIN) + drm_printf(m, "\t\tSwitch priority hint: %d\n", + READ_ONCE(execlists->switch_priority_hint)); if (execlists->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - execlists->queue_priority_hint); + READ_ONCE(execlists->queue_priority_hint)); + + last = NULL; + count = 0; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); int i; |