diff options
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_irq.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_pci.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 157 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 5 |
5 files changed, 143 insertions, 36 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 006332805702..7614880edad8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -368,9 +368,16 @@ static int i915_getparam(struct drm_device *dev, void *data, break; case I915_PARAM_HAS_SCHEDULER: value = 0; - if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { value |= I915_SCHEDULER_CAP_ENABLED; + + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption && + i915_modparams.enable_execlists && + !i915_modparams.enable_guc_submission) + value |= I915_SCHEDULER_CAP_PREEMPTION; + } break; + case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ case I915_PARAM_HAS_GEM: diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e5997e818673..de777139f6a1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1382,10 +1382,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (port_count(&execlists->port[0])) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index d3a6fe5e3c10..745b6a6e0188 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -424,6 +424,7 @@ static const struct intel_device_info intel_cherryview_info __initconst = { #define GEN9_FEATURES \ GEN8_FEATURES, \ + .has_logical_ring_preemption = 1, \ .has_csr = 1, \ .has_guc = 1, \ .has_ipc = 1, \ @@ -477,6 +478,7 @@ static const struct intel_device_info intel_skylake_gt4_info __initconst = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ + .has_logical_ring_preemption = 1, \ .has_guc = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5821762d9007..c5b76082d695 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -208,9 +208,9 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ - #define WA_TAIL_DWORDS 2 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) +#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -429,6 +429,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } +static inline void elsp_write(u64 desc, u32 __iomem *elsp) +{ + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlists.port; @@ -454,8 +460,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + elsp_write(desc, elsp); } } @@ -488,26 +493,43 @@ static void port_assign(struct execlist_port *port, port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); } +static void inject_preempt_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce = + &engine->i915->preempt_context->engine[engine->id]; + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; + + GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); + + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); + ce->ring->tail += WA_TAIL_BYTES; + ce->ring->tail &= (ce->ring->size - 1); + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + + for (n = execlists_num_ports(&engine->execlists); --n; ) + elsp_write(0, elsp); + + elsp_write(ce->lrc_desc, elsp); +} + +static bool can_preempt(struct intel_engine_cs *engine) +{ + return INTEL_INFO(engine->i915)->has_logical_ring_preemption; +} + static void execlists_dequeue(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *last; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; - last = port_request(port); - if (last) - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL - * as we resubmit the request. See gen8_emit_breadcrumb() - * for where we prepare the padding after the end of the - * request. - */ - last->tail = last->wa_tail; - /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -532,7 +554,65 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); - while (rb) { + if (!rb) + goto unlock; + + if (last) { + /* + * Don't resubmit or switch until all outstanding + * preemptions (lite-restore) are seen. Then we + * know the next preemption status we see corresponds + * to this ELSP update. + */ + if (port_count(&port[0]) > 1) + goto unlock; + + if (can_preempt(engine) && + rb_entry(rb, struct i915_priolist, node)->priority > + max(last->priotree.priority, 0)) { + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). + */ + inject_preempt_context(engine); + execlists->preempt = true; + goto unlock; + } else { + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). + */ + if (port_count(&port[1])) + goto unlock; + + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == req:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -595,11 +675,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: execlists->first = rb; if (submit) port_assign(port, last); +unlock: spin_unlock_irq(&engine->timeline->lock); if (submit) @@ -680,13 +761,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static bool execlists_elsp_ready(const struct intel_engine_cs *engine) -{ - const struct execlist_port *port = engine->execlists.port; - - return port_count(&port[0]) + port_count(&port[1]) < 2; -} - /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. @@ -695,7 +769,7 @@ static void intel_lrc_irq_handler(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port *port = execlists->port; + struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; /* We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -780,6 +854,23 @@ static void intel_lrc_irq_handler(unsigned long data) if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && + buf[2*head + 1] == PREEMPT_ID) { + execlist_cancel_port_requests(execlists); + + spin_lock_irq(&engine->timeline->lock); + unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->timeline->lock); + + GEM_BUG_ON(!execlists->preempt); + execlists->preempt = false; + continue; + } + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists->preempt) + continue; + /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); @@ -811,7 +902,7 @@ static void intel_lrc_irq_handler(unsigned long data) } } - if (execlists_elsp_ready(engine)) + if (!execlists->preempt) execlists_dequeue(engine); intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); @@ -824,7 +915,7 @@ static void insert_request(struct intel_engine_cs *engine, struct i915_priolist *p = lookup_priolist(engine, pt, prio); list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine)) + if (ptr_unmask_bits(p, 1)) tasklet_hi_schedule(&engine->execlists.irq_tasklet); } @@ -954,8 +1045,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) } spin_unlock_irq(&engine->timeline->lock); - - /* XXX Do we need to preempt to make room for us and our deps? */ } static struct intel_ring * @@ -1151,6 +1240,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1166,6 +1257,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); @@ -1211,6 +1304,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = 0; } + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1364,6 +1459,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; + execlists->preempt = false; /* After a GPU reset, we may have requests to replay */ if (!i915_modparams.enable_guc_submission && execlists->first) @@ -1659,7 +1755,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, */ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *cs++ = MI_NOOP; + /* Ensure there's always at least one preemption point per-request. */ + *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); } @@ -1680,7 +1777,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, @@ -1708,7 +1804,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 56d7ae9f298b..0fedda17488c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -239,6 +239,11 @@ struct intel_engine_execlists { } port[EXECLIST_MAX_PORTS]; /** + * @preempt: are we currently handling a preempting context switch? + */ + bool preempt; + + /** * @port_mask: number of execlist ports - 1 */ unsigned int port_mask; |