From 1f0e785a9cc09b430d0fbe4e9ac438d43c245815 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:12 +0000 Subject: drm/i915: Lift i915_request_show() Extract i915_request_show for reuse in other request chain pretty printers. For a bonus point, quietly change the seqno format from %llx to %lld to match everywhere else. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0e813819b041..673991718ae6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1855,6 +1855,45 @@ out: return timeout; } +static int print_sched_attr(const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + +void i915_request_show(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix) +{ + const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence); + char buf[80] = ""; + int x = 0; + + x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s %llx:%lld%s%s %s @ %dms: %s\n", + prefix, + rq->fence.context, rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + !i915_sw_fence_signaled(&rq->semaphore) ? "&" : + "", + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags) ? "+" : + test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &rq->fence.flags) ? "-" : + "", + buf, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + name); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" -- cgit From 562675d09a351a81e0dfc9a9d7df0f5f4f2fb6a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Nov 2020 16:56:13 +0000 Subject: drm/i915/gt: Update request status flags for debug pretty-printer We plan to expand upon the number of available statuses for when we pretty-print the requests along the timelines, and so need a new set of flags. We have settled upon: Unready [U] - initial status after being submitted, the request is not ready for execution as it is waiting for external fences Ready [R] - all fences the request was waiting on have been signaled, and the request is now ready for execution and will be in a backend queue - a ready request may still need to wait on semaphores [internal fences] Ready/virtual [V] - same as ready, but queued over multiple backends Executing [E] - the request has been transferred from the backend queue and submitted for execution on HW - a completed request may still be regarded as executing, its status may not be updated until it is retired and removed from the lists Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201119165616.10834-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +-- drivers/gpu/drm/i915/gt/intel_lrc.c | 15 +++--- drivers/gpu/drm/i915/gt/intel_lrc.h | 3 +- drivers/gpu/drm/i915/i915_request.c | 85 ++++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_request.h | 3 +- 5 files changed, 88 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c3bb2e9546e6..d4e988b2816a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1491,7 +1491,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - i915_request_show(m, rq, hdr); + i915_request_show(m, rq, hdr, 0); } for (port = execlists->pending; (rq = *port); port++) { char hdr[160]; @@ -1505,7 +1505,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - i915_request_show(m, rq, hdr); + i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); execlists_active_unlock_bh(execlists); @@ -1649,7 +1649,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (rq) { struct intel_timeline *tl = get_timeline(rq); - i915_request_show(m, rq, "\t\tactive "); + i915_request_show(m, rq, "\t\tactive ", 0); drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b6ab1161942a..5257f3c71366 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -5982,7 +5982,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, const struct i915_request *rq, - const char *prefix), + const char *prefix, + int indent), unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; @@ -5997,7 +5998,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, count = 0; list_for_each_entry(rq, &engine->active.requests, sched.link) { if (count++ < max - 1) - show_request(m, rq, "\t\tE "); + show_request(m, rq, "\t\t", 0); else last = rq; } @@ -6007,7 +6008,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, "\t\t...skipping %d executing requests...\n", count - max); } - show_request(m, last, "\t\tE "); + show_request(m, last, "\t\t", 0); } if (execlists->switch_priority_hint != INT_MIN) @@ -6025,7 +6026,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, priolist_for_each_request(rq, p, i) { if (count++ < max - 1) - show_request(m, rq, "\t\tQ "); + show_request(m, rq, "\t\t", 0); else last = rq; } @@ -6036,7 +6037,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, "\t\t...skipping %d queued requests...\n", count - max); } - show_request(m, last, "\t\tQ "); + show_request(m, last, "\t\t", 0); } last = NULL; @@ -6048,7 +6049,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, if (rq) { if (count++ < max - 1) - show_request(m, rq, "\t\tV "); + show_request(m, rq, "\t\t", 0); else last = rq; } @@ -6059,7 +6060,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, "\t\t...skipping %d virtual requests...\n", count - max); } - show_request(m, last, "\t\tV "); + show_request(m, last, "\t\t", 0); } spin_unlock_irqrestore(&engine->active.lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 32e6e204f544..802585a308e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -107,7 +107,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, const struct i915_request *rq, - const char *prefix), + const char *prefix, + int indent), unsigned int max); struct intel_context * diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 673991718ae6..8d7d29c9e375 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1867,28 +1867,89 @@ static int print_sched_attr(const struct i915_sched_attr *attr, return x; } +static char queue_status(const struct i915_request *rq) +{ + if (i915_request_is_active(rq)) + return 'E'; + + if (i915_request_is_ready(rq)) + return intel_engine_is_virtual(rq->engine) ? 'V' : 'R'; + + return 'U'; +} + +static const char *run_status(const struct i915_request *rq) +{ + if (i915_request_completed(rq)) + return "!"; + + if (i915_request_started(rq)) + return "*"; + + if (!i915_sw_fence_signaled(&rq->semaphore)) + return "&"; + + return ""; +} + +static const char *fence_status(const struct i915_request *rq) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return "+"; + + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + return "-"; + + return ""; +} + void i915_request_show(struct drm_printer *m, const struct i915_request *rq, - const char *prefix) + const char *prefix, + int indent) { const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence); char buf[80] = ""; int x = 0; + /* + * The prefix is used to show the queue status, for which we use + * the following flags: + * + * U [Unready] + * - initial status upon being submitted by the user + * + * - the request is not ready for execution as it is waiting + * for external fences + * + * R [Ready] + * - all fences the request was waiting on have been signaled, + * and the request is now ready for execution and will be + * in a backend queue + * + * - a ready request may still need to wait on semaphores + * [internal fences] + * + * V [Ready/virtual] + * - same as ready, but queued over multiple backends + * + * E [Executing] + * - the request has been transferred from the backend queue and + * submitted for execution on HW + * + * - a completed request may still be regarded as executing, its + * status may not be updated until it is retired and removed + * from the lists + */ + x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s %llx:%lld%s%s %s @ %dms: %s\n", - prefix, + drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n", + prefix, indent, " ", + queue_status(rq), rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - !i915_sw_fence_signaled(&rq->semaphore) ? "&" : - "", - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &rq->fence.flags) ? "+" : - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags) ? "-" : - "", + run_status(rq), + fence_status(rq), buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 09609071b725..8f6173b1c3df 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -372,7 +372,8 @@ long i915_request_wait(struct i915_request *rq, void i915_request_show(struct drm_printer *m, const struct i915_request *rq, - const char *prefix); + const char *prefix, + int indent); static inline bool i915_request_signaled(const struct i915_request *rq) { -- cgit From b8e2bd98a2c9c3a90856c1909aab30d25d379c31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Nov 2020 14:04:03 +0000 Subject: drm/i915/gt: Decouple completed requests on unwind Since the introduction of preempt-to-busy, requests can complete in the background, even while they are not on the engine->active.requests list. As such, the engine->active.request list itself is not in strict retirement order, and we have to scan the entire list while unwinding to not miss any. However, if the request is completed we currently leave it on the list [until retirement], but we could just as simply remove it and stop treating it as active. We would only have to then traverse it once while unwinding in quick succession. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201126140407.31952-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 6 ++++-- drivers/gpu/drm/i915/i915_request.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 30aa59fb7271..cf11cbac241b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1116,8 +1116,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - if (i915_request_completed(rq)) - continue; /* XXX */ + if (i915_request_completed(rq)) { + list_del_init(&rq->sched.link); + continue; + } __i915_request_unsubmit(rq); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 8d7d29c9e375..a9db1376b996 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -321,7 +321,8 @@ bool i915_request_retire(struct i915_request *rq) * after removing the breadcrumb and signaling it, so that we do not * inadvertently attach the breadcrumb to a completed request. */ - remove_from_engine(rq); + if (!list_empty(&rq->sched.link)) + remove_from_engine(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ -- cgit From 45233ab2d036ce55bbdb59e7260e586dbb1a6b86 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Dec 2020 13:54:52 +0000 Subject: drm/i915/gt: Move gen8 CS emitters into gen8_engine_cs.h Reduce the pollution of intel_engine.h by moving gen8_emit_pipe_control and friends to gen8_engine_cs.h Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201216135452.6063-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_overlay.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 1 + .../drm/i915/gem/selftests/i915_gem_coherency.c | 1 + drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 1 + drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 1 + drivers/gpu/drm/i915/gt/gen8_engine_cs.h | 91 ++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_engine.h | 86 -------------------- drivers/gpu/drm/i915/gt/intel_renderstate.c | 3 +- drivers/gpu/drm/i915/gt/intel_ring.c | 2 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 1 + drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 2 + drivers/gpu/drm/i915/gt/selftest_mocs.c | 1 + drivers/gpu/drm/i915/gt/selftest_rc6.c | 1 + drivers/gpu/drm/i915/gt/selftest_reset.c | 1 + drivers/gpu/drm/i915/gt/selftest_timeline.c | 1 + drivers/gpu/drm/i915/gvt/cmd_parser.c | 1 + drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + drivers/gpu/drm/i915/i915_cmd_parser.c | 1 + drivers/gpu/drm/i915/i915_perf.c | 1 + drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 1 + drivers/gpu/drm/i915/selftests/igt_spinner.c | 1 + 25 files changed, 117 insertions(+), 87 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 52b4f6193b4c..6be5d8946c69 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -29,6 +29,7 @@ #include #include "gem/i915_gem_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 738a07b3583c..c7363036765a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -73,6 +73,7 @@ #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" #include "gt/intel_execlists_submission.h" /* virtual_engine */ +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_gem_context.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 2ff32daa50bd..0cf9e79325a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -15,6 +15,7 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_pm.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index aee7ad3cc3c6..10cac9fac79b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" #include "gt/intel_ring.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 7049a6bbc03d..1117d2a44518 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,6 +7,7 @@ #include #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index d27d87a678c8..d429c7643ff2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -7,6 +7,7 @@ #include #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gem/i915_gem_region.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index e21b5023ca7d..d6783061bc72 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "i915_vma.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index 3c5771fea235..cc6e21d3662a 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -6,8 +6,13 @@ #ifndef __GEN8_ENGINE_CS_H__ #define __GEN8_ENGINE_CS_H__ +#include #include +#include "i915_gem.h" /* GEM_BUG_ON */ + +#include "intel_gpu_commands.h" + struct i915_request; int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode); @@ -33,4 +38,90 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +static inline u32 * +__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + memset(batch, 0, 6 * sizeof(u32)); + + batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; + batch[1] = flags1; + batch[2] = offset; + + return batch + 6; +} + +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + return __gen8_emit_pipe_control(batch, 0, flags, offset); +} + +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + return __gen8_emit_pipe_control(batch, flags0, flags1, offset); +} + +static inline u32 * +__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1) +{ + *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; + *cs++ = flags1 | PIPE_CONTROL_QW_WRITE; + *cs++ = offset; + *cs++ = 0; + *cs++ = value; + *cs++ = 0; /* We're thrashing one extra dword. */ + + return cs; +} + +static inline u32* +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_write_rcs(cs, + value, + gtt_offset, + 0, + flags | PIPE_CONTROL_GLOBAL_GTT_IVB); +} + +static inline u32* +gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_write_rcs(cs, + value, + gtt_offset, + flags0, + flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB); +} + +static inline u32 * +__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + *cs++ = (MI_FLUSH_DW + 1) | flags; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_flush_dw(cs, + value, + gtt_offset | MI_FLUSH_DW_USE_GTT, + flags | MI_FLUSH_DW_OP_STOREDW); +} + #endif /* __GEN8_ENGINE_CS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 760fefdfe392..6606b1dbf3d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -15,7 +15,6 @@ #include "i915_selftest.h" #include "gt/intel_timeline.h" #include "intel_engine_types.h" -#include "intel_gpu_commands.h" #include "intel_workarounds.h" struct drm_printer; @@ -223,91 +222,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); -static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) -{ - memset(batch, 0, 6 * sizeof(u32)); - - batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; - batch[1] = flags1; - batch[2] = offset; - - return batch + 6; -} - -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) -{ - return __gen8_emit_pipe_control(batch, 0, flags, offset); -} - -static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) -{ - return __gen8_emit_pipe_control(batch, flags0, flags1, offset); -} - -static inline u32 * -__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1) -{ - *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; - *cs++ = flags1 | PIPE_CONTROL_QW_WRITE; - *cs++ = offset; - *cs++ = 0; - *cs++ = value; - *cs++ = 0; /* We're thrashing one extra dword. */ - - return cs; -} - -static inline u32* -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - /* We're using qword write, offset should be aligned to 8 bytes. */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - return __gen8_emit_write_rcs(cs, - value, - gtt_offset, - 0, - flags | PIPE_CONTROL_GLOBAL_GTT_IVB); -} - -static inline u32* -gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) -{ - /* We're using qword write, offset should be aligned to 8 bytes. */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - return __gen8_emit_write_rcs(cs, - value, - gtt_offset, - flags0, - flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB); -} - -static inline u32 * -__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - *cs++ = (MI_FLUSH_DW + 1) | flags; - *cs++ = gtt_offset; - *cs++ = 0; - *cs++ = value; - - return cs; -} - -static inline u32 * -gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - GEM_BUG_ON(gtt_offset & (1 << 5)); - /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - return __gen8_emit_flush_dw(cs, - value, - gtt_offset | MI_FLUSH_DW_USE_GTT, - flags | MI_FLUSH_DW_OP_STOREDW); -} - static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index ea2a77c7b469..ca816ba22197 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,7 +27,8 @@ #include "i915_drv.h" #include "intel_renderstate.h" -#include "gt/intel_context.h" +#include "intel_context.h" +#include "intel_gpu_commands.h" #include "intel_ring.h" static const struct intel_renderstate_rodata * diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 4034a4bac7f0..06385550450c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -5,9 +5,11 @@ */ #include "gem/i915_gem_object.h" + #include "i915_drv.h" #include "i915_vma.h" #include "intel_engine.h" +#include "intel_gpu_commands.h" #include "intel_ring.h" #include "intel_timeline.h" diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 52f12a6d66b9..38868c5c038e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" #include "intel_gt.h" #include "intel_ring.h" #include "intel_workarounds.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 729c3c7b11e2..439c8984f5fa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -6,6 +6,7 @@ #include +#include "intel_gpu_commands.h" #include "intel_gt_pm.h" #include "intel_rps.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index b08fc5390e8a..163a10b07f85 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -4,6 +4,8 @@ * Copyright © 2018 Intel Corporation */ +#include "intel_gpu_commands.h" + #include "i915_selftest.h" #include "selftest_engine.h" #include "selftest_engine_heartbeat.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 21dcd91cbd62..37b066dca52c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -5,6 +5,7 @@ */ #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "i915_selftest.h" #include "gem/selftests/mock_context.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 64ef5ee5decf..61abc0556601 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -6,6 +6,7 @@ #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" #include "intel_gt_requests.h" #include "intel_ring.h" #include "selftest_rc6.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index ef5aeebbeeb0..e4645c8bb00a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -9,6 +9,7 @@ #include "i915_memcpy.h" #include "i915_selftest.h" +#include "intel_gpu_commands.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" #include "selftests/igt_spinner.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index e4285d5a0360..6f3a3687ef0f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -9,6 +9,7 @@ #include "intel_context.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 16b582cb97ed..3fea967ee817 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -37,6 +37,7 @@ #include #include "i915_drv.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "gvt.h" #include "i915_pvinfo.h" diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index afe574d6b3b5..c9589e26af93 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "gvt.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 93265951fdbb..8d88402387bd 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -26,6 +26,7 @@ */ #include "gt/intel_engine.h" +#include "gt/intel_gpu_commands.h" #include "i915_drv.h" #include "i915_memcpy.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 391c2901a7d4..ff44346c5c0c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -199,6 +199,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_execlists_submission.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" #include "gt/intel_ring.h" diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a9db1376b996..2675c6d70779 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -33,6 +33,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "gt/intel_rps.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c53a222e3dec..70e07e9b78c2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -28,6 +28,7 @@ #include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "i915_random.h" #include "i915_selftest.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index ec0ecb4e4ca6..1216d919185e 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -3,6 +3,7 @@ * * Copyright © 2018 Intel Corporation */ +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gem/selftests/igt_gem_utils.h" -- cgit From 5ec17c763055767e4b1490da8399a6c4a53d7e8c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Dec 2020 13:48:58 +0000 Subject: drm/i915/gt: Another tweak for flushing the tasklets tasklet_kill() ensures that we _yield_ the processor until a remote tasklet is completed. However, this leads to a starvation condition as being at the bottom of the scheduler's runqueue means that anything else is able to run, including all hogs keeping the tasklet occupied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201220134858.10510-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 7 ++++++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10 +++++----- drivers/gpu/drm/i915/i915_request.c | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 6606b1dbf3d6..47ee8578e511 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -232,7 +232,12 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool intel_engines_are_idle(struct intel_gt *gt); bool intel_engine_is_idle(struct intel_engine_cs *engine); -void intel_engine_flush_submission(struct intel_engine_cs *engine); + +void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync); +static inline void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + __intel_engine_flush_submission(engine, true); +} void intel_engines_reset_default_submission(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 97ceaf7116e8..bb1c1adad78a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1190,17 +1190,13 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } -void intel_engine_flush_submission(struct intel_engine_cs *engine) +void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { struct tasklet_struct *t = &engine->execlists.tasklet; if (!t->func) return; - /* Synchronise and wait for the tasklet on another CPU */ - tasklet_kill(t); - - /* Having cancelled the tasklet, ensure that is run */ local_bh_disable(); if (tasklet_trylock(t)) { /* Must wait for any GPU reset in progress. */ @@ -1209,6 +1205,10 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine) tasklet_unlock(t); } local_bh_enable(); + + /* Synchronise and wait for the tasklet on another CPU */ + if (sync) + tasklet_unlock_wait(t); } /** diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2675c6d70779..45744c3ef7c4 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1825,7 +1825,7 @@ long i915_request_wait(struct i915_request *rq, * for unhappy HW. */ if (i915_request_is_ready(rq)) - intel_engine_flush_submission(rq->engine); + __intel_engine_flush_submission(rq->engine, false); for (;;) { set_current_state(state); -- cgit From 16f2941ad3078cac9c9ad69f3205fe4619f49edf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 24 Dec 2020 13:55:36 +0000 Subject: drm/i915/gt: Replace direct submit with direct call to tasklet Rather than having special case code for opportunistically calling process_csb() and performing a direct submit while holding the engine spinlock for submitting the request, simply call the tasklet directly. This allows us to retain the direct submission path, including the CS draining to allow fast/immediate submissions, without requiring any duplicated code paths, and most importantly greatly simplifying the control flow by removing reentrancy. This will enable us to close a few races in the virtual engines in the next few patches. The trickiest part here is to ensure that paired operations (such as schedule_in/schedule_out) remain under consistent locking domains, e.g. when pulled outside of the engine->active.lock v2: Use bh kicking, see commit 3c53776e29f8 ("Mark HI and TASKLET softirq synchronous"). v3: Update engine-reset to be tasklet aware Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201224135544.1713-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 35 +++--- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +- .../gpu/drm/i915/gt/intel_execlists_submission.c | 140 ++++++++------------- drivers/gpu/drm/i915/gt/intel_reset.c | 60 +++++---- drivers/gpu/drm/i915/gt/intel_reset.h | 2 + drivers/gpu/drm/i915/gt/selftest_context.c | 2 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 10 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 7 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 17 +-- drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- drivers/gpu/drm/i915/i915_request.c | 12 +- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/i915_scheduler.c | 4 - drivers/gpu/drm/i915/selftests/i915_request.c | 6 +- drivers/gpu/drm/i915/selftests/igt_spinner.c | 3 + 16 files changed, 162 insertions(+), 150 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 8acb922b69f9..1847d3c2ea99 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1019,32 +1019,39 @@ static unsigned long stop_timeout(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.stop_timeout_ms); } -int intel_engine_stop_cs(struct intel_engine_cs *engine) +static int __intel_engine_stop_cs(struct intel_engine_cs *engine, + int fast_timeout_us, + int slow_timeout_ms) { struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - const i915_reg_t mode = RING_MI_MODE(base); + const i915_reg_t mode = RING_MI_MODE(engine->mmio_base); int err; + intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); + err = __intel_wait_for_register_fw(engine->uncore, mode, + MODE_IDLE, MODE_IDLE, + fast_timeout_us, + slow_timeout_ms, + NULL); + + /* A final mmio read to let GPU writes be hopefully flushed to memory */ + intel_uncore_posting_read_fw(uncore, mode); + return err; +} + +int intel_engine_stop_cs(struct intel_engine_cs *engine) +{ + int err = 0; + if (INTEL_GEN(engine->i915) < 3) return -ENODEV; ENGINE_TRACE(engine, "\n"); - - intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); - - err = 0; - if (__intel_wait_for_register_fw(uncore, - mode, MODE_IDLE, MODE_IDLE, - 1000, stop_timeout(engine), - NULL)) { + if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n"); err = -ETIMEDOUT; } - /* A final mmio read to let GPU writes be hopefully flushed to memory */ - intel_uncore_posting_read_fw(uncore, mode); - return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 8b353bc8c100..2843db731b7d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -144,7 +144,7 @@ __queue_and_release_pm(struct i915_request *rq, list_add_tail(&tl->link, &timelines->active_list); /* Hand the request over to HW and so engine_retire() */ - __i915_request_queue(rq, NULL); + __i915_request_queue_bh(rq); /* Let new submissions commence (and maybe retire this timeline) */ __intel_wakeref_defer_park(&engine->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 02ee1e736982..c28f4e190fe6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -184,7 +184,8 @@ struct intel_engine_execlists { * Reserve the upper 16b for tracking internal errors. */ u32 error_interrupt; -#define ERROR_CSB BIT(31) +#define ERROR_CSB BIT(31) +#define ERROR_PREEMPT BIT(30) /** * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 695a2d566d76..ac3e19da1a7d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -571,8 +571,7 @@ __execlists_schedule_in(struct i915_request *rq) return engine; } -static inline struct i915_request * -execlists_schedule_in(struct i915_request *rq, int idx) +static inline void execlists_schedule_in(struct i915_request *rq, int idx) { struct intel_context * const ce = rq->context; struct intel_engine_cs *old; @@ -589,7 +588,6 @@ execlists_schedule_in(struct i915_request *rq, int idx) } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); - return i915_request_get(rq); } static void kick_siblings(struct i915_request *rq, struct intel_context *ce) @@ -1257,8 +1255,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; - struct i915_request * const *active; - struct i915_request *last; + struct i915_request *last = *execlists->active; struct rb_node *rb; bool submit = false; @@ -1284,6 +1281,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ + spin_lock(&engine->active.lock); + for (rb = rb_first_cached(&execlists->virtual); rb; ) { struct virtual_engine *ve = rb_entry(rb, typeof(*ve), nodes[engine->id].rb); @@ -1311,10 +1310,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the active context to interject the preemption request, * i.e. we will retrigger preemption following the ack in case * of trouble. - */ - active = READ_ONCE(execlists->active); - - /* + * * In theory we can skip over completed contexts that have not * yet been processed by events (as those events are in flight): * @@ -1326,7 +1322,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * completed and barf. */ - if ((last = *active)) { + if (last) { if (i915_request_completed(last)) { goto check_secondary; } else if (need_preempt(engine, last, rb)) { @@ -1399,6 +1395,7 @@ check_secondary: * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ + spin_unlock(&engine->active.lock); start_timeslice(engine, queue_prio(execlists)); return; } @@ -1434,6 +1431,7 @@ check_secondary: if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); + spin_unlock(&engine->active.lock); start_timeslice(engine, rq_prio(rq)); return; /* leave this for another sibling */ } @@ -1551,8 +1549,7 @@ check_secondary: if (__i915_request_submit(rq)) { if (!merge) { - *port = execlists_schedule_in(last, port - execlists->pending); - port++; + *port++ = i915_request_get(last); last = NULL; } @@ -1571,8 +1568,9 @@ check_secondary: rb_erase_cached(&p->node, &execlists->queue); i915_priolist_free(p); } - done: + *port++ = i915_request_get(last); + /* * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. * @@ -1590,36 +1588,45 @@ done: * interrupt for secondary ports). */ execlists->queue_priority_hint = queue_prio(execlists); + spin_unlock(&engine->active.lock); if (submit) { - *port = execlists_schedule_in(last, port - execlists->pending); - execlists->switch_priority_hint = - switch_prio(engine, *execlists->pending); - /* * Skip if we ended up with exactly the same set of requests, * e.g. trying to timeslice a pair of ordered contexts */ - if (!memcmp(active, execlists->pending, - (port - execlists->pending + 1) * sizeof(*port))) { - do - execlists_schedule_out(fetch_and_zero(port)); - while (port-- != execlists->pending); - + if (!memcmp(execlists->active, + execlists->pending, + (port - execlists->pending) * sizeof(*port))) goto skip_submit; - } - clear_ports(port + 1, last_port - port); + + *port = NULL; + while (port-- != execlists->pending) + execlists_schedule_in(*port, port - execlists->pending); + + execlists->switch_priority_hint = + switch_prio(engine, *execlists->pending); WRITE_ONCE(execlists->yield, -1); - set_preempt_timeout(engine, *active); + set_preempt_timeout(engine, *execlists->active); execlists_submit_ports(engine); } else { start_timeslice(engine, execlists->queue_priority_hint); skip_submit: ring_set_paused(engine, 0); + while (port-- != execlists->pending) + i915_request_put(*port); + *execlists->pending = NULL; } } +static void execlists_dequeue_irq(struct intel_engine_cs *engine) +{ + local_irq_disable(); /* Suspend interrupts across request submission */ + execlists_dequeue(engine); + local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ +} + static void cancel_port_requests(struct intel_engine_execlists * const execlists) { @@ -1962,16 +1969,6 @@ static void process_csb(struct intel_engine_cs *engine) invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); } -static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) -{ - lockdep_assert_held(&engine->active.lock); - if (!READ_ONCE(engine->execlists.pending[0])) { - rcu_read_lock(); /* protect peeking at execlists->active */ - execlists_dequeue(engine); - rcu_read_unlock(); - } -} - static void __execlists_hold(struct i915_request *rq) { LIST_HEAD(list); @@ -2363,7 +2360,7 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) if (!timer_expired(t)) return false; - return READ_ONCE(engine->execlists.pending[0]); + return engine->execlists.pending[0]; } /* @@ -2373,10 +2370,14 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - bool timeout = preempt_timeout(engine); process_csb(engine); + if (unlikely(preempt_timeout(engine))) { + cancel_timer(&engine->execlists.preempt); + engine->execlists.error_interrupt |= ERROR_PREEMPT; + } + if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { const char *msg; @@ -2385,6 +2386,8 @@ static void execlists_submission_tasklet(unsigned long data) msg = "CS error"; /* thrown by a user payload */ else if (engine->execlists.error_interrupt & ERROR_CSB) msg = "invalid CSB event"; + else if (engine->execlists.error_interrupt & ERROR_PREEMPT) + msg = "preemption time out"; else msg = "internal error"; @@ -2392,19 +2395,8 @@ static void execlists_submission_tasklet(unsigned long data) execlists_reset(engine, msg); } - if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { - unsigned long flags; - - spin_lock_irqsave(&engine->active.lock, flags); - __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); - - /* Recheck after serialising with direct-submission */ - if (unlikely(timeout && preempt_timeout(engine))) { - cancel_timer(&engine->execlists.preempt); - execlists_reset(engine, "preemption time out"); - } - } + if (!engine->execlists.pending[0]) + execlists_dequeue_irq(engine); } static void __execlists_kick(struct intel_engine_execlists *execlists) @@ -2435,26 +2427,16 @@ static void queue_request(struct intel_engine_cs *engine, set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } -static void __submit_queue_imm(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (reset_in_progress(execlists)) - return; /* defer until we restart the engine following reset */ - - __execlists_submission_tasklet(engine); -} - -static void submit_queue(struct intel_engine_cs *engine, +static bool submit_queue(struct intel_engine_cs *engine, const struct i915_request *rq) { struct intel_engine_execlists *execlists = &engine->execlists; if (rq_prio(rq) <= execlists->queue_priority_hint) - return; + return false; execlists->queue_priority_hint = rq_prio(rq); - __submit_queue_imm(engine); + return true; } static bool ancestor_on_hold(const struct intel_engine_cs *engine, @@ -2464,25 +2446,11 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, return !list_empty(&engine->active.hold) && hold_request(rq); } -static void flush_csb(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *el = &engine->execlists; - - if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) { - if (!reset_in_progress(el)) - process_csb(engine); - tasklet_unlock(&el->tasklet); - } -} - static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; - /* Hopefully we clear execlists->pending[] to let us through */ - flush_csb(engine); - /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); @@ -2496,7 +2464,8 @@ static void execlists_submit_request(struct i915_request *request) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, request); + if (submit_queue(engine, request)) + __execlists_kick(&engine->execlists); } spin_unlock_irqrestore(&engine->active.lock, flags); @@ -2837,7 +2806,6 @@ static int execlists_resume(struct intel_engine_cs *engine) static void execlists_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - unsigned long flags; ENGINE_TRACE(engine, "depth<-%d\n", atomic_read(&execlists->tasklet.count)); @@ -2854,10 +2822,6 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) __tasklet_disable_sync_once(&execlists->tasklet); GEM_BUG_ON(!reset_in_progress(execlists)); - /* And flush any current direct submission. */ - spin_lock_irqsave(&engine->active.lock, flags); - spin_unlock_irqrestore(&engine->active.lock, flags); - /* * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). Also as modern gpu as kbl can suffer @@ -3082,12 +3046,12 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * to sleep before we restart and reload a context. */ GEM_BUG_ON(!reset_in_progress(execlists)); - if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) - execlists->tasklet.func(execlists->tasklet.data); + GEM_BUG_ON(engine->execlists.pending[0]); + /* And kick in case we missed a new request submission. */ if (__tasklet_enable(&execlists->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); + __execlists_kick(execlists); + ENGINE_TRACE(engine, "depth->%d\n", atomic_read(&execlists->tasklet.count)); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b3ccf7859ab1..7583f16c293c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -40,20 +40,19 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) intel_uncore_rmw_fw(uncore, reg, clr, 0); } -static void engine_skip_context(struct i915_request *rq) +static void skip_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; struct intel_context *hung_ctx = rq->context; - if (!i915_request_is_active(rq)) - return; + list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) { + if (!i915_request_is_active(rq)) + return; - lockdep_assert_held(&engine->active.lock); - list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->context == hung_ctx) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); } + } } static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) @@ -160,7 +159,7 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); if (mark_guilty(rq)) - engine_skip_context(rq); + skip_context(rq); } else { i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); @@ -753,8 +752,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) if (err) return err; + local_bh_disable(); for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); + local_bh_enable(); intel_ggtt_restore_fences(gt->ggtt); @@ -832,9 +833,11 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ + local_bh_disable(); for_each_engine(engine, gt, id) if (engine->reset.cancel) engine->reset.cancel(engine); + local_bh_enable(); reset_finish(gt, awake); @@ -1109,20 +1112,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) return __intel_gt_reset(engine->gt, engine->mask); } -/** - * intel_engine_reset - reset GPU engine to recover from a hang - * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no drm_notice() - * - * Reset a specific GPU engine. Useful if a hang is detected. - * Returns zero on successful reset or otherwise an error code. - * - * Procedure is: - * - identifies the request that caused the hang and it is dropped - * - reset engine (which will force the engine to idle) - * - re-init/configure engine - */ -int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) +int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; bool uses_guc = intel_engine_in_guc_submission_mode(engine); @@ -1172,6 +1162,30 @@ out: return ret; } +/** + * intel_engine_reset - reset GPU engine to recover from a hang + * @engine: engine to reset + * @msg: reason for GPU reset; or NULL for no drm_notice() + * + * Reset a specific GPU engine. Useful if a hang is detected. + * Returns zero on successful reset or otherwise an error code. + * + * Procedure is: + * - identifies the request that caused the hang and it is dropped + * - reset engine (which will force the engine to idle) + * - re-init/configure engine + */ +int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) +{ + int err; + + local_bh_disable(); + err = __intel_engine_reset_bh(engine, msg); + local_bh_enable(); + + return err; +} + static void intel_gt_reset_global(struct intel_gt *gt, u32 engine_mask, const char *reason) @@ -1258,18 +1272,20 @@ void intel_gt_handle_error(struct intel_gt *gt, * single reset fails. */ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) continue; - if (intel_engine_reset(engine, msg) == 0) + if (__intel_engine_reset_bh(engine, msg) == 0) engine_mask &= ~engine->mask; clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); } + local_bh_enable(); } if (!engine_mask) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index a0eec7c11c0c..7dbf5cc8a333 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -34,6 +34,8 @@ void intel_gt_reset(struct intel_gt *gt, const char *reason); int intel_engine_reset(struct intel_engine_cs *engine, const char *reason); +int __intel_engine_reset_bh(struct intel_engine_cs *engine, + const char *reason); void __i915_request_reset(struct i915_request *rq, bool guilty); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 1f4020e906a8..db738d400168 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -25,7 +25,7 @@ static int request_sync(struct i915_request *rq) /* Opencode i915_request_add() so we can keep the timeline locked. */ __i915_request_commit(rq); rq->sched.attr.priority = I915_PRIORITY_BARRIER; - __i915_request_queue(rq, NULL); + __i915_request_queue_bh(rq); timeout = i915_request_wait(rq, 0, HZ / 10); if (timeout < 0) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index fa51cf6d840a..47b12ce4b132 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -599,8 +599,10 @@ static int live_hold_reset(void *arg) /* We have our request executing, now remove it and reset */ + local_bh_disable(); if (test_and_set_bit(I915_RESET_ENGINE + id, >->reset.flags)) { + local_bh_enable(); intel_gt_set_wedged(gt); err = -EBUSY; goto out; @@ -614,12 +616,13 @@ static int live_hold_reset(void *arg) execlists_hold(engine, rq); GEM_BUG_ON(!i915_request_on_hold(rq)); - intel_engine_reset(engine, NULL); + __intel_engine_reset_bh(engine, NULL); GEM_BUG_ON(rq->fence.error != -EIO); tasklet_enable(&engine->execlists.tasklet); clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); + local_bh_enable(); /* Check that we do not resubmit the held request */ if (!i915_request_wait(rq, 0, HZ / 5)) { @@ -4546,8 +4549,10 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(engine == ve->engine); /* Take ownership of the reset and tasklet */ + local_bh_disable(); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) { + local_bh_enable(); intel_gt_set_wedged(gt); err = -EBUSY; goto out_heartbeat; @@ -4567,12 +4572,13 @@ static int reset_virtual_engine(struct intel_gt *gt, execlists_hold(engine, rq); GEM_BUG_ON(!i915_request_on_hold(rq)); - intel_engine_reset(engine, NULL); + __intel_engine_reset_bh(engine, NULL); GEM_BUG_ON(rq->fence.error != -EIO); /* Release our grasp on the engine, letting CS flow again */ tasklet_enable(&engine->execlists.tasklet); clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); + local_bh_enable(); /* Check that we do not resubmit the held request */ i915_request_get(rq); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index fb5ebf930ab2..c28d1fcad673 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1576,12 +1576,17 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, engine->name, mode, p->name); tasklet_disable(t); + if (strcmp(p->name, "softirq")) + local_bh_disable(); p->critical_section_begin(); - err = intel_engine_reset(engine, NULL); + err = __intel_engine_reset_bh(engine, NULL); p->critical_section_end(); + if (strcmp(p->name, "softirq")) + local_bh_enable(); tasklet_enable(t); + tasklet_hi_schedule(t); if (err) pr_err("i915_reset_engine(%s:%s) failed under %s\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d55421f6a250..ba6c2be5c8ff 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1607,16 +1607,17 @@ static void garbage_reset(struct intel_engine_cs *engine, const unsigned int bit = I915_RESET_ENGINE + engine->id; unsigned long *lock = &engine->gt->reset.flags; - if (test_and_set_bit(bit, lock)) - return; + local_bh_disable(); + if (!test_and_set_bit(bit, lock)) { + tasklet_disable(&engine->execlists.tasklet); - tasklet_disable(&engine->execlists.tasklet); + if (!rq->fence.error) + __intel_engine_reset_bh(engine, NULL); - if (!rq->fence.error) - intel_engine_reset(engine, NULL); - - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(bit, lock); + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); + } + local_bh_enable(); } static struct i915_request *garbage(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index e4645c8bb00a..5ec8d4e9983f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -327,11 +327,16 @@ static int igt_atomic_engine_reset(void *arg) for (p = igt_atomic_phases; p->name; p++) { GEM_TRACE("intel_engine_reset(%s) under %s\n", engine->name, p->name); + if (strcmp(p->name, "softirq")) + local_bh_disable(); p->critical_section_begin(); - err = intel_engine_reset(engine, NULL); + err = __intel_engine_reset_bh(engine, NULL); p->critical_section_end(); + if (strcmp(p->name, "softirq")) + local_bh_enable(); + if (err) { pr_err("intel_engine_reset(%s) failed under %s\n", engine->name, p->name); @@ -341,6 +346,7 @@ static int igt_atomic_engine_reset(void *arg) intel_engine_pm_put(engine); tasklet_enable(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->execlists.tasklet); if (err) break; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 45744c3ef7c4..6578faf6eed8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1584,6 +1584,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) return __i915_request_add_to_timeline(rq); } +void __i915_request_queue_bh(struct i915_request *rq) +{ + i915_sw_fence_commit(&rq->semaphore); + i915_sw_fence_commit(&rq->submit); +} + void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr) { @@ -1600,8 +1606,10 @@ void __i915_request_queue(struct i915_request *rq, */ if (attr && rq->engine->schedule) rq->engine->schedule(rq, attr); - i915_sw_fence_commit(&rq->semaphore); - i915_sw_fence_commit(&rq->submit); + + local_bh_disable(); + __i915_request_queue_bh(rq); + local_bh_enable(); /* kick tasklets */ } void i915_request_add(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7c4453e60323..a8c413203f72 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -315,6 +315,7 @@ void __i915_request_skip(struct i915_request *rq); struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr); +void __i915_request_queue_bh(struct i915_request *rq); bool i915_request_retire(struct i915_request *rq); void i915_request_retire_upto(struct i915_request *rq); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index b9cf9931ebd7..318e359bf5c3 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -458,14 +458,10 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, if (!dep) return -ENOMEM; - local_bh_disable(); - if (!__i915_sched_node_add_dependency(node, signal, dep, flags | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); - local_bh_enable(); /* kick submission tasklet */ - return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index ddf76069066e..d2a678a2497e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1933,9 +1933,7 @@ static int measure_inter_request(struct intel_context *ce) intel_ring_advance(rq, cs); i915_request_add(rq); } - local_bh_disable(); i915_sw_fence_commit(submit); - local_bh_enable(); intel_engine_flush_submission(ce->engine); heap_fence_put(submit); @@ -2221,11 +2219,9 @@ static int measure_completion(struct intel_context *ce) intel_ring_advance(rq, cs); dma_fence_add_callback(&rq->fence, &cb.base, signal_cb); - - local_bh_disable(); i915_request_add(rq); - local_bh_enable(); + intel_engine_flush_submission(ce->engine); if (wait_for(READ_ONCE(sema[i]) == -1, 50)) { err = -EIO; goto err; diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 1216d919185e..83f6e5f31fb3 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -220,6 +220,9 @@ void igt_spinner_fini(struct igt_spinner *spin) bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { + if (i915_request_is_ready(rq)) + intel_engine_flush_submission(rq->engine); + return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), 100) && -- cgit From 7904e0819d5f7e71413873ac62b79c164d619f58 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Dec 2020 22:00:28 +0000 Subject: drm/i915/gt: Cancel submitted requests upon context reset Since we process schedule-in of a context after submitting the request, if we decide to reset the context at that time, we also have to cancel the requets we have marked for submission. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201230220028.17089-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gt/intel_execlists_submission.c | 22 ++++++++++++++++------ drivers/gpu/drm/i915/i915_request.c | 2 ++ 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index f08ba2d1f6d6..33c5bbaad9fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -215,22 +215,32 @@ static void mark_eio(struct i915_request *rq) } static struct i915_request * -active_request(const struct intel_timeline * const tl, struct i915_request *rq) +__active_request(const struct intel_timeline * const tl, + struct i915_request *rq, + int error) { struct i915_request *active = rq; - rcu_read_lock(); - list_for_each_entry_continue_reverse(rq, &tl->requests, link) { + list_for_each_entry_from_reverse(rq, &tl->requests, link) { if (__i915_request_is_complete(rq)) break; + if (error) { + i915_request_set_error_once(rq, error); + __i915_request_skip(rq); + } active = rq; } - rcu_read_unlock(); return active; } +static struct i915_request * +active_request(const struct intel_timeline * const tl, struct i915_request *rq) +{ + return __active_request(tl, rq, 0); +} + static inline void ring_set_paused(const struct intel_engine_cs *engine, int state) { @@ -487,14 +497,14 @@ static void reset_active(struct i915_request *rq, * remain correctly ordered. And we defer to __i915_request_submit() * so that all asynchronous waits are correctly handled. */ - ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", + ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", rq->fence.context, rq->fence.seqno); /* On resubmission of the active request, payload will be scrubbed */ if (__i915_request_is_complete(rq)) head = rq->tail; else - head = active_request(ce->timeline, rq)->head; + head = __active_request(ce->timeline, rq, -EIO)->head; head = intel_ring_wrap(ce->ring, head); /* Scrub the context image to prevent replaying the previous batch */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 6578faf6eed8..ad3b6a4f424f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -490,6 +490,8 @@ void __i915_request_skip(struct i915_request *rq) if (rq->infix == rq->postfix) return; + RQ_TRACE(rq, "error: %d\n", rq->fence.error); + /* * As this request likely depends on state from the lost * context, clear out all the user operations leaving the -- cgit From 9c080b0f96371dceca92d67b0c50c07b479203e2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Dec 2020 09:39:46 +0000 Subject: drm/i915/gt: Pull context closure check from request submit to schedule-in We only need to evaluate the current status of the context when it is scheduled in, we will force a reschedule when the context is closed propagating the change to inflight contexts. Signed-off-by: Chris Wilson Cc: Matthew Brost Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201231093946.11649-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ++++ drivers/gpu/drm/i915/i915_request.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 33c5bbaad9fe..cb2491ec6d3f 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -522,6 +522,10 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(intel_context_is_closed(ce) && + !intel_engine_has_heartbeat(engine))) + intel_context_set_banned(ce); + if (unlikely(intel_context_is_banned(ce))) reset_active(rq, engine); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ad3b6a4f424f..3a9820a9e521 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -546,10 +546,6 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; - if (unlikely(intel_context_is_closed(request->context) && - !intel_engine_has_heartbeat(engine))) - intel_context_set_banned(request->context); - if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); -- cgit From 4e5c8a99e1cb701aa3ed70d9c0eb5aacd9529390 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 31 Dec 2020 09:31:49 +0000 Subject: drm/i915: Drop i915_request.lock requirement for intel_rps_boost() Since we use a flag within i915_request.flags to indicate when we have boosted the request (so that we only apply the boost) once, this can be used as the serialisation with i915_request_retire() to avoid having to explicitly take the i915_request.lock which is more heavily contended. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201231093149.19086-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 25 +++++++++++++------------ drivers/gpu/drm/i915/gt/intel_rps_types.h | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 4 +--- 5 files changed, 17 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index a0f10e8bbd21..d4f4452ce5ed 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -578,7 +578,7 @@ static int rps_boost_show(struct seq_file *m, void *data) intel_gpu_freq(rps, rps->efficient_freq), intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) { struct intel_uncore *uncore = gt->uncore; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f74d5e09e176..69e1bd46cc46 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -862,6 +862,8 @@ void intel_rps_park(struct intel_rps *rps) { int adj; + GEM_BUG_ON(atomic_read(&rps->num_waiters)); + if (!intel_rps_clear_active(rps)) return; @@ -917,28 +919,27 @@ void intel_rps_park(struct intel_rps *rps) void intel_rps_boost(struct i915_request *rq) { - struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; - unsigned long flags; - - if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) + if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; /* Serializes with i915_request_retire() */ - spin_lock_irqsave(&rq->lock, flags); - if (!i915_request_has_waitboost(rq) && - !dma_fence_is_signaled_locked(&rq->fence)) { - set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { + struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; + + if (atomic_fetch_inc(&rps->num_waiters)) + return; + + if (!intel_rps_is_active(rps)) + return; GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); - if (!atomic_fetch_inc(&rps->num_waiters) && - READ_ONCE(rps->cur_freq) < rps->boost_freq) + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) schedule_work(&rps->work); - atomic_inc(&rps->boosts); + WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ } - spin_unlock_irqrestore(&rq->lock, flags); } int intel_rps_set(struct intel_rps *rps, u8 val) diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 38083f0402d9..029fe13cf303 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -93,7 +93,7 @@ struct intel_rps { } power; atomic_t num_waiters; - atomic_t boosts; + unsigned int boosts; /* manual wa residency calculations */ struct intel_rps_ei ei; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 877411a50299..3a8d843d7966 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1232,7 +1232,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_gpu_freq(rps, rps->efficient_freq), intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); if (INTEL_GEN(dev_priv) >= 6 && intel_rps_is_active(rps)) { u32 rpup, rpupei; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3a9820a9e521..bbf42bc526c7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -307,10 +307,8 @@ bool i915_request_retire(struct i915_request *rq) spin_unlock_irq(&rq->lock); } - if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) atomic_dec(&rq->engine->gt->rps.num_waiters); - } /* * We only loosely track inflight requests across preemption, -- cgit From baa7c2cd99c6326b4d1dece6046479b183533943 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Jan 2021 16:34:55 +0000 Subject: drm/i915: Refactor marking a request as EIO When wedging the device, we cancel all outstanding requests and mark them as EIO. Rather than duplicate the small function to do so between each submission backend, export one. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210109163455.28466-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 19 ++++--------------- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 6 ++---- drivers/gpu/drm/i915/gt/mock_engine.c | 15 ++------------- drivers/gpu/drm/i915/i915_request.c | 11 +++++++++++ drivers/gpu/drm/i915/i915_request.h | 3 ++- 5 files changed, 21 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 4d7ac81303bb..52c1fe62bdfe 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -203,17 +203,6 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } -static void mark_eio(struct i915_request *rq) -{ - if (__i915_request_is_complete(rq)) - return; - - GEM_BUG_ON(i915_request_signaled(rq)); - - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); -} - static struct i915_request * __active_request(const struct intel_timeline * const tl, struct i915_request *rq, @@ -2996,7 +2985,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) - mark_eio(rq); + i915_request_mark_eio(rq); intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). */ @@ -3005,7 +2994,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - mark_eio(rq); + i915_request_mark_eio(rq); __i915_request_submit(rq); } @@ -3015,7 +3004,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* On-hold requests will be flushed to timeline upon their release */ list_for_each_entry(rq, &engine->active.hold, sched.link) - mark_eio(rq); + i915_request_mark_eio(rq); /* Cancel all attached virtual engines */ while ((rb = rb_first_cached(&execlists->virtual))) { @@ -3028,7 +3017,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) spin_lock(&ve->base.active.lock); rq = fetch_and_zero(&ve->request); if (rq) { - mark_eio(rq); + i915_request_mark_eio(rq); rq->engine = engine; __i915_request_submit(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 4ea741f488a8..1c6d421f6fe5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -473,10 +473,8 @@ static void reset_cancel(struct intel_engine_cs *engine) spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->active.requests, sched.link) { - i915_request_set_error_once(request, -EIO); - i915_request_mark_complete(request); - } + list_for_each_entry(request, &engine->active.requests, sched.link) + i915_request_mark_eio(request); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2f830017c51d..4b4f03b70df7 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -245,17 +245,6 @@ static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled) GEM_BUG_ON(stalled); } -static void mark_eio(struct i915_request *rq) -{ - if (i915_request_completed(rq)) - return; - - GEM_BUG_ON(i915_request_signaled(rq)); - - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); -} - static void mock_reset_cancel(struct intel_engine_cs *engine) { struct mock_engine *mock = @@ -269,12 +258,12 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) - mark_eio(rq); + i915_request_mark_eio(rq); intel_engine_signal_breadcrumbs(engine); /* Cancel and submit all pending requests. */ list_for_each_entry(rq, &mock->hw_queue, mock.link) { - mark_eio(rq); + i915_request_mark_eio(rq); __i915_request_submit(rq); } INIT_LIST_HEAD(&mock->hw_queue); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index bbf42bc526c7..d9f32a342c68 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -515,6 +515,17 @@ void i915_request_set_error_once(struct i915_request *rq, int error) } while (!try_cmpxchg(&rq->fence.error, &old, error)); } +void i915_request_mark_eio(struct i915_request *rq) +{ + if (__i915_request_is_complete(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + i915_request_set_error_once(rq, -EIO); + i915_request_mark_complete(rq); +} + bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a8c413203f72..1bfe214a47e9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -309,8 +309,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp); struct i915_request * __must_check i915_request_create(struct intel_context *ce); -void i915_request_set_error_once(struct i915_request *rq, int error); void __i915_request_skip(struct i915_request *rq); +void i915_request_set_error_once(struct i915_request *rq, int error); +void i915_request_mark_eio(struct i915_request *rq); struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, -- cgit From 163433e5c50afd8276f7dfdead4bf35f1bdafd05 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jan 2021 13:56:08 +0000 Subject: drm/i915: Mark up protected uses of 'i915_request_completed' When we know that we are inside the timeline mutex, or inside the submission flow (under active.lock or the holder's rcu lock), we know that the rq->hwsp is stable and we can use the simpler direct version. Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210114135612.13210-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_reset.c | 3 +-- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 4 +++- drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 19 +++++++++---------- drivers/gpu/drm/i915/i915_scheduler.c | 2 +- 9 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 68f58762d5e3..cac0c52fc681 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -408,7 +408,7 @@ __active_engine(struct i915_request *rq, struct intel_engine_cs **active) } if (i915_request_is_active(rq)) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) *active = locked; ret = true; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index be2c285a0ac7..d098fc0c14ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -517,8 +517,8 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) list_for_each_entry_rcu(rq, &ce->signals, signal_link) drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? "*" : "", jiffies_to_msecs(jiffies - rq->emitted_jiffies)); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index fa76602f9852..3fe44cdfe20a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1811,7 +1811,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) struct intel_timeline *tl = request->context->timeline; list_for_each_entry_from_reverse(request, &tl->requests, link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) break; active = request; @@ -1822,10 +1822,10 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) return active; list_for_each_entry(request, &engine->active.requests, sched.link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) continue; - if (!i915_request_started(request)) + if (!__i915_request_has_started(request)) continue; /* More than one preemptible request may match! */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 33c7495b12b1..269492b80177 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3291,7 +3291,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) old = fetch_and_zero(&ve->request); if (old) { - GEM_BUG_ON(!i915_request_completed(old)); + GEM_BUG_ON(!__i915_request_is_complete(old)); __i915_request_submit(old); i915_request_put(old); } @@ -3568,7 +3568,7 @@ static void virtual_submit_request(struct i915_request *rq) } if (ve->request) { /* background completion from preempt-to-busy */ - GEM_BUG_ON(!i915_request_completed(ve->request)); + GEM_BUG_ON(!__i915_request_is_complete(ve->request)); __i915_request_submit(ve->request); i915_request_put(ve->request); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 0a1f93f00b14..61410cd62927 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -151,8 +151,7 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); - - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rcu_read_lock(); /* protect the GEM context */ if (guilty) { diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 597eaead57f8..8d0964d2d597 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -389,12 +389,14 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&engine->active.lock, flags); + rcu_read_lock(); list_for_each_entry(pos, &engine->active.requests, sched.link) { - if (!i915_request_completed(pos)) { + if (!__i915_request_is_complete(pos)) { rq = pos; break; } } + rcu_read_unlock(); /* * The guilty request will get skipped on a hung engine. diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7fe05918a76e..037b0e3ccbed 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -582,11 +582,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, rcu_read_lock(); cl = rcu_dereference(from->hwsp_cacheline); - if (i915_request_completed(from)) /* confirm cacheline is valid */ + if (i915_request_signaled(from)) /* confirm cacheline is valid */ goto unlock; if (unlikely(!i915_active_acquire_if_busy(&cl->active))) goto unlock; /* seqno wrapped and completed! */ - if (unlikely(i915_request_completed(from))) + if (unlikely(__i915_request_is_complete(from))) goto release; rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b1a46a0d866..784c05ac5cca 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -276,7 +276,7 @@ static void remove_from_engine(struct i915_request *rq) bool i915_request_retire(struct i915_request *rq) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) return false; RQ_TRACE(rq, "\n"); @@ -342,8 +342,7 @@ void i915_request_retire_upto(struct i915_request *rq) struct i915_request *tmp; RQ_TRACE(rq, "\n"); - - GEM_BUG_ON(!i915_request_completed(rq)); + GEM_BUG_ON(!__i915_request_is_complete(rq)); do { tmp = list_first_entry(&tl->requests, typeof(*tmp), link); @@ -552,7 +551,7 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) */ - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) goto xfer; if (unlikely(intel_context_is_banned(request->context))) @@ -652,7 +651,7 @@ void __i915_request_unsubmit(struct i915_request *request) i915_request_cancel_breadcrumb(request); /* We've already spun, don't charge on resubmitting. */ - if (request->sched.semaphores && i915_request_started(request)) + if (request->sched.semaphores && __i915_request_has_started(request)) request->sched.semaphores = 0; /* @@ -864,7 +863,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -978,7 +977,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) struct i915_request *prev; /* Confirm signal has not been retired, the link is valid */ - if (unlikely(i915_request_started(signal))) + if (unlikely(__i915_request_has_started(signal))) break; /* Is signal the earliest request on its timeline? */ @@ -1520,7 +1519,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) */ prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); - if (prev && !i915_request_completed(prev)) { + if (prev && !__i915_request_is_complete(prev)) { /* * The requests are supposed to be kept in order. However, * we need to be wary in case the timeline->last_request @@ -1897,10 +1896,10 @@ static char queue_status(const struct i915_request *rq) static const char *run_status(const struct i915_request *rq) { - if (i915_request_completed(rq)) + if (__i915_request_is_complete(rq)) return "!"; - if (i915_request_started(rq)) + if (__i915_request_has_started(rq)) return "*"; if (!i915_sw_fence_signaled(&rq->semaphore)) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 318e359bf5c3..7144239f08df 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -520,7 +520,7 @@ void i915_request_show_with_schedule(struct drm_printer *m, if (signaler->timeline == rq->timeline) continue; - if (i915_request_completed(signaler)) + if (__i915_request_is_complete(signaler)) continue; i915_request_show(m, signaler, prefix, indent + 2); -- cgit From b2fe00bbb2b6bf06929dc300a4e3e91d9c12a3c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jan 2021 13:56:09 +0000 Subject: drm/i915: Drop i915_request.lock serialisation around await_start Originally, we used the signal->lock as a means of following the previous link in its timeline and peeking at the previous fence. However, we have replaced the explicit serialisation with a series of very careful probes that anticipate the links being deleted and the fences recycled before we are able to acquire a strong reference to it. We do not need the signal->lock crutch anymore, nor want the contention. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210114135612.13210-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 784c05ac5cca..973eceabbcca 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -969,9 +969,16 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) if (i915_request_started(signal)) return 0; + /* + * The caller holds a reference on @signal, but we do not serialise + * against it being retired and removed from the lists. + * + * We do not hold a reference to the request before @signal, and + * so must be very careful to ensure that it is not _recycled_ as + * we follow the link backwards. + */ fence = NULL; rcu_read_lock(); - spin_lock_irq(&signal->lock); do { struct list_head *pos = READ_ONCE(signal->link.prev); struct i915_request *prev; @@ -1002,7 +1009,6 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) fence = &prev->fence; } while (0); - spin_unlock_irq(&signal->lock); rcu_read_unlock(); if (!fence) return 0; -- cgit From 9736387a9f17a521a8aff3bc64814f335a6f09e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jan 2021 13:56:12 +0000 Subject: drm/i915: Reduce test_and_set_bit to set_bit in i915_request_submit() Avoid the full blown memory barrier of test_and_set_bit() by noting the completed request and removing it from the lists. Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20210114135612.13210-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 973eceabbcca..22e39d938f17 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -551,8 +551,10 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) */ - if (__i915_request_is_complete(request)) - goto xfer; + if (__i915_request_is_complete(request)) { + list_del_init(&request->sched.link); + goto active; + } if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); @@ -587,11 +589,11 @@ bool __i915_request_submit(struct i915_request *request) engine->serial++; result = true; -xfer: - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { - list_move_tail(&request->sched.link, &engine->active.requests); - clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); - } + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + list_move_tail(&request->sched.link, &engine->active.requests); +active: + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); /* * XXX Rollback bonded-execution on __i915_request_unsubmit()? -- cgit