aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6
-rw-r--r-- drivers/gpu/drm/i915/gt/intel_reset.c | 3
-rw-r--r-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 75
4 files changed, 85 insertions, 1 deletion
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c28f4e190fe6..430066e5884c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -561,6 +561,8 @@ struct intel_engine_cs {
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
} props, defaults;
+
+ I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
};
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 2afbc0a4ca03..f02e3ae10d28 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3047,9 +3047,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
* After a GPU reset, we may have requests to replay. Do so now while
* we still have the forcewake to be sure that the GPU is not allowed
* to sleep before we restart and reload a context.
+ *
+ * If the GPU reset fails, the engine may still be alive with requests
+ * inflight. We expect those to complete, or for the device to be
+ * reset as the next level of recovery, and as a final resort we
+ * will declare the device wedged.
*/
GEM_BUG_ON(!reset_in_progress(execlists));
- GEM_BUG_ON(engine->execlists.pending[0]);
/* And kick in case we missed a new request submission. */
if (__tasklet_enable(&execlists->tasklet))
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index e02775fc326d..b516b2c0528d 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -497,6 +497,9 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
u32 request, mask, ack;
int ret;
+ if (I915_SELFTEST_ONLY(should_fail(&engine->reset_timeout, 1)))
+ return -ETIMEDOUT;
+
ack = intel_uncore_read_fw(uncore, reg);
if (ack & RESET_CTL_CAT_ERROR) {
/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 3854da5a4e65..bfa7fd5c2c91 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -2299,6 +2299,77 @@ out:
return err;
}
+static void force_reset_timeout(struct intel_engine_cs *engine)
+{ /* Arm the selftest reset_timeout fault attribute on @engine. */
+ engine->reset_timeout.probability = 999; /* NOTE(review): presumably any value >= 100 makes should_fail() always fire - confirm */
+ atomic_set(&engine->reset_timeout.times, -1); /* NOTE(review): presumably -1 means no cap on injected failures - confirm */
+}
+
+static void cancel_reset_timeout(struct intel_engine_cs *engine)
+{ /* Disarm the reset_timeout fault attribute on @engine by zeroing it. */
+ memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); /* clears every field, including probability and times */
+}
+
+static int __cancel_fail(struct live_preempt_cancel *arg)
+{ /* Selftest: ban a running spinner, force the reset to fail, then expect wait_for_reset() to pass with the heartbeat set to 1. */
+ struct intel_engine_cs *engine = arg->engine;
+ struct i915_request *rq;
+ int err; /* 0 on success or skip, negative errno otherwise */
+
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0; /* skip: preempt timeout is not active in this config */
+
+ if (!intel_has_reset_engine(engine->gt))
+ return 0; /* skip: intel_has_reset_engine() reported false for this GT */
+
+ GEM_TRACE("%s(%s)\n", __func__, engine->name);
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq))
+ return PTR_ERR(rq); /* no reference taken yet, so nothing to release */
+
+ clear_bit(CONTEXT_BANNED, &rq->context->flags); /* start un-banned; the ban is applied once the spinner is running */
+ i915_request_get(rq); /* our own reference, dropped at out: */
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO; /* spinner never started executing */
+ goto out;
+ }
+
+ intel_context_set_banned(rq->context);
+
+ err = intel_engine_pulse(engine); /* NOTE(review): presumably queues work that must preempt the banned spinner - confirm */
+ if (err)
+ goto out;
+
+ force_reset_timeout(engine); /* arm fault injection before the reset is attempted */
+
+ /* force preempt reset [failure] */
+ while (!engine->execlists.pending[0]) /* spin until a submission is pending; NOTE(review): loop has no timeout */
+ intel_engine_flush_submission(engine);
+ del_timer_sync(&engine->execlists.preempt); /* NOTE(review): presumably lets the flush below act on the preempt timeout at once - confirm */
+ intel_engine_flush_submission(engine);
+
+ cancel_reset_timeout(engine); /* disarm so later resets are no longer forced to fail */
+
+ /* after failure, require heartbeats to reset device */
+ intel_engine_set_heartbeat(engine, 1);
+ err = wait_for_reset(engine, rq, HZ / 2);
+ intel_engine_set_heartbeat(engine,
+ engine->defaults.heartbeat_interval_ms); /* restore the default interval whether or not the wait succeeded */
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_flush_test(engine->i915))
+ err = -EIO; /* a failed flush overrides any earlier result */
+ return err;
+}
+
static int live_preempt_cancel(void *arg)
{
struct intel_gt *gt = arg;
@@ -2338,6 +2409,10 @@ static int live_preempt_cancel(void *arg)
err = __cancel_hostile(&data);
if (err)
goto err_wedged;
+
+ err = __cancel_fail(&data);
+ if (err)
+ goto err_wedged;
}
err = 0;