From d19d71fc2b15bf30ff3e56932eae23ff096c1396 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 19 Sep 2019 12:19:10 +0100
Subject: drm/i915: Mark i915_request.timeline as a volatile, rcu pointer

The request->timeline is only valid until the request is retired (i.e.
before it is completed). Upon retiring the request, the context may be
unpinned and freed, and along with it the timeline may be freed. We
therefore need to be very careful when chasing rq->timeline that the
pointer does not disappear beneath us. The vast majority of users are in
a protected context, either during request construction or retirement,
where the timeline->mutex is held and the timeline cannot disappear. It
is those few off the beaten path (where we access a second timeline) that
need extra scrutiny -- to be added in the next patch after first adding
the warnings about dangerous access.

One complication, where we cannot use the timeline->mutex itself, is
during request submission onto hardware (under spinlocks). Here, we want
to check on the timeline to finalize the breadcrumb, and so we need to
impose a second rule to ensure that the request->timeline is indeed
valid. As we are submitting the request, it's context and timeline must
be pinned, as it will be used by the hardware. Since it is pinned, we
know the request->timeline must still be valid, and we cannot submit the
idle barrier until after we release the engine->active.lock, ergo while
submitting and holding that spinlock, a second thread cannot release the
timeline.

v2: Don't be lazy inside selftests; hold the timeline->mutex for as long
as we need it, and tidy up acquiring the timeline with a bit of
refactoring (i915_active_add_request)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190919111912.21631-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.h | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8ac6e1226a56..b18f49528ded 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -113,7 +113,7 @@ struct i915_request {
 	struct intel_engine_cs *engine;
 	struct intel_context *hw_context;
 	struct intel_ring *ring;
-	struct intel_timeline *timeline;
+	struct intel_timeline __rcu *timeline;
 	struct list_head signal_link;
 
 	/*
@@ -442,6 +442,26 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
+static inline struct intel_timeline *
+i915_request_timeline(struct i915_request *rq)
+{
+	/* Valid only while the request is being constructed (or retired). */
+	return rcu_dereference_protected(rq->timeline,
+					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+}
+
+static inline struct intel_timeline *
+i915_request_active_timeline(struct i915_request *rq)
+{
+	/*
+	 * When in use during submission, we are protected by a guarantee that
+	 * the context/timeline is pinned and must remain pinned until after
+	 * this submission.
+	 */
+	return rcu_dereference_protected(rq->timeline,
+					 lockdep_is_held(&rq->engine->active.lock));
+}
+
 bool i915_retire_requests(struct drm_i915_private *i915);
 
 #endif /* I915_REQUEST_H */
-- 
cgit 


From c0bb487dc19fc45dbeede7dcf8f513df51a3cd33 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 23 Sep 2019 12:00:55 +0100
Subject: drm/i915: Only enqueue already completed requests

If we are asked to submit a completed request, just move it onto the
active-list without modifying it's payload. If we try to emit the
modified payload of a completed request, we risk racing with the
ring->head update during retirement which may advance the head past our
breadcrumb and so we generate a warning for the emission being behind
the RING_HEAD.

v2: Commentary for the sneaky, shared responsibility between functions.
v3: Spelling mistakes and bonus assertion

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 66 +++++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_request.c | 44 ++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_request.h |  2 +-
 3 files changed, 74 insertions(+), 38 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0a4812ebd184..bc9badd9c046 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -799,6 +799,17 @@ static bool can_merge_rq(const struct i915_request *prev,
 	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
+	/*
+	 * We do not submit known completed requests. Therefore if the next
+	 * request is already completed, we can pretend to merge it in
+	 * with the previous context (and we will skip updating the ELSP
+	 * and tracking). Thus hopefully keeping the ELSP full with active
+	 * contexts, despite the best efforts of preempt-to-busy to confuse
+	 * us.
+	 */
+	if (i915_request_completed(next))
+		return true;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
@@ -1181,21 +1192,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				continue;
 			}
 
-			if (i915_request_completed(rq)) {
-				ve->request = NULL;
-				ve->base.execlists.queue_priority_hint = INT_MIN;
-				rb_erase_cached(rb, &execlists->virtual);
-				RB_CLEAR_NODE(rb);
-
-				rq->engine = engine;
-				__i915_request_submit(rq);
-
-				spin_unlock(&ve->base.active.lock);
-
-				rb = rb_first_cached(&execlists->virtual);
-				continue;
-			}
-
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
 				return; /* leave this for another */
@@ -1249,11 +1245,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(ve->siblings[0] != engine);
 			}
 
-			__i915_request_submit(rq);
-			if (!i915_request_completed(rq)) {
+			if (__i915_request_submit(rq)) {
 				submit = true;
 				last = rq;
 			}
+
+			/*
+			 * Hmm, we have a bunch of virtual engine requests,
+			 * but the first one was already completed (thanks
+			 * preempt-to-busy!). Keep looking at the veng queue
+			 * until we have no more relevant requests (i.e.
+			 * the normal submit queue has higher priority).
+			 */
+			if (!submit) {
+				spin_unlock(&ve->base.active.lock);
+				rb = rb_first_cached(&execlists->virtual);
+				continue;
+			}
 		}
 
 		spin_unlock(&ve->base.active.lock);
@@ -1266,8 +1274,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
-			if (i915_request_completed(rq))
-				goto skip;
+			bool merge = true;
 
 			/*
 			 * Can we combine this request with the current port?
@@ -1308,14 +1315,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				    ctx_single_port_submission(rq->hw_context))
 					goto done;
 
-				*port = execlists_schedule_in(last, port - execlists->pending);
-				port++;
+				merge = false;
 			}
 
-			last = rq;
-			submit = true;
-skip:
-			__i915_request_submit(rq);
+			if (__i915_request_submit(rq)) {
+				if (!merge) {
+					*port = execlists_schedule_in(last, port - execlists->pending);
+					port++;
+					last = NULL;
+				}
+
+				GEM_BUG_ON(last &&
+					   !can_merge_ctx(last->hw_context,
+							  rq->hw_context));
+
+				submit = true;
+				last = rq;
+			}
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9bd8538b1907..a8916412759b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
+	bool result = false;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
@@ -389,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->active.lock);
 
+	/*
+	 * With the advent of preempt-to-busy, we frequently encounter
+	 * requests that we have unsubmitted from HW, but left running
+	 * until the next ack and so have completed in the meantime. On
+	 * resubmission of that completed request, we can skip
+	 * updating the payload, and execlists can even skip submitting
+	 * the request.
+	 *
+	 * We must remove the request from the caller's priority queue,
+	 * and the caller must only call us when the request is in their
+	 * priority queue, under the active.lock. This ensures that the
+	 * request has *not* yet been retired and we can safely move
+	 * the request into the engine->active.list where it will be
+	 * dropped upon retiring. (Otherwise if resubmit a *retired*
+	 * request, this would be a horrible use-after-free.)
+	 */
+	if (i915_request_completed(request))
+		goto xfer;
+
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
@@ -412,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
 	    i915_sw_fence_signaled(&request->semaphore))
 		engine->saturated |= request->sched.semaphores;
 
-	/* We may be recursing from the signal callback of another i915 fence */
-	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	engine->emit_fini_breadcrumb(request,
+				     request->ring->vaddr + request->postfix);
+
+	trace_i915_request_execute(request);
+	engine->serial++;
+	result = true;
 
-	list_move_tail(&request->sched.link, &engine->active.requests);
+xfer:	/* We may be recursing from the signal callback of another i915 fence */
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
-	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+		list_move_tail(&request->sched.link, &engine->active.requests);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -429,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	engine->emit_fini_breadcrumb(request,
-				     request->ring->vaddr + request->postfix);
-
-	engine->serial++;
-
-	trace_i915_request_execute(request);
+	return result;
 }
 
 void i915_request_submit(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index b18f49528ded..ec5bb4c2e5ae 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
 
 void i915_request_add(struct i915_request *rq);
 
-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
 void i915_request_skip(struct i915_request *request, int error);
-- 
cgit 


From b1e3177bd1d8f41e2a9cc847e56a96cdc0eefe62 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Oct 2019 14:40:00 +0100
Subject: drm/i915: Coordinate i915_active with its own mutex

Forgo the struct_mutex serialisation for i915_active, and interpose its
own mutex handling for active/retire.

This is a multi-layered sleight-of-hand. First, we had to ensure that no
active/retire callbacks accidentally inverted the mutex ordering rules,
nor assumed that they were themselves serialised by struct_mutex. More
challenging though, is the rule over updating elements of the active
rbtree. Instead of the whole i915_active now being serialised by
struct_mutex, allocations/rotations of the tree are serialised by the
i915_active.mutex and individual nodes are serialised by the caller
using the i915_timeline.mutex (we need to use nested spinlocks to
interact with the dma_fence callback lists).

The pain point here is that instead of a single mutex around execbuf, we
now have to take a mutex for active tracker (one for each vma, context,
etc) and a couple of spinlocks for each fence update. The improvement in
fine grained locking allowing for multiple concurrent clients
(eventually!) should be worth it in typical loads.

v2: Add some comments that barely elucidate anything :(

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-6-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/display/intel_frontbuffer.c  |   2 +-
 drivers/gpu/drm/i915/display/intel_overlay.c      |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c       |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pm.c            |   9 +-
 drivers/gpu/drm/i915/gt/intel_context.c           |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.c       |   2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c             |  10 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c          |   7 +-
 drivers/gpu/drm/i915/gt/intel_timeline_types.h    |   9 +-
 drivers/gpu/drm/i915/gt/selftest_context.c        |  16 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c            |  10 +-
 drivers/gpu/drm/i915/gt/selftests/mock_timeline.c |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c              |   3 -
 drivers/gpu/drm/i915/i915_active.c                | 322 +++++++++++-----------
 drivers/gpu/drm/i915/i915_active.h                | 319 ++++-----------------
 drivers/gpu/drm/i915/i915_active_types.h          |  23 +-
 drivers/gpu/drm/i915/i915_gem.c                   |  42 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c               |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c             |   4 +-
 drivers/gpu/drm/i915/i915_request.c               |  38 +--
 drivers/gpu/drm/i915/i915_request.h               |   1 -
 drivers/gpu/drm/i915/i915_vma.c                   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_active.c      |  32 +--
 24 files changed, 298 insertions(+), 572 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 6428b8dd70d3..84b164f31895 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	front->obj = obj;
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
-	i915_active_init(i915, &front->write,
+	i915_active_init(&front->write,
 			 frontbuffer_active,
 			 i915_active_may_sleep(frontbuffer_retire));
 
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 3f4ac1ee7668..e12e1a753af0 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	i915_active_init(dev_priv,
-			 &overlay->last_flip,
+	i915_active_init(&overlay->last_flip,
 			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4cd7d2ecf1d5..9d85aab68d34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 				void (*task)(void *data),
 				void *data)
 {
-	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err = 0;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!task);
 
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb)
 		return -ENOMEM;
 
-	i915_active_init(i915, &cb->base, NULL, cb_retire);
+	i915_active_init(&cb->base, NULL, cb_retire);
 	err = i915_active_acquire(&cb->base);
 	if (err) {
 		kfree(cb);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e1aab2fd1cd9..c00b4f077f9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -8,6 +8,7 @@
 #define __I915_GEM_OBJECT_TYPES_H__
 
 #include <drm/drm_gem.h>
+#include <uapi/drm/i915_drm.h>
 
 #include "i915_active.h"
 #include "i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index cca192ecff8b..0a4115c6c275 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
 	struct llist_node *node, *next;
 
 	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct i915_active_request *active =
+		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
-				     typeof(*active), link);
+				     typeof(*cb), node);
 
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, NULL);
+		cb->func(NULL, cb);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 06fabdf205cf..35a40c2820a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_init(ctx->i915, &ce->active,
+	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
 }
 
@@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 			return err;
 
 		/* Queue this switch after current activity by this context. */
-		err = i915_active_request_set(&tl->last_request, rq);
+		err = i915_active_fence_set(&tl->last_request, rq);
 		mutex_unlock(&tl->mutex);
 		if (err)
 			return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 81fab101fdb4..3cdbd5f8b5be 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 		return ERR_PTR(-ENOMEM);
 
 	node->pool = pool;
-	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+	i915_active_init(&node->active, pool_active, pool_retire);
 
 	obj = i915_gem_object_create_internal(engine->i915, sz);
 	if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index e189897e8797..055496f0825f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 */
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
@@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
 		 * in the worst case.
 		 */
-		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
-		i915_request_put(rq);
+		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+		dma_fence_put(fence);
 
 		/* Restart iteration after droping lock */
 		spin_lock_irqsave(&timelines->lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 653f60e78392..0f959694303c 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
-	i915_active_init(hwsp->gt->i915, &cl->active,
-			 __cacheline_active, __cacheline_retire);
+	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
 
 	return cl;
 }
@@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
 	if (err)
 		goto err_cacheline;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index c668c4c50e75..98d9ee166379 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -58,12 +58,13 @@ struct intel_timeline {
 	 */
 	struct list_head requests;
 
-	/* Contains an RCU guarded pointer to the last request. No reference is
+	/*
+	 * Contains an RCU guarded pointer to the last request. No reference is
 	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_active_request_get_request_rcu(), or hold the
-	 * struct_mutex.
+	 * the request using i915_active_fence_get(), or manage the RCU
+	 * protection themselves (cf the i915_active_fence API).
 	 */
-	struct i915_active_request last_request;
+	struct i915_active_fence last_request;
 
 	/**
 	 * We track the most recent seqno that we wait on in every context so
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 4ce1e25433d2..e6bcbe7ab5e1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce)
 
 	mutex_lock(&tl->mutex);
 	do {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 		long timeout;
 
-		rcu_read_lock();
-		rq = rcu_dereference(tl->last_request.request);
-		if (rq)
-			rq = i915_request_get_rcu(rq);
-		rcu_read_unlock();
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			break;
 
-		timeout = i915_request_wait(rq, 0, HZ / 10);
+		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
 		if (timeout < 0)
 			err = timeout;
 		else
-			i915_request_retire_upto(rq);
+			i915_request_retire_upto(to_request(fence));
 
-		i915_request_put(rq);
+		dma_fence_put(fence);
 	} while (!err);
 	mutex_unlock(&tl->mutex);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index e7bc2dbbb2a5..dd25636abc5b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1172,9 +1172,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	if (!rq)
 		return NULL;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	rq->engine = engine;
 
+	spin_lock_init(&rq->lock);
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+	rq->fence.lock = &rq->lock;
+	rq->fence.ops = &i915_fence_ops;
+
 	i915_sched_node_init(&rq->sched);
 
 	/* mark this request as permanently incomplete */
@@ -1267,8 +1271,8 @@ static int live_suppress_wait_preempt(void *arg)
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
-							  dummy);
+				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+							&dummy->fence);
 				i915_request_add(rq[i]);
 			}
 
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..2a77c051f36a 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1a28e3666951..7052c90d937f 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct i915_request *rq;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (workload->req)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7ca066688b98..023652ded4be 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -12,8 +12,6 @@
 #include "i915_active.h"
 #include "i915_globals.h"
 
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
-
 /*
  * Active refs memory management
  *
@@ -27,35 +25,35 @@ static struct i915_global_active {
 } global;
 
 struct active_node {
-	struct i915_active_request base;
+	struct i915_active_fence base;
 	struct i915_active *ref;
 	struct rb_node node;
 	u64 timeline;
 };
 
 static inline struct active_node *
-node_from_active(struct i915_active_request *active)
+node_from_active(struct i915_active_fence *active)
 {
 	return container_of(active, struct active_node, base);
 }
 
 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
 
-static inline bool is_barrier(const struct i915_active_request *active)
+static inline bool is_barrier(const struct i915_active_fence *active)
 {
-	return IS_ERR(rcu_access_pointer(active->request));
+	return IS_ERR(rcu_access_pointer(active->fence));
 }
 
 static inline struct llist_node *barrier_to_ll(struct active_node *node)
 {
 	GEM_BUG_ON(!is_barrier(&node->base));
-	return (struct llist_node *)&node->base.link;
+	return (struct llist_node *)&node->base.cb.node;
 }
 
 static inline struct intel_engine_cs *
 __barrier_to_engine(struct active_node *node)
 {
-	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
+	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
 }
 
 static inline struct intel_engine_cs *
@@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node)
 static inline struct active_node *barrier_from_ll(struct llist_node *x)
 {
 	return container_of((struct list_head *)x,
-			    struct active_node, base.link);
+			    struct active_node, base.cb.node);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
@@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref)
 	if (!retire)
 		return;
 
-	GEM_BUG_ON(rcu_access_pointer(ref->excl));
+	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
-		GEM_BUG_ON(i915_active_request_isset(&it->base));
+		GEM_BUG_ON(i915_active_fence_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
 	}
 
 	/* After the final retire, the entire struct may be freed */
 	if (ref->retire)
 		ref->retire(ref);
+
+	/* ... except if you wait on it, you must manage your own references! */
+	wake_up_var(ref);
 }
 
 static void
@@ -189,12 +190,20 @@ active_retire(struct i915_active *ref)
 }
 
 static void
-node_retire(struct i915_active_request *base, struct i915_request *rq)
+node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	active_retire(node_from_active(base)->ref);
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static struct i915_active_request *
+static void
+excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct i915_active, excl.cb));
+}
+
+static struct i915_active_fence *
 active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
@@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
+	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -253,8 +262,7 @@ out:
 	return &node->base;
 }
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
@@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915,
 
 	debug_active_init(ref);
 
-	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = ptr_unpack_bits(retire, &bits, 2);
 	if (bits & I915_ACTIVE_MAY_SLEEP)
 		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
 
-	ref->excl = NULL;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
+	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
 	INIT_WORK(&ref->work, active_work);
 }
 
@@ -329,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq)
+		    struct dma_fence *fence)
 {
-	struct i915_active_request *active;
+	struct i915_active_fence *active;
 	int err;
 
 	lockdep_assert_held(&tl->mutex);
@@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref,
 		 * request that we want to emit on the kernel_context.
 		 */
 		__active_del_barrier(ref, node_from_active(active));
-		RCU_INIT_POINTER(active->request, NULL);
-		INIT_LIST_HEAD(&active->link);
-	} else {
-		if (!i915_active_request_isset(active))
-			atomic_inc(&ref->count);
+		RCU_INIT_POINTER(active->fence, NULL);
+		atomic_dec(&ref->count);
 	}
-	GEM_BUG_ON(!atomic_read(&ref->count));
-	__i915_active_request_set(active, rq);
+	if (!__i915_active_fence_set(active, fence))
+		atomic_inc(&ref->count);
 
 out:
 	i915_active_release(ref);
 	return err;
 }
 
-static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
-{
-	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
-
-	RCU_INIT_POINTER(ref->excl, NULL);
-	dma_fence_put(f);
-
-	active_retire(ref);
-}
-
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
 {
 	/* We expect the caller to manage the exclusive timeline ordering */
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
-	dma_fence_get(f);
-
-	rcu_read_lock();
-	if (rcu_access_pointer(ref->excl)) {
-		struct dma_fence *old;
-
-		old = dma_fence_get_rcu_safe(&ref->excl);
-		if (old) {
-			if (dma_fence_remove_callback(old, &ref->excl_cb))
-				atomic_dec(&ref->count);
-			dma_fence_put(old);
-		}
-	}
-	rcu_read_unlock();
-
-	atomic_inc(&ref->count);
-	rcu_assign_pointer(ref->excl, f);
+	/*
+	 * As we don't know which mutex the caller is using, we told a small
+	 * lie to the debug code that it is using the i915_active.mutex;
+	 * and now we must stick to that lie.
+	 */
+	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
+	if (!__i915_active_fence_set(&ref->excl, f))
+		atomic_inc(&ref->count);
+	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
+}
 
-	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
-		RCU_INIT_POINTER(ref->excl, NULL);
-		atomic_dec(&ref->count);
-		dma_fence_put(f);
-	}
+bool i915_active_acquire_if_busy(struct i915_active *ref)
+{
+	debug_active_assert(ref);
+	return atomic_add_unless(&ref->count, 1, 0);
 }
 
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
 
-	debug_active_assert(ref);
-	if (atomic_add_unless(&ref->count, 1, 0))
+	if (i915_active_acquire_if_busy(ref))
 		return 0;
 
 	err = mutex_lock_interruptible(&ref->mutex);
@@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref)
 	active_retire(ref);
 }
 
-static void __active_ungrab(struct i915_active *ref)
-{
-	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
-}
-
-bool i915_active_trygrab(struct i915_active *ref)
+static void enable_signaling(struct i915_active_fence *active)
 {
-	debug_active_assert(ref);
-
-	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
-		return false;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		__active_ungrab(ref);
-		return false;
-	}
+	struct dma_fence *fence;
 
-	return true;
-}
-
-void i915_active_ungrab(struct i915_active *ref)
-{
-	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
-
-	active_retire(ref);
-	__active_ungrab(ref);
-}
-
-static int excl_wait(struct i915_active *ref)
-{
-	struct dma_fence *old;
-	int err = 0;
-
-	if (!rcu_access_pointer(ref->excl))
-		return 0;
-
-	rcu_read_lock();
-	old = dma_fence_get_rcu_safe(&ref->excl);
-	rcu_read_unlock();
-	if (old) {
-		err = dma_fence_wait(old, true);
-		dma_fence_put(old);
-	}
+	fence = i915_active_fence_get(active);
+	if (!fence)
+		return;
 
-	return err;
+	dma_fence_enable_sw_signaling(fence);
+	dma_fence_put(fence);
 }
 
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int err;
+	int err = 0;
 
 	might_sleep();
-	might_lock(&ref->mutex);
-
-	if (i915_active_is_idle(ref))
-		return 0;
-
-	err = mutex_lock_interruptible(&ref->mutex);
-	if (err)
-		return err;
 
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		mutex_unlock(&ref->mutex);
+	if (!i915_active_acquire_if_busy(ref))
 		return 0;
-	}
-
-	err = excl_wait(ref);
-	if (err)
-		goto out;
 
+	/* Flush lazy signals */
+	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
-			err = -EBUSY;
-			break;
-		}
+		if (is_barrier(&it->base)) /* unconnected idle barrier */
+			continue;
 
-		err = i915_active_request_retire(&it->base, BKL(ref));
-		if (err)
-			break;
+		enable_signaling(&it->base);
 	}
+	/* Any fence added after the wait begins will not be auto-signaled */
 
-out:
-	__active_retire(ref);
+	i915_active_release(ref);
 	if (err)
 		return err;
 
-	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
+	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
 		return -EINTR;
 
-	flush_work(&ref->work);
-	if (!i915_active_is_idle(ref))
-		return -EBUSY;
-
 	return 0;
 }
 
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active)
-{
-	struct i915_request *barrier =
-		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
-
-	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
 	int err = 0;
 
-	if (rcu_access_pointer(ref->excl)) {
+	if (rcu_access_pointer(ref->excl.fence)) {
 		struct dma_fence *fence;
 
 		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&ref->excl);
+		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
 		rcu_read_unlock();
 		if (fence) {
 			err = i915_request_await_dma_fence(rq, fence);
@@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref)
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
-	return node->timeline == idx && !i915_active_request_isset(&node->base);
+	return node->timeline == idx && !i915_active_fence_isset(&node->base);
 }
 
 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
@@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			node->base.lock =
 				&engine->kernel_context->timeline->mutex;
 #endif
-			RCU_INIT_POINTER(node->base.request, NULL);
-			node->base.retire = node_retire;
+			RCU_INIT_POINTER(node->base.fence, NULL);
+			node->base.cb.func = node_retire;
 			node->timeline = idx;
 			node->ref = ref;
 		}
 
-		if (!i915_active_request_isset(&node->base)) {
+		if (!i915_active_fence_isset(&node->base)) {
 			/*
 			 * Mark this as being *our* unconnected proto-node.
 			 *
@@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 * and then we can use the rb_node and list pointers
 			 * for our tracking of the pending barrier.
 			 */
-			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-			node->base.link.prev = (void *)engine;
+			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
+			node->base.cb.node.prev = (void *)engine;
 			atomic_inc(&ref->count);
 		}
 
@@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct llist_node *node, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	node = llist_del_all(&engine->barrier_tasks);
+	if (!node)
+		return;
 	/*
 	 * Attach the list of proto-fences to the in-flight request such
 	 * that the parent i915_active will be released when this request
 	 * is retired.
 	 */
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
+	spin_lock_irqsave(&rq->lock, flags);
+	llist_for_each_safe(node, next, node) {
+		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
 		smp_wmb(); /* serialise with reuse_idle_barrier */
-		list_add_tail((struct list_head *)node, &rq->active_list);
+		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define active_is_held(active) lockdep_is_held((active)->lock)
+#else
+#define active_is_held(active) true
+#endif
+
+/*
+ * __i915_active_fence_set: Update the last active fence along its timeline
+ * @active: the active tracker
+ * @fence: the new fence (under construction)
+ *
+ * Records the new @fence as the last active fence along its timeline in
+ * this active tracker, moving the tracking callbacks from the previous
+ * fence onto this one. Returns the previous fence (if not already completed),
+ * which the caller must ensure is executed before the new fence. To ensure
+ * that the order of fences within the timeline of the i915_active_fence is
+ * maintained, it must be locked by the caller.
+ */
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence)
+{
+	struct dma_fence *prev;
+	unsigned long flags;
+
+	/* NB: must be serialised by an outer timeline mutex (active->lock) */
+	spin_lock_irqsave(fence->lock, flags);
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+
+	prev = rcu_dereference_protected(active->fence, active_is_held(active));
+	if (prev) {
+		GEM_BUG_ON(prev == fence);
+		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+		__list_del_entry(&active->cb.node);
+		spin_unlock(prev->lock); /* serialise with prev->cb_list */
+
+		/*
+		 * active->fence is reset by the callback from inside
+		 * interrupt context. We need to serialise our list
+		 * manipulation with the fence->lock to prevent the prev
+		 * being lost inside an interrupt (it can't be replaced as
+		 * no other caller is allowed to enter __i915_active_fence_set
+		 * as we hold the timeline lock). After serialising with
+		 * the callback, we need to double check which ran first,
+		 * our list_del() [decoupling prev from the callback] or
+		 * the callback...
+		 */
+		prev = rcu_access_pointer(active->fence);
 	}
+
+	rcu_assign_pointer(active->fence, fence);
+	list_add_tail(&active->cb.node, &fence->cb_list);
+
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return prev;
 }
 
-int i915_active_request_set(struct i915_active_request *active,
-			    struct i915_request *rq)
+int i915_active_fence_set(struct i915_active_fence *active,
+			  struct i915_request *rq)
 {
-	int err;
+	struct dma_fence *fence;
+	int err = 0;
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	lockdep_assert_held(active->lock);
 #endif
 
-	/* Must maintain ordering wrt previous active requests */
-	err = i915_request_await_active_request(rq, active);
-	if (err)
-		return err;
+	/* Must maintain timeline ordering wrt previous active requests */
+	rcu_read_lock();
+	fence = __i915_active_fence_set(active, &rq->fence);
+	if (fence) /* but the previous fence may not belong to that timeline! */
+		fence = dma_fence_get_rcu(fence);
+	rcu_read_unlock();
+	if (fence) {
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
 
-	__i915_active_request_set(active, rq);
-	return 0;
+	return err;
 }
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request)
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	/* Space left intentionally blank */
+	i915_active_fence_cb(fence, cb);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 90034f61b7c2..4f52fe6146d2 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -12,6 +12,10 @@
 #include "i915_active_types.h"
 #include "i915_request.h"
 
+struct i915_request;
+struct intel_engine_cs;
+struct intel_timeline;
+
 /*
  * We treat requests as fences. This is not be to confused with our
  * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
@@ -28,308 +32,108 @@
  * write access so that we can perform concurrent read operations between
  * the CPU and GPU engines, as well as waiting for all rendering to
  * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_active_request to track the most recent (in
+ * object then embeds a #i915_active_fence to track the most recent (in
  * retirement order) request relevant for the desired mode of access.
- * The #i915_active_request is updated with i915_active_request_set() to
+ * The #i915_active_fence is updated with i915_active_fence_set() to
  * track the most recent fence request, typically this is done as part of
  * i915_vma_move_to_active().
  *
- * When the #i915_active_request completes (is retired), it will
+ * When the #i915_active_fence completes (is retired), it will
  * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_active_request.request == NULL). The owner
+ * itself as idle (i915_active_fence.request == NULL). The owner
  * can then perform any action, such as delayed freeing of an active
  * resource including itself.
  */
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request);
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
 
 /**
- * i915_active_request_init - prepares the activity tracker for use
+ * __i915_active_fence_init - prepares the activity tracker for use
  * @active - the active tracker
- * @rq - initial request to track, can be NULL
+ * @fence - initial fence to track, can be NULL
  * @func - a callback when then the tracker is retired (becomes idle),
  *         can be NULL
  *
- * i915_active_request_init() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
+ * i915_active_fence_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active fence
+ * associated with it. When the last fence becomes idle, when it is retired
  * after completion, the optional callback @func is invoked.
  */
 static inline void
-i915_active_request_init(struct i915_active_request *active,
+__i915_active_fence_init(struct i915_active_fence *active,
 			 struct mutex *lock,
-			 struct i915_request *rq,
-			 i915_active_retire_fn retire)
+			 void *fence,
+			 dma_fence_func_t fn)
 {
-	RCU_INIT_POINTER(active->request, rq);
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_active_retire_noop;
+	RCU_INIT_POINTER(active->fence, fence);
+	active->cb.func = fn ?: i915_active_noop;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	active->lock = lock;
 #endif
 }
 
-#define INIT_ACTIVE_REQUEST(name, lock) \
-	i915_active_request_init((name), (lock), NULL, NULL)
-
-/**
- * i915_active_request_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * __i915_active_request_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-__i915_active_request_set(struct i915_active_request *active,
-			  struct i915_request *request)
-{
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	lockdep_assert_held(active->lock);
-#endif
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
+#define INIT_ACTIVE_FENCE(A, LOCK) \
+	__i915_active_fence_init((A), (LOCK), NULL, NULL)
 
-int __must_check
-i915_active_request_set(struct i915_active_request *active,
-			struct i915_request *rq);
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence);
 
 /**
- * i915_active_request_raw - return the active request
+ * i915_active_fence_set - updates the tracker to watch the current fence
  * @active - the active tracker
+ * @rq - the request to watch
  *
- * i915_active_request_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
+ * i915_active_fence_set() watches the given @rq for completion. While
+ * that @rq is busy, the @active reports busy. When that @rq is signaled
+ * (or else retired) the @active tracker is updated to report idle.
  */
-static inline struct i915_request *
-i915_active_request_raw(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return rcu_dereference_protected(active->request,
-					 lockdep_is_held(mutex));
-}
-
-/**
- * i915_active_request_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_active_request_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_peek(const struct i915_active_request *active,
-			 struct mutex *mutex)
-{
-	struct i915_request *request;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request || i915_request_completed(request))
-		return NULL;
-
-	return request;
-}
-
-/**
- * i915_active_request_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_active_request_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_get(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return i915_request_get(i915_active_request_peek(active, mutex));
-}
-
-/**
- * __i915_active_request_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_active_request_get() returns a reference to the active request,
- * or NULL if the active tracker is idle. The caller must hold the RCU read
- * lock, but the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_active_request_get_rcu(const struct i915_active_request *active)
-{
-	/*
-	 * Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-	 * slab of request objects will not be freed whilst we hold the
-	 * RCU read lock. It does not guarantee that the request itself
-	 * will not be freed and then *reused*. Viz,
-	 *
-	 * Thread A			Thread B
-	 *
-	 * rq = active.request
-	 *				retire(rq) -> free(rq);
-	 *				(rq is now first on the slab freelist)
-	 *				active.request = NULL
-	 *
-	 *				rq = new submission on a new object
-	 * ref(rq)
-	 *
-	 * To prevent the request from being reused whilst the caller
-	 * uses it, we take a reference like normal. Whilst acquiring
-	 * the reference we check that it is not in a destroyed state
-	 * (refcnt == 0). That prevents the request being reallocated
-	 * whilst the caller holds on to it. To check that the request
-	 * was not reallocated as we acquired the reference we have to
-	 * check that our request remains the active request across
-	 * the lookup, in the same manner as a seqlock. The visibility
-	 * of the pointer versus the reference counting is controlled
-	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-	 *
-	 * In the middle of all that, we inspect whether the request is
-	 * complete. Retiring is lazy so the request may be completed long
-	 * before the active tracker is updated. Querying whether the
-	 * request is complete is far cheaper (as it involves no locked
-	 * instructions setting cachelines to exclusive) than acquiring
-	 * the reference, so we do it first. The RCU read lock ensures the
-	 * pointer dereference is valid, but does not ensure that the
-	 * seqno nor HWS is the right one! However, if the request was
-	 * reallocated, that means the active tracker's request was complete.
-	 * If the new request is also complete, then both are and we can
-	 * just report the active tracker is idle. If the new request is
-	 * incomplete, then we acquire a reference on it and check that
-	 * it remained the active request.
-	 *
-	 * It is then imperative that we do not zero the request on
-	 * reallocation, so that we can chase the dangling pointers!
-	 * See i915_request_alloc().
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_request_completed(request))
-			return NULL;
-
-		/*
-		 * An especially silly compiler could decide to recompute the
-		 * result of i915_request_completed, more specifically
-		 * re-emit the load for request->fence.seqno. A race would catch
-		 * a later seqno value, which could flip the result from true to
-		 * false. Which means part of the instructions below might not
-		 * be executed, while later on instructions are executed. Due to
-		 * barriers within the refcounting the inconsistency can't reach
-		 * past the call to i915_request_get_rcu, but not executing
-		 * that while still executing i915_request_put() creates
-		 * havoc enough.  Prevent this with a compiler barrier.
-		 */
-		barrier();
-
-		request = i915_request_get_rcu(request);
-
-		/*
-		 * What stops the following rcu_access_pointer() from occurring
-		 * before the above i915_request_get_rcu()? If we were
-		 * to read the value before pausing to get the reference to
-		 * the request, we may not notice a change in the active
-		 * tracker.
-		 *
-		 * The rcu_access_pointer() is a mere compiler barrier, which
-		 * means both the CPU and compiler are free to perform the
-		 * memory read without constraint. The compiler only has to
-		 * ensure that any operations after the rcu_access_pointer()
-		 * occur afterwards in program order. This means the read may
-		 * be performed earlier by an out-of-order CPU, or adventurous
-		 * compiler.
-		 *
-		 * The atomic operation at the heart of
-		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-		 * atomic_inc_not_zero() which is only a full memory barrier
-		 * when successful. That is, if i915_request_get_rcu()
-		 * returns the request (and so with the reference counted
-		 * incremented) then the following read for rcu_access_pointer()
-		 * must occur after the atomic operation and so confirm
-		 * that this request is the one currently being tracked.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		if (!request || request == rcu_access_pointer(active->request))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
+int __must_check
+i915_active_fence_set(struct i915_active_fence *active,
+		      struct i915_request *rq);
 /**
- * i915_active_request_get_unlocked - return a reference to the active request
+ * i915_active_fence_get - return a reference to the active fence
  * @active - the active tracker
  *
- * i915_active_request_get_unlocked() returns a reference to the active request,
+ * i915_active_fence_get() returns a reference to the active fence,
  * or NULL if the active tracker is idle. The reference is obtained under RCU,
  * so no locking is required by the caller.
  *
- * The reference should be freed with i915_request_put().
+ * The reference should be freed with dma_fence_put().
  */
-static inline struct i915_request *
-i915_active_request_get_unlocked(const struct i915_active_request *active)
+static inline struct dma_fence *
+i915_active_fence_get(struct i915_active_fence *active)
 {
-	struct i915_request *request;
+	struct dma_fence *fence;
 
 	rcu_read_lock();
-	request = __i915_active_request_get_rcu(active);
+	fence = dma_fence_get_rcu_safe(&active->fence);
 	rcu_read_unlock();
 
-	return request;
+	return fence;
 }
 
 /**
- * i915_active_request_isset - report whether the active tracker is assigned
+ * i915_active_fence_isset - report whether the active tracker is assigned
  * @active - the active tracker
  *
- * i915_active_request_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
+ * i915_active_fence_isset() returns true if the active tracker is currently
+ * assigned to a fence. Due to the lazy retiring, that fence may be idle
  * and this may report stale information.
  */
 static inline bool
-i915_active_request_isset(const struct i915_active_request *active)
+i915_active_fence_isset(const struct i915_active_fence *active)
 {
-	return rcu_access_pointer(active->request);
+	return rcu_access_pointer(active->fence);
 }
 
-/**
- * i915_active_request_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_active_request_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_active_request_retire(struct i915_active_request *active,
-			   struct mutex *mutex)
+static inline void
+i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	struct i915_request *request;
-	long ret;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	if (ret < 0)
-		return ret;
+	struct i915_active_fence *active =
+		container_of(cb, typeof(*active), cb);
 
-	list_del_init(&active->link);
-	RCU_INIT_POINTER(active->request, NULL);
-
-	active->retire(active, request);
-
-	return 0;
+	RCU_INIT_POINTER(active->fence, NULL);
 }
 
 /*
@@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active,
  * synchronisation.
  */
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key);
-#define i915_active_init(i915, ref, active, retire) do {		\
+#define i915_active_init(ref, active, retire) do {		\
 	static struct lock_class_key __key;				\
 									\
-	__i915_active_init(i915, ref, active, retire, &__key);		\
+	__i915_active_init(ref, active, retire, &__key);		\
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq);
+		    struct dma_fence *fence);
 
 static inline int
 i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 {
-	return i915_active_ref(ref, i915_request_timeline(rq), rq);
+	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
 }
 
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
 
 static inline bool i915_active_has_exclusive(struct i915_active *ref)
 {
-	return rcu_access_pointer(ref->excl);
+	return rcu_access_pointer(ref->excl.fence);
 }
 
 int i915_active_wait(struct i915_active *ref);
 
-int i915_request_await_active(struct i915_request *rq,
-			      struct i915_active *ref);
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active);
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
 
 int i915_active_acquire(struct i915_active *ref);
+bool i915_active_acquire_if_busy(struct i915_active *ref);
 void i915_active_release(struct i915_active *ref);
-void __i915_active_release_nested(struct i915_active *ref, int subclass);
-
-bool i915_active_trygrab(struct i915_active *ref);
-void i915_active_ungrab(struct i915_active *ref);
 
 static inline bool
 i915_active_is_idle(const struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 021167f0004d..d89a74c142c6 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -17,17 +17,9 @@
 
 #include "i915_utils.h"
 
-struct drm_i915_private;
-struct i915_active_request;
-struct i915_request;
-
-typedef void (*i915_active_retire_fn)(struct i915_active_request *,
-				      struct i915_request *);
-
-struct i915_active_request {
-	struct i915_request __rcu *request;
-	struct list_head link;
-	i915_active_retire_fn retire;
+struct i915_active_fence {
+	struct dma_fence __rcu *fence;
+	struct dma_fence_cb cb;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	/*
 	 * Incorporeal!
@@ -53,20 +45,17 @@ struct active_node;
 #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
 
 struct i915_active {
-	struct drm_i915_private *i915;
+	atomic_t count;
+	struct mutex mutex;
 
 	struct active_node *cache;
 	struct rb_root tree;
-	struct mutex mutex;
-	atomic_t count;
 
 	/* Preallocated "exclusive" node */
-	struct dma_fence __rcu *excl;
-	struct dma_fence_cb excl_cb;
+	struct i915_active_fence excl;
 
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
-#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc5c618f6c19..b0aa0a7c680f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
 
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
-		/*
-		 * "Race-to-idle".
-		 *
-		 * Switching to the kernel context is often used a synchronous
-		 * step prior to idling, e.g. in suspend for flushing all
-		 * current operations to memory before sleeping. These we
-		 * want to complete as quickly as possible to avoid prolonged
-		 * stalls, so allow the gpu to boost to maximum clocks.
-		 */
-		if (wait & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq);
+		if (!dma_fence_is_i915(fence)) {
+			timeout = dma_fence_wait_timeout(fence,
+							 flags & I915_WAIT_INTERRUPTIBLE,
+							 timeout);
+		} else {
+			struct i915_request *rq = to_request(fence);
+
+			/*
+			 * "Race-to-idle".
+			 *
+			 * Switching to the kernel context is often used as
+			 * a synchronous step prior to idling, e.g. in suspend
+			 * for flushing all current operations to memory before
+			 * sleeping. These we want to complete as quickly as
+			 * possible to avoid prolonged stalls, so allow the gpu
+			 * to boost to maximum clocks.
+			 */
+			if (flags & I915_WAIT_FOR_IDLE_BOOST)
+				gen6_rps_boost(rq);
+
+			timeout = i915_request_wait(rq, flags, timeout);
+		}
 
-		timeout = i915_request_wait(rq, wait, timeout);
-		i915_request_put(rq);
+		dma_fence_put(fence);
 		if (timeout < 0)
 			return timeout;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 55cebf256d03..7462d87f7a48 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = {
 
 static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 {
-	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
 	struct i915_vma *vma;
 
@@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	i915_active_init(i915, &vma->active, NULL, NULL);
+	i915_active_init(&vma->active, NULL, NULL);
 
 	mutex_init(&vma->pages_mutex);
 	vma->vm = i915_vm_get(&ggtt->vm);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6384a06aa5bf..a28ee754b7b4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next,
 	if (!c)
 		return next;
 
-	if (!i915_active_trygrab(&vma->active)) {
+	if (!i915_active_acquire_if_busy(&vma->active)) {
 		kfree(c);
 		return next;
 	}
@@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 			*this->slot =
 				i915_error_object_create(i915, vma, compress);
 
-			i915_active_ungrab(&vma->active);
+			i915_active_release(&vma->active);
 			i915_vma_put(vma);
 
 			capture = this->next;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a8916412759b..4ffe62a42186 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq)
 
 static bool i915_request_retire(struct i915_request *rq)
 {
-	struct i915_active_request *active, *next;
-
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq)
 				  &i915_request_timeline(rq)->requests));
 	rq->ring->head = rq->postfix;
 
-	/*
-	 * Walk through the active list, calling retire on each. This allows
-	 * objects to track their GPU activity and mark themselves as idle
-	 * when their *last* active request is completed (updating state
-	 * tracking lists for eviction, active references for GEM, etc).
-	 *
-	 * As the ->retire() may free the node, we decouple it first and
-	 * pass along the auxiliary information (to avoid dereferencing
-	 * the node after the callback).
-	 */
-	list_for_each_entry_safe(active, next, &rq->active_list, link) {
-		/*
-		 * In microbenchmarks or focusing upon time inside the kernel,
-		 * we may spend an inordinate amount of time simply handling
-		 * the retirement of requests and processing their callbacks.
-		 * Of which, this loop itself is particularly hot due to the
-		 * cache misses when jumping around the list of
-		 * i915_active_request.  So we try to keep this loop as
-		 * streamlined as possible and also prefetch the next
-		 * i915_active_request to try and hide the likely cache miss.
-		 */
-		prefetchw(next);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, rq);
-	}
-
 	local_irq_disable();
 
 	/*
@@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->flags = 0;
 	rq->execution_mask = ALL_ENGINES;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
 
 	/*
@@ -743,7 +711,6 @@ err_unwind:
 	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
-	GEM_BUG_ON(!list_empty(&rq->active_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
@@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request,
-					 lockdep_is_held(&timeline->mutex));
+	prev = to_request(__i915_active_fence_set(&timeline->last_request,
+						  &rq->fence));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, rq);
 
 	return prev;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index ec5bb4c2e5ae..91a885c36c6b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -211,7 +211,6 @@ struct i915_request {
 	 * on the active_list (of their final request).
 	 */
 	struct i915_capture_list *capture_list;
-	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e191247c7c5f..9fdcd4e2c799 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-	i915_active_init(vm->i915, &vma->active,
-			 __i915_vma_active, __i915_vma_retire);
+	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
 	if (ret)
 		return ret;
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
 		return -EBUSY;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index a41785822ed9..2cc71bcf884f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
 		return NULL;
 
 	kref_init(&active->ref);
-	i915_active_init(i915, &active->base, __live_active, __live_retire);
+	i915_active_init(&active->base, __live_active, __live_retire);
 
 	return active;
 }
@@ -146,19 +146,13 @@ static int live_active_wait(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests retire upon waiting */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	i915_active_wait(&active->base);
 	if (!READ_ONCE(active->retired)) {
@@ -168,11 +162,9 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
@@ -182,23 +174,19 @@ static int live_active_retire(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests are indirectly retired */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
@@ -207,10 +195,6 @@ static int live_active_retire(void *arg)
 
 	__live_put(active);
 
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
-- 
cgit 


From 7e8057626640cfedbae000c5032be32269713687 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Oct 2019 14:40:02 +0100
Subject: drm/i915: Drop struct_mutex from around i915_retire_requests()

We don't need to hold struct_mutex now for retiring requests, so drop it
from i915_retire_requests() and i915_gem_wait_for_idle(), finally
removing I915_WAIT_LOCKED for good.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-8-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/gem/i915_gem_client_blt.c     |   7 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c        |  20 +--
 drivers/gpu/drm/i915/gem/i915_gem_pm.c             |  45 +++---
 .../drm/i915/gem/selftests/i915_gem_coherency.c    |  40 +++---
 .../gpu/drm/i915/gem/selftests/i915_gem_context.c  |  18 +--
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c |   6 +-
 .../drm/i915/gem/selftests/i915_gem_object_blt.c   |   4 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c              |  28 ++--
 drivers/gpu/drm/i915/gt/selftest_context.c         |   4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c       |  89 ++----------
 drivers/gpu/drm/i915/gt/selftest_lrc.c             |  23 ++--
 drivers/gpu/drm/i915/gt/selftest_timeline.c        |  91 ++++++-------
 drivers/gpu/drm/i915/gt/selftest_workarounds.c     |   6 +-
 drivers/gpu/drm/i915/i915_debugfs.c                |  42 ++----
 drivers/gpu/drm/i915/i915_gem.c                    |  19 ++-
 drivers/gpu/drm/i915/i915_request.h                |   7 +-
 drivers/gpu/drm/i915/selftests/i915_active.c       |   8 +-
 drivers/gpu/drm/i915/selftests/i915_gem_evict.c    |   2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c      |   6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c      | 151 ++++++---------------
 drivers/gpu/drm/i915/selftests/i915_selftest.c     |   8 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c          |   4 -
 drivers/gpu/drm/i915/selftests/igt_flush_test.c    |  30 ++--
 drivers/gpu/drm/i915/selftests/igt_flush_test.h    |   2 +-
 drivers/gpu/drm/i915/selftests/igt_live_test.c     |   9 +-
 drivers/gpu/drm/i915/selftests/mock_gem_device.c   |   4 -
 26 files changed, 213 insertions(+), 460 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index c1fca5728e6e..81366aa4812b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
 static void clear_pages_worker(struct work_struct *work)
 {
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
-	struct drm_i915_private *i915 = w->ce->engine->i915;
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
 	struct i915_request *rq;
@@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	/* XXX: we need to kill this */
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (unlikely(err))
-		goto out_unlock;
+		goto out_signal;
 
 	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
 	if (IS_ERR(batch)) {
@@ -229,8 +226,6 @@ out_batch:
 	intel_emit_vma_release(w->ce, batch);
 out_unpin:
 	i915_vma_unpin(vma);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 out_signal:
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9d85aab68d34..0ab416887fc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1159,8 +1159,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 }
 
 static int
-__intel_context_reconfigure_sseu(struct intel_context *ce,
-				 struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
 	int ret;
 
@@ -1183,23 +1182,6 @@ unlock:
 	return ret;
 }
 
-static int
-intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
-{
-	struct drm_i915_private *i915 = ce->engine->i915;
-	int ret;
-
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
-}
-
 static int
 user_to_context_sseu(struct drm_i915_private *i915,
 		     const struct drm_i915_gem_context_param_sseu *user,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 5180b2ee1cb7..2ddc3aeaac9d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -48,11 +48,7 @@ static void retire_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.retire_work.work);
 
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&i915->drm.struct_mutex)) {
-		i915_retire_requests(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
+	i915_retire_requests(i915);
 
 	queue_delayed_work(i915->wq,
 			   &i915->gem.retire_work,
@@ -86,26 +82,23 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_FOR_IDLE_BOOST,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-			result = false;
+	if (i915_gem_wait_for_idle(gt->i915,
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/* XXX hide warning from gem_eio */
+		if (i915_modparams.reset) {
+			dev_err(gt->i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
 		}
-	} while (i915_retire_requests(gt->i915) && result);
+
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+		result = false;
+	}
 
 	if (intel_gt_pm_wait_for_idle(gt))
 		result = false;
@@ -145,8 +138,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 
 	user_forcewake(&i915->gt, true);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/*
 	 * We have to flush all the executing contexts to main memory so
 	 * that they can saved in the hibernation image. To ensure the last
@@ -158,8 +149,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	switch_to_kernel_context_sync(&i915->gt);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 0ff7a89aadca..549810f70aeb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
@@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
 	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
@@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	iowrite32(v, &map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
 static int gtt_get(struct drm_i915_gem_object *obj,
@@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
 	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	*v = ioread32(&map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
 static int wc_set(struct drm_i915_gem_object *obj,
@@ -280,7 +293,6 @@ static int igt_gem_coherency(void *arg)
 	struct drm_i915_private *i915 = arg;
 	const struct igt_coherency_mode *read, *write, *over;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	u32 *offsets, *values;
 	int err = 0;
@@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg)
 
 	values = offsets + ncachelines;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg)
 					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 					if (IS_ERR(obj)) {
 						err = PTR_ERR(obj);
-						goto unlock;
+						goto free;
 					}
 
 					i915_random_reorder(offsets, ncachelines, &prng);
@@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg)
 			}
 		}
 	}
-unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+free:
 	kfree(offsets);
 	return err;
 
 put_object:
 	i915_gem_object_put(obj);
-	goto unlock;
+	goto free;
 }
 
 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 8eba0d3a31de..f5402aad9b5a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -164,7 +164,6 @@ struct parallel_switch {
 static int __live_parallel_switch1(void *data)
 {
 	struct parallel_switch *arg = data;
-	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
 	IGT_TIMEOUT(end_time);
 	unsigned long count;
 
@@ -176,16 +175,12 @@ static int __live_parallel_switch1(void *data)
 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
 			i915_request_put(rq);
 
-			mutex_lock(&i915->drm.struct_mutex);
 			rq = i915_request_create(arg->ce[n]);
-			if (IS_ERR(rq)) {
-				mutex_unlock(&i915->drm.struct_mutex);
+			if (IS_ERR(rq))
 				return PTR_ERR(rq);
-			}
 
 			i915_request_get(rq);
 			i915_request_add(rq);
-			mutex_unlock(&i915->drm.struct_mutex);
 		}
 
 		err = 0;
@@ -205,7 +200,6 @@ static int __live_parallel_switch1(void *data)
 static int __live_parallel_switchN(void *data)
 {
 	struct parallel_switch *arg = data;
-	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
 	IGT_TIMEOUT(end_time);
 	unsigned long count;
 	int n;
@@ -215,15 +209,11 @@ static int __live_parallel_switchN(void *data)
 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
 			struct i915_request *rq;
 
-			mutex_lock(&i915->drm.struct_mutex);
 			rq = i915_request_create(arg->ce[n]);
-			if (IS_ERR(rq)) {
-				mutex_unlock(&i915->drm.struct_mutex);
+			if (IS_ERR(rq))
 				return PTR_ERR(rq);
-			}
 
 			i915_request_add(rq);
-			mutex_unlock(&i915->drm.struct_mutex);
 		}
 
 		count++;
@@ -1173,7 +1163,7 @@ __sseu_test(const char *name,
 	if (ret)
 		return ret;
 
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
@@ -1277,7 +1267,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 		goto out_fail;
 
 out_fail:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		ret = -EIO;
 
 	intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 36aca1c172e7..856b8e467ee8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -581,12 +581,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
+	igt_flush_test(i915);
 	intel_gt_pm_put(&i915->gt);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_driver_register__shrinker(i915);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index c21d747e7d05..9ec55b3a3815 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg)
 		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			obj->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_fill_blt(obj, ce, val);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
@@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg)
 		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			dst->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_copy_blt(src, dst, ce);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 29fa1dabbc2e..d4cefdd38431 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -196,26 +196,14 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	mutex_lock(&gt->i915->drm.struct_mutex); /* XXX */
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-		}
-	} while (i915_retire_requests(gt->i915));
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+	if (i915_gem_wait_for_idle(gt->i915, 0,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+	}
 
 	intel_gt_pm_wait_for_idle(gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index e6bcbe7ab5e1..86cffbb0a9cb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -318,7 +318,7 @@ static int live_active_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -431,7 +431,7 @@ static int live_remote_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index d3bee9f88008..ffbb3d23b887 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -58,7 +58,9 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
 	memset(h, 0, sizeof(*h));
 	h->gt = gt;
 
+	mutex_lock(&gt->i915->drm.struct_mutex);
 	h->ctx = kernel_context(gt->i915);
+	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(h->ctx))
 		return PTR_ERR(h->ctx);
 
@@ -285,7 +287,7 @@ static void hang_fini(struct hang *h)
 
 	kernel_context_close(h->ctx);
 
-	igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+	igt_flush_test(h->gt->i915);
 }
 
 static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -309,10 +311,9 @@ static int igt_hang_sanitycheck(void *arg)
 
 	/* Basic check that we can execute our hanging batch */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	for_each_engine(engine, gt->i915, id) {
 		struct intel_wedge_me w;
@@ -355,8 +356,6 @@ static int igt_hang_sanitycheck(void *arg)
 
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -395,8 +394,6 @@ static int igt_reset_nop(void *arg)
 	reset_count = i915_reset_count(global);
 	count = 0;
 	do {
-		mutex_lock(&gt->i915->drm.struct_mutex);
-
 		for_each_engine(engine, gt->i915, id) {
 			int i;
 
@@ -417,7 +414,6 @@ static int igt_reset_nop(void *arg)
 		intel_gt_reset(gt, ALL_ENGINES, NULL);
 		igt_global_reset_unlock(gt);
 
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (intel_gt_is_wedged(gt)) {
 			err = -EIO;
 			break;
@@ -429,16 +425,13 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	} while (time_before(jiffies, end_time));
 	pr_info("%s: %d resets\n", __func__, count);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -494,7 +487,6 @@ static int igt_reset_nop_engine(void *arg)
 				break;
 			}
 
-			mutex_lock(&gt->i915->drm.struct_mutex);
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
@@ -507,7 +499,6 @@ static int igt_reset_nop_engine(void *arg)
 				i915_request_add(rq);
 			}
 			err = intel_engine_reset(engine, NULL);
-			mutex_unlock(&gt->i915->drm.struct_mutex);
 			if (err) {
 				pr_err("i915_reset_engine failed\n");
 				break;
@@ -533,15 +524,12 @@ static int igt_reset_nop_engine(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -563,9 +551,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		return 0;
 
 	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 	}
@@ -593,17 +579,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 			if (active) {
 				struct i915_request *rq;
 
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -647,7 +630,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -655,11 +638,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (active)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -741,10 +721,8 @@ static int active_engine(void *data)
 		struct i915_request *old = rq[idx];
 		struct i915_request *new;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		new = igt_request_alloc(ctx[idx], engine);
 		if (IS_ERR(new)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
 			break;
 		}
@@ -755,7 +733,6 @@ static int active_engine(void *data)
 
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		err = active_request_put(old);
 		if (err)
@@ -795,9 +772,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		return 0;
 
 	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -855,17 +830,14 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			struct i915_request *rq = NULL;
 
 			if (flags & TEST_ACTIVE) {
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -977,9 +949,7 @@ unwind:
 		if (err)
 			break;
 
-		mutex_lock(&gt->i915->drm.struct_mutex);
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -987,11 +957,8 @@ unwind:
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (flags & TEST_ACTIVE)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -1061,7 +1028,6 @@ static int igt_reset_wait(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1109,7 +1075,6 @@ out_rq:
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1189,10 +1154,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 
 	/* Check that we can recover an unbind stuck on a hanging request */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
 	if (IS_ERR(obj)) {
@@ -1255,8 +1219,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 	if (err)
 		goto out_rq;
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (!wait_until_running(&h, rq)) {
 		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 
@@ -1305,16 +1267,12 @@ out_reset:
 		put_task_struct(tsk);
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 out_rq:
 	i915_request_put(rq);
 out_obj:
 	i915_gem_object_put(obj);
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (intel_gt_is_wedged(gt))
 		return -EIO;
 
@@ -1396,7 +1354,6 @@ static int igt_reset_queue(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1511,7 +1468,7 @@ static int igt_reset_queue(void *arg)
 
 		i915_request_put(prev);
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -1519,7 +1476,6 @@ static int igt_reset_queue(void *arg)
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1546,11 +1502,9 @@ static int igt_handle_error(void *arg)
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	err = hang_init(&h, gt);
 	if (err)
-		goto err_unlock;
+		return err;
 
 	rq = hang_create_request(&h, engine);
 	if (IS_ERR(rq)) {
@@ -1574,8 +1528,6 @@ static int igt_handle_error(void *arg)
 		goto err_request;
 	}
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	/* Temporarily disable error capture */
 	error = xchg(&global->first_error, (void *)-1);
 
@@ -1583,8 +1535,6 @@ static int igt_handle_error(void *arg)
 
 	xchg(&global->first_error, error);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	if (rq->fence.error != -EIO) {
 		pr_err("Guilty request not identified!\n");
 		err = -EINVAL;
@@ -1595,8 +1545,6 @@ err_request:
 	i915_request_put(rq);
 err_fini:
 	hang_fini(&h);
-err_unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1689,7 +1637,6 @@ static int igt_reset_engines_atomic(void *arg)
 		return 0;
 
 	igt_global_reset_lock(gt);
-	mutex_lock(&gt->i915->drm.struct_mutex);
 
 	/* Flush any requests before we get started and check basics */
 	if (!igt_force_reset(gt))
@@ -1709,9 +1656,7 @@ static int igt_reset_engines_atomic(void *arg)
 out:
 	/* As we poke around the guts, do a full reset before continuing. */
 	igt_force_reset(gt);
-
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	return err;
@@ -1751,10 +1696,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index dd25636abc5b..04c1cf573642 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -61,7 +61,7 @@ static int live_sanitycheck(void *arg)
 		}
 
 		igt_spinner_end(&spin);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx;
 		}
@@ -384,8 +384,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 	if (err)
 		goto out;
 
-	if (i915_request_wait(head,
-			      I915_WAIT_LOCKED,
+	if (i915_request_wait(head, 0,
 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 		       count, n);
@@ -457,7 +456,7 @@ static int live_timeslice_preempt(void *arg)
 			if (err)
 				goto err_pin;
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			if (igt_flush_test(i915)) {
 				err = -EIO;
 				goto err_pin;
 			}
@@ -1010,7 +1009,7 @@ static int live_nopreempt(void *arg)
 			goto err_wedged;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1075,7 +1074,7 @@ static int live_suppress_self_preempt(void *arg)
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 
 		intel_engine_pm_get(engine);
@@ -1136,7 +1135,7 @@ static int live_suppress_self_preempt(void *arg)
 		}
 
 		intel_engine_pm_put(engine);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1297,7 +1296,7 @@ static int live_suppress_wait_preempt(void *arg)
 			for (i = 0; i < ARRAY_SIZE(client); i++)
 				igt_spinner_end(&client[i].spin);
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			if (igt_flush_test(i915))
 				goto err_wedged;
 
 			if (engine->execlists.preempt_hang.count) {
@@ -1576,7 +1575,7 @@ static int live_preempt_hang(void *arg)
 
 		igt_spinner_end(&spin_hi);
 		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1973,7 +1972,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		prime, div64_u64(ktime_to_ns(times[1]), prime));
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
@@ -2118,7 +2117,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 		goto out;
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < nsibling; n++)
@@ -2296,7 +2295,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 out:
 	for (n = 0; !IS_ERR(rq[n]); n++)
 		i915_request_put(rq[n]);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..16abfabf08c7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,7 +6,7 @@
 
 #include <linux/prime_numbers.h>
 
-#include "gem/i915_gem_pm.h"
+#include "intel_engine_pm.h"
 #include "intel_gt.h"
 
 #include "../selftests/i915_random.h"
@@ -136,7 +136,6 @@ static int mock_hwsp_freelist(void *arg)
 		goto err_put;
 	}
 
-	mutex_lock(&state.i915->drm.struct_mutex);
 	for (p = phases; p->name; p++) {
 		pr_debug("%s(%s)\n", __func__, p->name);
 		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
@@ -149,7 +148,6 @@ static int mock_hwsp_freelist(void *arg)
 out:
 	for (na = 0; na < state.max; na++)
 		__mock_hwsp_record(&state, na, NULL);
-	mutex_unlock(&state.i915->drm.struct_mutex);
 	kfree(state.history);
 err_put:
 	drm_dev_put(&state.i915->drm);
@@ -449,8 +447,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
 	err = intel_timeline_pin(tl);
 	if (err) {
 		rq = ERR_PTR(err);
@@ -461,10 +457,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	if (IS_ERR(rq))
 		goto out_unpin;
 
+	i915_request_get(rq);
+
 	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
 	i915_request_add(rq);
-	if (err)
+	if (err) {
+		i915_request_put(rq);
 		rq = ERR_PTR(err);
+	}
 
 out_unpin:
 	intel_timeline_unpin(tl);
@@ -500,7 +500,6 @@ static int live_hwsp_engine(void *arg)
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -515,14 +514,13 @@ static int live_hwsp_engine(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for_each_engine(engine, i915, id) {
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		for (n = 0; n < NUM_TIMELINES; n++) {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
@@ -530,22 +528,26 @@ static int live_hwsp_engine(void *arg)
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
+
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
 	}
 
-out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -559,11 +561,7 @@ out:
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -575,7 +573,6 @@ static int live_hwsp_alternate(void *arg)
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -591,9 +588,6 @@ static int live_hwsp_alternate(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for (n = 0; n < NUM_TIMELINES; n++) {
 		for_each_engine(engine, i915, id) {
@@ -605,11 +599,14 @@ static int live_hwsp_alternate(void *arg)
 
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
+				intel_engine_pm_put(engine);
 				err = PTR_ERR(tl);
 				goto out;
 			}
 
+			intel_engine_pm_get(engine);
 			rq = tl_write(tl, engine, count);
+			intel_engine_pm_put(engine);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
@@ -617,11 +614,12 @@ static int live_hwsp_alternate(void *arg)
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -635,11 +633,7 @@ out:
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -650,7 +644,6 @@ static int live_hwsp_wrap(void *arg)
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -658,14 +651,10 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	tl = intel_timeline_create(&i915->gt, NULL);
-	if (IS_ERR(tl)) {
-		err = PTR_ERR(tl);
-		goto out_rpm;
-	}
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 		goto out_free;
 
@@ -681,7 +670,9 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
 		rq = i915_request_create(engine->kernel_context);
+		intel_engine_pm_put(engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
@@ -747,16 +738,12 @@ static int live_hwsp_wrap(void *arg)
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	intel_timeline_unpin(tl);
 out_free:
 	intel_timeline_put(tl);
-out_rpm:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -765,7 +752,6 @@ static int live_hwsp_recycle(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count;
 	int err = 0;
 
@@ -775,9 +761,6 @@ static int live_hwsp_recycle(void *arg)
 	 * want to confuse ourselves or the GPU.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for_each_engine(engine, i915, id) {
 		IGT_TIMEOUT(end_time);
@@ -785,6 +768,8 @@ static int live_hwsp_recycle(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		do {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
@@ -792,21 +777,22 @@ static int live_hwsp_recycle(void *arg)
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Wait for timeline writes timed out!\n");
+				i915_request_put(rq);
 				intel_timeline_put(tl);
 				err = -EIO;
-				goto out;
+				break;
 			}
 
 			if (*tl->hwsp_seqno != count) {
@@ -815,17 +801,18 @@ static int live_hwsp_recycle(void *arg)
 				err = -EINVAL;
 			}
 
+			i915_request_put(rq);
 			intel_timeline_put(tl);
 			count++;
 
 			if (err)
-				goto out;
+				break;
 		} while (!__igt_timeout(end_time, NULL));
-	}
 
-out:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d40ce0709bff..4ee2e2babd0d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -676,7 +676,7 @@ out_unpin:
 			break;
 	}
 
-	if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(ctx->i915))
 		err = -EIO;
 out_batch:
 	i915_vma_unpin_and_release(&batch, 0);
@@ -1090,7 +1090,7 @@ err:
 		kernel_context_close(client[i].ctx);
 	}
 
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	return err;
@@ -1248,7 +1248,7 @@ err:
 	igt_global_reset_unlock(&i915->gt);
 	kernel_context_close(ctx);
 
-	igt_flush_test(i915, I915_WAIT_LOCKED);
+	igt_flush_test(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fec9fb7cc384..385289895107 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3621,6 +3621,7 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
@@ -3630,40 +3631,21 @@ i915_drop_caches_set(void *data, u64 val)
 		     I915_IDLE_ENGINES_TIMEOUT))
 		intel_gt_set_wedged(&i915->gt);
 
-	/* No need to check and wait for gpu resets, only libdrm auto-restarts
-	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) {
-		int ret;
+	if (val & DROP_RETIRE)
+		i915_retire_requests(i915);
 
-		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (val & (DROP_IDLE | DROP_ACTIVE)) {
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
+	}
 
-		/*
-		 * To finish the flush of the idle_worker, we must complete
-		 * the switch-to-kernel-context, which requires a double
-		 * pass through wait_for_idle: first queues the switch,
-		 * second waits for the switch.
-		 */
-		if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE))
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (val & DROP_RETIRE)
-			i915_retire_requests(i915);
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = intel_gt_pm_wait_for_idle(&i915->gt);
+	if (val & DROP_IDLE) {
+		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		if (ret)
+			return ret;
 	}
 
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0aa0a7c680f..83c2c7bf1e34 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -945,19 +945,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
-		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
-
-	timeout = wait_for_timelines(gt, flags, timeout);
-	if (timeout < 0)
-		return timeout;
+	do {
+		timeout = wait_for_timelines(gt, flags, timeout);
+		if (timeout < 0)
+			return timeout;
 
-	if (flags & I915_WAIT_LOCKED) {
-		lockdep_assert_held(&i915->drm.struct_mutex);
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
 
-		i915_retire_requests(i915);
-	}
+	} while (i915_retire_requests(i915));
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 91a885c36c6b..621fb33cda30 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -308,10 +308,9 @@ long i915_request_wait(struct i915_request *rq,
 		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
-#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
-#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
+#define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 2cc71bcf884f..268192b5613b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -162,10 +162,8 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -183,10 +181,8 @@ static int live_active_retire(void *arg)
 		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 75a4695b82bb..52d2df843148 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -523,7 +523,7 @@ static int igt_evict_contexts(void *arg)
 
 	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	while (reserved) {
 		struct reserved *next = reserved->next;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 02749bbfd0cf..e40e6cfa51f1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1705,12 +1705,8 @@ int i915_gem_gtt_mock_selftests(void)
 
 	err = i915_subtests(tests, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
@@ -2006,7 +2002,7 @@ static int igt_cs_tlb(void *arg)
 		}
 	}
 end:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	i915_gem_context_unlock_engines(ctx);
 	i915_gem_object_unpin_map(out);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index eb175da48547..d7d68c6a6bd5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -41,21 +41,16 @@ static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_request *request;
-	int err = -ENOMEM;
 
 	/* Basic preliminary test to create a request and let it loose! */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
 	if (!request)
-		goto out_unlock;
+		return -ENOMEM;
 
 	i915_request_add(request);
 
-	err = 0;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int igt_wait_request(void *arg)
@@ -67,12 +62,10 @@ static int igt_wait_request(void *arg)
 
 	/* Submit a request, then wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_unlock;
-	}
+	if (!request)
+		return -ENOMEM;
+
 	i915_request_get(request);
 
 	if (i915_request_wait(request, 0, 0) != -ETIME) {
@@ -125,9 +118,7 @@ static int igt_wait_request(void *arg)
 	err = 0;
 out_request:
 	i915_request_put(request);
-out_unlock:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -140,52 +131,45 @@ static int igt_fence_wait(void *arg)
 
 	/* Submit a request, treat it as a fence and wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_locked;
-	}
+	if (!request)
+		return -ENOMEM;
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 		pr_err("fence wait success before submit (expected timeout)!\n");
-		goto out_locked;
+		goto out;
 	}
 
 	i915_request_add(request);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence signaled immediately!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
 		pr_err("fence wait success after submit (expected timeout)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (!dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence unsignaled after waiting!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out when complete (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	err = 0;
-out_device:
-	mutex_lock(&i915->drm.struct_mutex);
-out_locked:
+out:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -199,6 +183,8 @@ static int igt_request_rewind(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ctx[0] = mock_context(i915, "A");
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	request = mock_request(ce, 2 * HZ);
@@ -211,7 +197,10 @@ static int igt_request_rewind(void *arg)
 	i915_request_get(request);
 	i915_request_add(request);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	ctx[1] = mock_context(i915, "B");
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	vip = mock_request(ce, 0);
@@ -233,7 +222,6 @@ static int igt_request_rewind(void *arg)
 	request->engine->submit_request(request);
 	rcu_read_unlock();
 
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 		pr_err("timed out waiting for high priority request\n");
@@ -248,14 +236,12 @@ static int igt_request_rewind(void *arg)
 	err = 0;
 err:
 	i915_request_put(vip);
-	mutex_lock(&i915->drm.struct_mutex);
 err_context_1:
 	mock_context_close(ctx[1]);
 	i915_request_put(request);
 err_context_0:
 	mock_context_close(ctx[0]);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -282,7 +268,6 @@ __live_request_alloc(struct intel_context *ce)
 static int __igt_breadcrumbs_smoketest(void *arg)
 {
 	struct smoketest *t = arg;
-	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 	const unsigned int total = 4 * t->ncontexts + 1;
 	unsigned int num_waits = 0, num_fences = 0;
@@ -337,14 +322,11 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			struct i915_request *rq;
 			struct intel_context *ce;
 
-			mutex_lock(BKL);
-
 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 			GEM_BUG_ON(IS_ERR(ce));
 			rq = t->request_alloc(ce);
 			intel_context_put(ce);
 			if (IS_ERR(rq)) {
-				mutex_unlock(BKL);
 				err = PTR_ERR(rq);
 				count = n;
 				break;
@@ -357,8 +339,6 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			requests[n] = i915_request_get(rq);
 			i915_request_add(rq);
 
-			mutex_unlock(BKL);
-
 			if (err >= 0)
 				err = i915_sw_fence_await_dma_fence(wait,
 								    &rq->fence,
@@ -457,15 +437,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		goto out_threads;
 	}
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 	for (n = 0; n < t.ncontexts; n++) {
+		mutex_lock(&t.engine->i915->drm.struct_mutex);
 		t.contexts[n] = mock_context(t.engine->i915, "mock");
+		mutex_unlock(&t.engine->i915->drm.struct_mutex);
 		if (!t.contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
 		}
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 
 	for (n = 0; n < ncpus; n++) {
 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
@@ -495,18 +475,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		atomic_long_read(&t.num_fences),
 		ncpus);
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 out_contexts:
 	for (n = 0; n < t.ncontexts; n++) {
 		if (!t.contexts[n])
 			break;
 		mock_context_close(t.contexts[n]);
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 	kfree(t.contexts);
 out_threads:
 	kfree(threads);
-
 	return ret;
 }
 
@@ -539,7 +516,6 @@ static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	unsigned int id;
 	int err = -ENODEV;
@@ -549,28 +525,25 @@ static int live_nop_request(void *arg)
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	for_each_engine(engine, i915, id) {
-		struct i915_request *request = NULL;
 		unsigned long n, prime;
 		IGT_TIMEOUT(end_time);
 		ktime_t times[2] = {};
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		for_each_prime_number_from(prime, 1, 8192) {
+			struct i915_request *request = NULL;
+
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = i915_request_create(engine->kernel_context);
-				if (IS_ERR(request)) {
-					err = PTR_ERR(request);
-					goto out_unlock;
-				}
+				if (IS_ERR(request))
+					return PTR_ERR(request);
 
 				/* This space is left intentionally blank.
 				 *
@@ -585,9 +558,11 @@ static int live_nop_request(void *arg)
 				 * for latency.
 				 */
 
+				i915_request_get(request);
 				i915_request_add(request);
 			}
 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
+			i915_request_put(request);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 1)
@@ -599,7 +574,7 @@ static int live_nop_request(void *arg)
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -607,9 +582,6 @@ static int live_nop_request(void *arg)
 			prime, div64_u64(ktime_to_ns(times[1]), prime));
 	}
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -679,6 +651,7 @@ empty_request(struct intel_engine_cs *engine,
 	if (err)
 		goto out_request;
 
+	i915_request_get(request);
 out_request:
 	i915_request_add(request);
 	return err ? ERR_PTR(err) : request;
@@ -688,7 +661,6 @@ static int live_empty_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
 	unsigned int id;
@@ -699,14 +671,9 @@ static int live_empty_request(void *arg)
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	batch = empty_batch(i915);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_unlock;
-	}
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
 
 	for_each_engine(engine, i915, id) {
 		IGT_TIMEOUT(end_time);
@@ -730,6 +697,7 @@ static int live_empty_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = empty_request(engine, batch);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
@@ -745,6 +713,7 @@ static int live_empty_request(void *arg)
 			if (__igt_timeout(end_time, NULL))
 				break;
 		}
+		i915_request_put(request);
 
 		err = igt_live_test_end(&t);
 		if (err)
@@ -759,9 +728,6 @@ static int live_empty_request(void *arg)
 out_batch:
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -841,7 +807,6 @@ static int live_all_engines(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	struct i915_request *request[I915_NUM_ENGINES];
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
 	unsigned int id;
@@ -852,18 +817,15 @@ static int live_all_engines(void *arg)
 	 * block doing so, and that they don't complete too soon.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		return err;
 
 	batch = recursive_batch(i915);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
-		goto out_unlock;
+		return err;
 	}
 
 	for_each_engine(engine, i915, id) {
@@ -933,9 +895,6 @@ out_request:
 			i915_request_put(request[id]);
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -945,7 +904,6 @@ static int live_sequential_engines(void *arg)
 	struct i915_request *request[I915_NUM_ENGINES] = {};
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	unsigned int id;
 	int err;
@@ -956,12 +914,9 @@ static int live_sequential_engines(void *arg)
 	 * they are running on independent engines.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		return err;
 
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *batch;
@@ -971,7 +926,7 @@ static int live_sequential_engines(void *arg)
 			err = PTR_ERR(batch);
 			pr_err("%s: Unable to create batch for %s, err=%d\n",
 			       __func__, engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 
 		request[id] = i915_request_create(engine->kernel_context);
@@ -1063,9 +1018,6 @@ out_request:
 		i915_vma_put(request[id]->batch);
 		i915_request_put(request[id]);
 	}
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1080,16 +1032,12 @@ static int __live_parallel_engine1(void *arg)
 		struct i915_request *rq;
 		int err;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(rq))
 			return PTR_ERR(rq);
-		}
 
 		i915_request_get(rq);
 		i915_request_add(rq);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		err = 0;
 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
@@ -1115,16 +1063,11 @@ static int __live_parallel_engineN(void *arg)
 	do {
 		struct i915_request *rq;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(rq))
 			return PTR_ERR(rq);
-		}
 
 		i915_request_add(rq);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
-
 		count++;
 	} while (!__igt_timeout(end_time, NULL));
 
@@ -1154,9 +1097,7 @@ static int live_parallel_engines(void *arg)
 		struct task_struct *tsk[I915_NUM_ENGINES] = {};
 		struct igt_live_test t;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = igt_live_test_begin(&t, i915, __func__, "");
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 
@@ -1184,10 +1125,8 @@ static int live_parallel_engines(void *arg)
 			put_task_struct(tsk[id]);
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
 	return err;
@@ -1280,9 +1219,10 @@ static int live_breadcrumbs_smoketest(void *arg)
 		goto out_threads;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (n = 0; n < t[0].ncontexts; n++) {
+		mutex_lock(&i915->drm.struct_mutex);
 		t[0].contexts[n] = live_context(i915, file);
+		mutex_unlock(&i915->drm.struct_mutex);
 		if (!t[0].contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
@@ -1299,7 +1239,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 		t[id].max_batch = max_batches(t[0].contexts[0], engine);
 		if (t[id].max_batch < 0) {
 			ret = t[id].max_batch;
-			mutex_unlock(&i915->drm.struct_mutex);
 			goto out_flush;
 		}
 		/* One ring interleaved between requests from all cpus */
@@ -1314,7 +1253,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 					  &t[id], "igt/%d.%d", id, n);
 			if (IS_ERR(tsk)) {
 				ret = PTR_ERR(tsk);
-				mutex_unlock(&i915->drm.struct_mutex);
 				goto out_flush;
 			}
 
@@ -1322,7 +1260,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 			threads[id * ncpus + n] = tsk;
 		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
@@ -1350,10 +1287,8 @@ out_flush:
 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ret = igt_live_test_end(&live) ?: ret;
 out_contexts:
-	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(t[0].contexts);
 out_threads:
 	kfree(threads);
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 438ea0eaa416..825a8286cbe8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data)
 {
 	struct drm_i915_private *i915 = data;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(i915);
 
@@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data)
 {
 	struct intel_gt *gt = data;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	if (igt_flush_test(gt->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(gt->i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 0e4f66312b39..1c9db08f7c28 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -833,12 +833,8 @@ int i915_vma_mock_selftests(void)
 
 	err = i915_subtests(tests, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index d3b5eb402d33..2a5fbe46ea9f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -12,31 +12,25 @@
 
 #include "igt_flush_test.h"
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
+int igt_flush_test(struct drm_i915_private *i915)
 {
 	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
-	int repeat = !!(flags & I915_WAIT_LOCKED);
 
 	cond_resched();
 
-	do {
-		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-			pr_err("%pS timed out, cancelling all further testing.\n",
-			       __builtin_return_address(0));
+	i915_retire_requests(i915);
+	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+		pr_err("%pS timed out, cancelling all further testing.\n",
+		       __builtin_return_address(0));
 
-			GEM_TRACE("%pS timed out.\n",
-				  __builtin_return_address(0));
-			GEM_TRACE_DUMP();
+		GEM_TRACE("%pS timed out.\n",
+			  __builtin_return_address(0));
+		GEM_TRACE_DUMP();
 
-			intel_gt_set_wedged(&i915->gt);
-			repeat = 0;
-			ret = -EIO;
-		}
-
-		/* Ensure we also flush after wedging. */
-		if (flags & I915_WAIT_LOCKED)
-			i915_retire_requests(i915);
-	} while (repeat--);
+		intel_gt_set_wedged(&i915->gt);
+		ret = -EIO;
+	}
+	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
index 63e009927c43..7541fa74e641 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
@@ -9,6 +9,6 @@
 
 struct drm_i915_private;
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags);
+int igt_flush_test(struct drm_i915_private *i915);
 
 #endif /* IGT_FLUSH_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 3e902761cd16..04a6f88fdf64 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -19,15 +19,12 @@ int igt_live_test_begin(struct igt_live_test *t,
 	enum intel_engine_id id;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	t->i915 = i915;
 	t->func = func;
 	t->name = name;
 
 	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
+				     I915_WAIT_INTERRUPTIBLE,
 				     MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
@@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		return -EIO;
 
 	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 2448067822af..622bb2127453 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -41,8 +41,6 @@ void mock_device_flush(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
@@ -55,9 +53,7 @@ static void mock_device_release(struct drm_device *dev)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
-- 
cgit 


From f33a8a51602c84cc7d5cadd2655835ba3b7d03f9 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Oct 2019 14:40:04 +0100
Subject: drm/i915: Merge wait_for_timelines with retire_request

wait_for_timelines is essentially the same loop as retiring requests
(with an extra timeout), so merge the two into one routine.

v2: i915_retire_requests_timeout and keep VT'd w/a as !interruptible

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-10-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c           |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c             |  4 +-
 .../gpu/drm/i915/gem/selftests/i915_gem_context.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_pm.c              |  3 +-
 drivers/gpu/drm/i915/i915_debugfs.c                |  4 +-
 drivers/gpu/drm/i915/i915_drv.h                    |  3 +-
 drivers/gpu/drm/i915/i915_gem.c                    | 67 ++--------------------
 drivers/gpu/drm/i915/i915_gem_evict.c              | 12 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c                |  4 +-
 drivers/gpu/drm/i915/i915_request.c                | 26 ++++++++-
 drivers/gpu/drm/i915/i915_request.h                |  7 ++-
 drivers/gpu/drm/i915/selftests/igt_flush_test.c    |  4 +-
 drivers/gpu/drm/i915/selftests/igt_live_test.c     |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gem_device.c   |  2 +-
 14 files changed, 50 insertions(+), 96 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index c19431d609fc..418d0d2b5fa9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -432,9 +432,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 
 	/* Attempt to reap some mmap space from dead objects */
 	do {
-		err = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (err)
 			break;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 26f325bbfe4d..90b211257f2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -59,9 +59,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915,
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f5402aad9b5a..f902aeee1755 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1137,7 +1137,7 @@ out:
 
 	if ((flags & TEST_IDLE) && ret == 0) {
 		ret = i915_gem_wait_for_idle(ce->engine->i915,
-					     0, MAX_SCHEDULE_TIMEOUT);
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d4cefdd38431..bdb34f03ec47 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -196,8 +196,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	if (i915_gem_wait_for_idle(gt->i915, 0,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c4bba21adcd..5888a658e2b7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3635,9 +3635,7 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_retire_requests(i915);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ad31852e4309..44f3463ff9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2321,8 +2321,7 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   unsigned int flags, long timeout);
+int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83c2c7bf1e34..12d21e5cf3ed 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -883,61 +883,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-static long
-wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
-{
-	struct intel_gt_timelines *timelines = &gt->timelines;
-	struct intel_timeline *tl;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct dma_fence *fence;
-
-		fence = i915_active_fence_get(&tl->last_request);
-		if (!fence)
-			continue;
-
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (!dma_fence_is_i915(fence)) {
-			timeout = dma_fence_wait_timeout(fence,
-							 flags & I915_WAIT_INTERRUPTIBLE,
-							 timeout);
-		} else {
-			struct i915_request *rq = to_request(fence);
-
-			/*
-			 * "Race-to-idle".
-			 *
-			 * Switching to the kernel context is often used as
-			 * a synchronous step prior to idling, e.g. in suspend
-			 * for flushing all current operations to memory before
-			 * sleeping. These we want to complete as quickly as
-			 * possible to avoid prolonged stalls, so allow the gpu
-			 * to boost to maximum clocks.
-			 */
-			if (flags & I915_WAIT_FOR_IDLE_BOOST)
-				gen6_rps_boost(rq);
-
-			timeout = i915_request_wait(rq, flags, timeout);
-		}
-
-		dma_fence_put(fence);
-		if (timeout < 0)
-			return timeout;
-
-		/* restart after reacquiring the lock */
-		spin_lock_irqsave(&timelines->lock, flags);
-		tl = list_entry(&timelines->active_list, typeof(*tl), link);
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	return timeout;
-}
-
-int i915_gem_wait_for_idle(struct drm_i915_private *i915,
-			   unsigned int flags, long timeout)
+int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt *gt = &i915->gt;
 
@@ -945,18 +891,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
-	do {
-		timeout = wait_for_timelines(gt, flags, timeout);
-		if (timeout < 0)
-			return timeout;
-
+	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
 		cond_resched();
 		if (signal_pending(current))
 			return -EINTR;
+	}
 
-	} while (i915_retire_requests(i915));
-
-	return 0;
+	return timeout;
 }
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0552bf93eea3..0a412f6d01d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -46,9 +46,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT);
+	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -126,6 +124,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
+	i915_retire_requests(vm->i915);
+
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -264,13 +264,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 
 	trace_i915_gem_evict_node(vm, target, flags);
 
-	/* Retire before we search the active list. Although we have
+	/*
+	 * Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(vm->i915);
+	i915_retire_requests(vm->i915);
 
 	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7462d87f7a48..082fcf9085a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2528,7 +2528,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
+		/* XXX This does not prevent more requests being submitted! */
+		if (i915_retire_requests_timeout(dev_priv,
+						 -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4ffe62a42186..52f7c4e5b644 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1508,13 +1508,19 @@ out:
 	return timeout;
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915)
+long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
 	unsigned long flags;
+	bool interruptible;
 	LIST_HEAD(free);
 
+	interruptible = true;
+	if (timeout < 0)
+		timeout = -timeout, interruptible = false;
+
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 		if (!mutex_trylock(&tl->mutex))
@@ -1525,13 +1531,27 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 		tl->active_count++; /* pin the list element */
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 interruptible,
+								 timeout);
+				dma_fence_put(fence);
+			}
+		}
+
 		retire_requests(tl);
 
 		spin_lock_irqsave(&timelines->lock, flags);
 
 		/* Resume iteration after dropping lock */
 		list_safe_reset_next(tl, tn, link);
-		if (!--tl->active_count)
+		if (--tl->active_count)
+			active_count += !!rcu_access_pointer(tl->last_request.fence);
+		else
 			list_del(&tl->link);
 
 		mutex_unlock(&tl->mutex);
@@ -1547,7 +1567,7 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 	list_for_each_entry_safe(tl, tn, &free, link)
 		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&timelines->active_list);
+	return active_count ? timeout : 0;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 621fb33cda30..256b0715180f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -310,7 +310,6 @@ long i915_request_wait(struct i915_request *rq,
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
 #define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
@@ -460,6 +459,10 @@ i915_request_active_timeline(struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915);
+long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
+static inline void i915_retire_requests(struct drm_i915_private *i915)
+{
+	i915_retire_requests_timeout(i915, 0);
+}
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 2a5fbe46ea9f..ed496bd6d84f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -18,8 +18,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
 	cond_resched();
 
-	i915_retire_requests(i915);
-	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -30,7 +29,6 @@ int igt_flush_test(struct drm_i915_private *i915)
 		intel_gt_set_wedged(&i915->gt);
 		ret = -EIO;
 	}
-	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 04a6f88fdf64..eae90f97df6c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -23,9 +23,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE,
-				     MAX_SCHEDULE_TIMEOUT);
+	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index a8be5da2b3cf..3b589bbb2c2d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests(i915));
+	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
-- 
cgit 


From 661019754202d610203a9cf09d26fdd8677e41c6 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 4 Oct 2019 14:40:06 +0100
Subject: drm/i915: Move request runtime management onto gt

Requests are run from the gt and are tided into the gt runtime power
management, so pull the runtime request management under gt/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-12-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/Makefile                      |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c           |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c             |  28 +----
 .../gpu/drm/i915/gem/selftests/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c                 |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_pm.c              |   5 +-
 drivers/gpu/drm/i915/gt/intel_gt_requests.c        | 123 +++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_requests.h        |  24 ++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h           |  11 ++
 drivers/gpu/drm/i915/gt/selftest_timeline.c        |   8 +-
 drivers/gpu/drm/i915/i915_debugfs.c                |  17 +--
 drivers/gpu/drm/i915/i915_drv.h                    |  10 --
 drivers/gpu/drm/i915/i915_gem.c                    |  17 ---
 drivers/gpu/drm/i915/i915_gem_evict.c              |  14 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c                |   5 +-
 drivers/gpu/drm/i915/i915_request.c                |  64 +----------
 drivers/gpu/drm/i915/i915_request.h                |   7 +-
 drivers/gpu/drm/i915/selftests/igt_flush_test.c    |   9 +-
 drivers/gpu/drm/i915/selftests/igt_live_test.c     |   5 +-
 drivers/gpu/drm/i915/selftests/mock_gem_device.c   |  10 +-
 21 files changed, 213 insertions(+), 158 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b020b024286d..4e23f1a5c39f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -83,6 +83,7 @@ gt-y += \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
+	gt/intel_gt_requests.o \
 	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_rc6.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 45bbd22c14f1..fd4122d8c0a9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -8,6 +8,7 @@
 #include <linux/sizes.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_gem_gtt.h"
@@ -424,6 +425,7 @@ out:
 static int create_mmap_offset(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_gt *gt = &i915->gt;
 	int err;
 
 	err = drm_gem_create_mmap_offset(&obj->base);
@@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 		return 0;
 
 	/* Attempt to reap some mmap space from dead objects */
-	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 90b211257f2d..9194d8464bf7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -7,31 +7,18 @@
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
 
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	cancel_delayed_work(&i915->gem.retire_work);
-
 	i915_vma_parked(i915);
 
 	i915_globals_park();
 }
 
-static void retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.retire_work.work);
-
-	i915_retire_requests(i915);
-
-	queue_delayed_work(i915->wq,
-			   &i915->gem.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 static int pm_notifier(struct notifier_block *nb,
 		       unsigned long action,
 		       void *data)
@@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb,
 	switch (action) {
 	case INTEL_GT_UNPARK:
 		i915_globals_unpark();
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
 		break;
 
 	case INTEL_GT_PARK:
@@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
@@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(&i915->gt);
+	intel_gt_suspend(&i915->gt);
+	intel_uc_suspend(&i915->gt.uc);
 
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
-
-	intel_uc_suspend(&i915->gt.uc);
-	intel_gt_suspend(&i915->gt);
 }
 
 static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
@@ -239,8 +221,6 @@ err_wedged:
 
 void i915_gem_init__pm(struct drm_i915_private *i915)
 {
-	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
-
 	i915->gem.pm_notifier.notifier_call = pm_notifier;
 	blocking_notifier_chain_register(&i915->gt.pm_notifications,
 					 &i915->gem.pm_notifier);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f902aeee1755..2288757808ae 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -8,6 +8,7 @@
 
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
 
@@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm,
 	int err;
 
 	/* Keep in GEM's good graces */
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
@@ -1136,7 +1137,7 @@ out:
 		igt_spinner_end(spin);
 
 	if ((flags & TEST_IDLE) && ret == 0) {
-		ret = i915_gem_wait_for_idle(ce->engine->i915,
+		ret = intel_gt_wait_for_idle(ce->engine->gt,
 					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 4ba6ed5c8313..1cd25cfd0246 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -573,7 +573,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
 	intel_gt_pm_get(&i915->gt);
-	cancel_delayed_work_sync(&i915->gem.retire_work);
+	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7205595369be..53220741e49e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -6,6 +6,7 @@
 #include "i915_drv.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_mocs.h"
 #include "intel_rc6.h"
 #include "intel_uncore.h"
@@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
 	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
+	intel_gt_init_requests(gt);
 	intel_gt_pm_init_early(gt);
 	intel_uc_init_early(&gt->uc);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index bdb34f03ec47..d2e80ba64d69 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -10,6 +10,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_pm.h"
 #include "intel_rc6.h"
 #include "intel_wakeref.h"
@@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
 	i915_pmu_gt_unparked(i915);
 
 	intel_gt_queue_hangcheck(gt);
+	intel_gt_unpark_requests(gt);
 
 	pm_notify(gt, INTEL_GT_UNPARK);
 
@@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf)
 	GEM_TRACE("\n");
 
 	pm_notify(gt, INTEL_GT_PARK);
+	intel_gt_park_requests(gt);
 
 	i915_pmu_gt_parked(i915);
 	if (INTEL_GEN(i915) >= 6)
@@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..8aed89fd2cdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static void retire_requests(struct intel_timeline *tl)
+{
+	struct i915_request *rq, *rn;
+
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
+		if (!i915_request_retire(rq))
+			break;
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+	struct intel_gt_timelines *timelines = &gt->timelines;
+	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
+	unsigned long flags;
+	bool interruptible;
+	LIST_HEAD(free);
+
+	interruptible = true;
+	if (unlikely(timeout < 0))
+		timeout = -timeout, interruptible = false;
+
+	spin_lock_irqsave(&timelines->lock, flags);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock_irqrestore(&timelines->lock, flags);
+
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 true,
+								 timeout);
+				dma_fence_put(fence);
+			}
+		}
+
+		retire_requests(tl);
+
+		spin_lock_irqsave(&timelines->lock, flags);
+
+		/* Resume iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (--tl->active_count)
+			active_count += !!rcu_access_pointer(tl->last_request.fence);
+		else
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
+	}
+	spin_unlock_irqrestore(&timelines->lock, flags);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
+
+	return active_count ? timeout : 0;
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+	/* If the device is asleep, we have no requests outstanding */
+	if (!intel_gt_pm_is_awake(gt))
+		return 0;
+
+	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct intel_gt *gt =
+		container_of(work, typeof(*gt), requests.retire_work.work);
+
+	intel_gt_retire_requests(gt);
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+	INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+	cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..bd31cbce47e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_gt;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+	intel_gt_retire_requests_timeout(gt, 0);
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 7134f1319bbe..802f516a3430 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,17 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
+	struct intel_gt_requests {
+		/**
+		 * We leave the user IRQ off as much as possible,
+		 * but this means that requests will finish and never
+		 * be retired once the system goes idle. Set a timer to
+		 * fire periodically while the ring is running. When it
+		 * fires, go retire requests.
+		 */
+		struct delayed_work retire_work;
+	} requests;
+
 	struct intel_wakeref wakeref;
 	atomic_t user_wakeref;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 16abfabf08c7..d6df40cdc8a6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -8,6 +8,7 @@
 
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
+#include "intel_gt_requests.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
@@ -641,6 +642,7 @@ out:
 static int live_hwsp_wrap(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = &i915->gt;
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
@@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	tl = intel_timeline_create(&i915->gt, NULL);
+	tl = intel_timeline_create(gt, NULL);
 	if (IS_ERR(tl))
 		return PTR_ERR(tl);
 
@@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg)
 	if (err)
 		goto out_free;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt->i915, id) {
 		const u32 *hwsp_seqno[2];
 		struct i915_request *rq;
 		u32 seqno[2];
@@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 
-		i915_retire_requests(i915); /* recycle HWSP */
+		intel_gt_retire_requests(gt); /* recycle HWSP */
 	}
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5888a658e2b7..2afc41e43b6e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -41,6 +41,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "gt/intel_rc6.h"
 #include "gt/uc/intel_guc_submission.h"
@@ -3621,33 +3622,33 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	struct intel_gt *gt = &i915->gt;
 	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
 
 	if (val & DROP_RESET_ACTIVE &&
-	    wait_for(intel_engines_are_idle(&i915->gt),
-		     I915_IDLE_ENGINES_TIMEOUT))
-		intel_gt_set_wedged(&i915->gt);
+	    wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT))
+		intel_gt_set_wedged(gt);
 
 	if (val & DROP_RETIRE)
-		i915_retire_requests(i915);
+		intel_gt_retire_requests(gt);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+		ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
 
 	if (val & DROP_IDLE) {
-		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		ret = intel_gt_pm_wait_for_idle(gt);
 		if (ret)
 			return ret;
 	}
 
-	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
-		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
+	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
+		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44f3463ff9f1..cb63b2bd0ce8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1710,15 +1710,6 @@ struct drm_i915_private {
 
 	struct {
 		struct notifier_block pm_notifier;
-
-		/**
-		 * We leave the user IRQ off as much as possible,
-		 * but this means that requests will finish and never
-		 * be retired once the system goes idle. Set a timer to
-		 * fire periodically while the ring is running. When it
-		 * fires, go retire requests.
-		 */
-		struct delayed_work retire_work;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
@@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 12d21e5cf3ed..a1077919b16a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
-{
-	struct intel_gt *gt = &i915->gt;
-
-	/* If the device is asleep, we have no requests outstanding */
-	if (!intel_gt_pm_is_awake(gt))
-		return 0;
-
-	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
-		cond_resched();
-		if (signal_pending(current))
-			return -EINTR;
-	}
-
-	return timeout;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0a412f6d01d7..7e62c310290f 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -29,6 +29,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_trace.h"
@@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
-static int ggtt_flush(struct drm_i915_private *i915)
+static int ggtt_flush(struct intel_gt *gt)
 {
 	/*
 	 * Not everything in the GGTT is tracked via vma (otherwise we
@@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
 			 u64 start, u64 end,
 			 unsigned flags)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
@@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 search_again:
 	active = NULL;
@@ -197,7 +197,7 @@ search_again:
 	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
 		return -EBUSY;
 
-	ret = ggtt_flush(dev_priv);
+	ret = ggtt_flush(vm->gt);
 	if (ret)
 		return ret;
 
@@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
@@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	 * switch otherwise is ineffective.
 	 */
 	if (i915_is_ggtt(vm)) {
-		ret = ggtt_flush(vm->i915);
+		ret = ggtt_flush(vm->gt);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 082fcf9085a6..1d26634ca597 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -38,6 +38,7 @@
 
 #include "display/intel_frontbuffer.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 
 	if (unlikely(ggtt->do_idle_maps)) {
 		/* XXX This does not prevent more requests being submitted! */
-		if (i915_retire_requests_timeout(dev_priv,
-						 -MAX_SCHEDULE_TIMEOUT)) {
+		if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
+						     -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 52f7c4e5b644..437f9fc6282e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq)
 	spin_unlock(&locked->active.lock);
 }
 
-static bool i915_request_retire(struct i915_request *rq)
+bool i915_request_retire(struct i915_request *rq)
 {
 	if (!i915_request_completed(rq))
 		return false;
@@ -1508,68 +1508,6 @@ out:
 	return timeout;
 }
 
-long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl, *tn;
-	unsigned long active_count = 0;
-	unsigned long flags;
-	bool interruptible;
-	LIST_HEAD(free);
-
-	interruptible = true;
-	if (timeout < 0)
-		timeout = -timeout, interruptible = false;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
-		if (!mutex_trylock(&tl->mutex))
-			continue;
-
-		intel_timeline_get(tl);
-		GEM_BUG_ON(!tl->active_count);
-		tl->active_count++; /* pin the list element */
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (timeout > 0) {
-			struct dma_fence *fence;
-
-			fence = i915_active_fence_get(&tl->last_request);
-			if (fence) {
-				timeout = dma_fence_wait_timeout(fence,
-								 interruptible,
-								 timeout);
-				dma_fence_put(fence);
-			}
-		}
-
-		retire_requests(tl);
-
-		spin_lock_irqsave(&timelines->lock, flags);
-
-		/* Resume iteration after dropping lock */
-		list_safe_reset_next(tl, tn, link);
-		if (--tl->active_count)
-			active_count += !!rcu_access_pointer(tl->last_request.fence);
-		else
-			list_del(&tl->link);
-
-		mutex_unlock(&tl->mutex);
-
-		/* Defer the final release to after the spinlock */
-		if (refcount_dec_and_test(&tl->kref.refcount)) {
-			GEM_BUG_ON(tl->active_count);
-			list_add(&tl->link, &free);
-		}
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	list_for_each_entry_safe(tl, tn, &free, link)
-		__intel_timeline_free(&tl->kref);
-
-	return active_count ? timeout : 0;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_request.c"
 #include "selftests/i915_request.c"
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 256b0715180f..6a95242b280d 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request);
 void __i915_request_queue(struct i915_request *rq,
 			  const struct i915_sched_attr *attr);
 
+bool i915_request_retire(struct i915_request *rq);
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
@@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
-long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
-static inline void i915_retire_requests(struct drm_i915_private *i915)
-{
-	i915_retire_requests_timeout(i915, 0);
-}
-
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index ed496bd6d84f..7b0939e3f007 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -4,8 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "gem/i915_gem_context.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
@@ -14,11 +14,12 @@
 
 int igt_flush_test(struct drm_i915_private *i915)
 {
-	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
+	struct intel_gt *gt = &i915->gt;
+	int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
 
 	cond_resched();
 
-	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 			  __builtin_return_address(0));
 		GEM_TRACE_DUMP();
 
-		intel_gt_set_wedged(&i915->gt);
+		intel_gt_set_wedged(gt);
 		ret = -EIO;
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index eae90f97df6c..810b60100c2c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -4,7 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_drv.h"
+#include "i915_drv.h"
+#include "gt/intel_gt_requests.h"
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
@@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 3b589bbb2c2d..4e6cde0d4859 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -26,6 +26,7 @@
 #include <linux/pm_runtime.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/mock_engine.h"
 
 #include "mock_request.h"
@@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
+	} while (intel_gt_retire_requests_timeout(&i915->gt,
+						  MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -98,10 +100,6 @@ static void release_dev(struct device *dev)
 	kfree(pdev);
 }
 
-static void mock_retire_work_handler(struct work_struct *work)
-{
-}
-
 static int pm_domain_resume(struct device *dev)
 {
 	return pm_generic_runtime_resume(dev);
@@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_init_contexts(i915);
 
-	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
-
 	intel_timelines_init(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
-- 
cgit 


From c3eb54aad982d05493ab374a6f0d0a6831dd2c6e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 12 Oct 2019 08:01:36 +0100
Subject: drm/i915: Mark up "sentinel" requests

Sometimes we want to emit a terminator request, a request that flushes
the pipeline and allows no request to come after it. This can be used
for a "preempt-to-idle" to ensure that upon processing the
context-switch to that request, all other active contexts have been
flushed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191012070136.32058-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/gt/intel_lrc.c |  6 +++++-
 drivers/gpu/drm/i915/i915_request.h | 10 ++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7f2d28bf9ea3..a07baeb897fe 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1208,7 +1208,8 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
-	if (unlikely(prev->flags ^ next->flags) & I915_REQUEST_NOPREEMPT)
+	if (unlikely((prev->flags ^ next->flags) &
+		     (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
 		return false;
 
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -1659,6 +1660,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->hw_context == rq->hw_context)
 					goto done;
 
+				if (i915_request_has_sentinel(last))
+					goto done;
+
 				/*
 				 * If GVT overrides us we only ever submit
 				 * port[0], leaving port[1] empty. Note that we
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 6a95242b280d..96991d64759c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,8 +216,9 @@ struct i915_request {
 	unsigned long emitted_jiffies;
 
 	unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_WAITBOOST	BIT(0)
+#define I915_REQUEST_NOPREEMPT	BIT(1)
+#define I915_REQUEST_SENTINEL	BIT(2)
 
 	/** timeline->request entry for this request */
 	struct list_head link;
@@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+	return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
cgit