Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_lrc.c	459
1 file changed, 249 insertions(+), 210 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d89e1b8e1cc5..61cac26a8b05 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -244,7 +244,7 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl
if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) &&
USES_PPGTT(dev_priv) &&
- i915.use_mmio_flip >= 0)
+ i915_modparams.use_mmio_flip >= 0)
return 1;
return 0;
@@ -279,17 +279,73 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH));
desc = ctx->desc_template; /* bits 0-11 */
- desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE;
+ desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
/* bits 12-31 */
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
ce->lrc_desc = desc;
}
-uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static struct i915_priolist *
+lookup_priolist(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- return ctx->engine[engine->id].lrc_desc;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_priolist *p;
+ struct rb_node **parent, *rb;
+ bool first = true;
+
+ if (unlikely(execlists->no_priolist))
+ prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
+ /* most positive priority is scheduled first, equal priorities fifo */
+ rb = NULL;
+ parent = &execlists->queue.rb_node;
+ while (*parent) {
+ rb = *parent;
+ p = rb_entry(rb, typeof(*p), node);
+ if (prio > p->priority) {
+ parent = &rb->rb_left;
+ } else if (prio < p->priority) {
+ parent = &rb->rb_right;
+ first = false;
+ } else {
+ return p;
+ }
+ }
+
+ if (prio == I915_PRIORITY_NORMAL) {
+ p = &execlists->default_priolist;
+ } else {
+ p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
+ /* Convert an allocation failure to a priority bump */
+ if (unlikely(!p)) {
+ prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+ /* To maintain ordering with all rendering, after an
+ * allocation failure we have to disable all scheduling.
+ * Requests will then be executed in fifo, and schedule
+ * will ensure that dependencies are emitted in fifo.
+ * There will be still some reordering with existing
+ * requests, so if userspace lied about their
+ * dependencies that reordering may be visible.
+ */
+ execlists->no_priolist = true;
+ goto find_priolist;
+ }
+ }
+
+ p->priority = prio;
+ INIT_LIST_HEAD(&p->requests);
+ rb_link_node(&p->node, rb, parent);
+ rb_insert_color(&p->node, &execlists->queue);
+
+ if (first)
+ execlists->first = &p->node;
+
+ return ptr_pack_bits(p, first, 1);
}
static inline void
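Aside: the new lookup_priolist() returns its result with a boolean (did this priolist just become the head of the queue?) packed into bit 0 of the pointer, using the driver's ptr_pack_bits()/ptr_unmask_bits()/ptr_mask_bits() helpers. A minimal stand-alone sketch of that pointer-tagging idiom, with invented helper names rather than the driver's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Tag a single flag into bit 0 of an aligned pointer. This only works
 * because the pointee's alignment guarantees that bit is otherwise zero. */
static void *pack_ptr(void *ptr, unsigned int flag)
{
	assert(((uintptr_t)ptr & 1) == 0);
	return (void *)((uintptr_t)ptr | (flag & 1));
}

static unsigned int unpack_flag(void *packed)
{
	return (uintptr_t)packed & 1;
}

static void *mask_ptr(void *packed)
{
	return (void *)((uintptr_t)packed & ~(uintptr_t)1);
}

int main(void)
{
	static int priolist = 42;		/* stands in for struct i915_priolist */
	void *ret = pack_ptr(&priolist, 1);	/* flag: this is now the queue head */

	printf("first=%u value=%d\n", unpack_flag(ret), *(int *)mask_ptr(ret));
	return 0;
}

The caller, insert_request() below, unmasks the pointer before list_add_tail() and uses the flag to decide whether the execlists tasklet needs to be kicked.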
@@ -338,12 +394,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
- struct execlist_port *port = engine->execlist_port;
+ struct execlist_port *port = engine->execlists.port;
u32 __iomem *elsp =
engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
unsigned int n;
- for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+ for (n = execlists_num_ports(&engine->execlists); n--; ) {
struct drm_i915_gem_request *rq;
unsigned int count;
u64 desc;
@@ -398,7 +454,10 @@ static void port_assign(struct execlist_port *port,
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *last;
- struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
+ const struct execlist_port * const last_port =
+ &execlists->port[execlists->port_mask];
struct rb_node *rb;
bool submit = false;
@@ -412,8 +471,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
last->tail = last->wa_tail;
- GEM_BUG_ON(port_isset(&port[1]));
-
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
* static for a context, and unique to each, so we only execute
@@ -436,8 +493,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
spin_lock_irq(&engine->timeline->lock);
- rb = engine->execlist_first;
- GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
+ rb = execlists->first;
+ GEM_BUG_ON(rb_first(&execlists->queue) != rb);
while (rb) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
struct drm_i915_gem_request *rq, *rn;
@@ -460,7 +517,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* combine this request with the last, then we
* are done.
*/
- if (port != engine->execlist_port) {
+ if (port == last_port) {
__list_del_many(&p->requests,
&rq->priotree.link);
goto done;
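To make the dequeue policy above concrete, here is a toy reduction of it: consecutive requests from the same context coalesce into the current port, a context switch claims the next port, and dequeuing stops once the last port is occupied. All types, names and values below are invented for illustration:

#include <stdio.h>

#define NUM_PORTS 2

struct fake_request {
	int ctx_id;
	int seqno;
};

int main(void)
{
	const struct fake_request queue[] = {
		{ .ctx_id = 1, .seqno = 10 },
		{ .ctx_id = 1, .seqno = 11 },	/* coalesces with seqno 10 */
		{ .ctx_id = 2, .seqno = 20 },	/* needs a fresh port */
		{ .ctx_id = 3, .seqno = 30 },	/* no port left: stays queued */
	};
	const struct fake_request *port[NUM_PORTS] = { NULL };
	unsigned int slot = 0;

	for (unsigned int i = 0; i < sizeof(queue) / sizeof(queue[0]); i++) {
		if (port[slot] && port[slot]->ctx_id != queue[i].ctx_id) {
			if (slot == NUM_PORTS - 1)
				break;		/* last port in use: stop dequeuing */
			slot++;
		}
		port[slot] = &queue[i];		/* last request of the run wins */
	}

	for (unsigned int n = 0; n < NUM_PORTS; n++)
		if (port[n])
			printf("port[%u]: ctx %d, up to seqno %d\n",
			       n, port[n]->ctx_id, port[n]->seqno);
	return 0;
}

In the real driver the number of ports is execlists->port_mask + 1 rather than a compile-time constant, which is why the hard-coded GEM_BUG_ON(port_isset(&port[1])) above is dropped.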
@@ -485,25 +542,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (submit)
port_assign(port, last);
port++;
+
+ GEM_BUG_ON(port_isset(port));
}
INIT_LIST_HEAD(&rq->priotree.link);
rq->priotree.priority = INT_MAX;
__i915_gem_request_submit(rq);
- trace_i915_gem_request_in(rq, port_index(port, engine));
+ trace_i915_gem_request_in(rq, port_index(port, execlists));
last = rq;
submit = true;
}
rb = rb_next(rb);
- rb_erase(&p->node, &engine->execlist_queue);
+ rb_erase(&p->node, &execlists->queue);
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
done:
- engine->execlist_first = rb;
+ execlists->first = rb;
if (submit)
port_assign(port, last);
spin_unlock_irq(&engine->timeline->lock);
@@ -512,9 +571,83 @@ done:
execlists_submit_ports(engine);
}
+static void
+execlist_cancel_port_requests(struct intel_engine_execlists *execlists)
+{
+ struct execlist_port *port = execlists->port;
+ unsigned int num_ports = ARRAY_SIZE(execlists->port);
+
+ while (num_ports-- && port_isset(port)) {
+ struct drm_i915_gem_request *rq = port_request(port);
+
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_gem_request_put(rq);
+
+ memset(port, 0, sizeof(*port));
+ port++;
+ }
+}
+
+static void execlists_cancel_requests(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_gem_request *rq, *rn;
+ struct rb_node *rb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+
+ /* Cancel the requests on the HW and clear the ELSP tracker. */
+ execlist_cancel_port_requests(execlists);
+
+ /* Mark all executing requests as skipped. */
+ list_for_each_entry(rq, &engine->timeline->requests, link) {
+ GEM_BUG_ON(!rq->global_seqno);
+ if (!i915_gem_request_completed(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+ }
+
+ /* Flush the queued requests to the timeline list (for retiring). */
+ rb = execlists->first;
+ while (rb) {
+ struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+
+ list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+ INIT_LIST_HEAD(&rq->priotree.link);
+ rq->priotree.priority = INT_MAX;
+
+ dma_fence_set_error(&rq->fence, -EIO);
+ __i915_gem_request_submit(rq);
+ }
+
+ rb = rb_next(rb);
+ rb_erase(&p->node, &execlists->queue);
+ INIT_LIST_HEAD(&p->requests);
+ if (p->priority != I915_PRIORITY_NORMAL)
+ kmem_cache_free(engine->i915->priorities, p);
+ }
+
+ /* Remaining _unready_ requests will be nop'ed when submitted */
+
+ execlists->queue = RB_ROOT;
+ execlists->first = NULL;
+ GEM_BUG_ON(port_isset(execlists->port));
+
+ /*
+ * The port is checked prior to scheduling a tasklet, but
+ * just in case we have suspended the tasklet to do the
+ * wedging make sure that when it wakes, it decides there
+ * is no work to do by clearing the irq_posted bit.
+ */
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
{
- const struct execlist_port *port = engine->execlist_port;
+ const struct execlist_port *port = engine->execlists.port;
return port_count(&port[0]) + port_count(&port[1]) < 2;
}
@@ -525,8 +658,9 @@ static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
*/
static void intel_lrc_irq_handler(unsigned long data)
{
- struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
- struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct execlist_port *port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -538,19 +672,25 @@ static void intel_lrc_irq_handler(unsigned long data)
*/
GEM_BUG_ON(!dev_priv->gt.awake);
- intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
+ intel_uncore_forcewake_get(dev_priv, execlists->fw_domains);
/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
* imposing the cost of a locked atomic transaction when submitting a
* new request (outside of the context-switch interrupt).
*/
while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
- u32 __iomem *csb_mmio =
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
- u32 __iomem *buf =
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+ /* The HWSP contains a (cacheable) mirror of the CSB */
+ const u32 *buf =
+ &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int head, tail;
+ /* However GVT emulation depends upon intercepting CSB mmio */
+ if (unlikely(execlists->csb_use_mmio)) {
+ buf = (u32 * __force)
+ (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+ execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+ }
+
/* The write will be ordered by the uncached read (itself
* a memory barrier), so we do not need another in the form
* of a locked instruction. The race between the interrupt
@@ -562,9 +702,20 @@ static void intel_lrc_irq_handler(unsigned long data)
* is set and we do a new loop.
*/
__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- head = readl(csb_mmio);
- tail = GEN8_CSB_WRITE_PTR(head);
- head = GEN8_CSB_READ_PTR(head);
+ if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ tail = GEN8_CSB_WRITE_PTR(head);
+ head = GEN8_CSB_READ_PTR(head);
+ execlists->csb_head = head;
+ } else {
+ const int write_idx =
+ intel_hws_csb_write_index(dev_priv) -
+ I915_HWS_CSB_BUF0_INDEX;
+
+ head = execlists->csb_head;
+ tail = READ_ONCE(buf[write_idx]);
+ }
+
while (head != tail) {
struct drm_i915_gem_request *rq;
unsigned int status;
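As a side note, the cached-head scheme introduced above is a standard single-producer ring-buffer consumer: the handler remembers its own read pointer (csb_head), samples the hardware's write pointer once per pass (from the HWSP mirror, or via mmio for GVT), and only writes the read pointer back to the CONTEXT_STATUS_PTR register if it actually advanced. A purely illustrative model follows; names and sizes are invented except that the real CSB also has six entries:

#include <stdio.h>

#define CSB_ENTRIES 6

struct csb_mirror {
	unsigned int status[CSB_ENTRIES];
	unsigned int write_ptr;		/* advanced by the "hardware" */
};

static unsigned int csb_head;		/* software read pointer, cached */

static void drain_csb(struct csb_mirror *csb)
{
	unsigned int head = csb_head;
	unsigned int tail = csb->write_ptr;	/* read the producer index once */

	while (head != tail) {
		if (++head == CSB_ENTRIES)
			head = 0;
		printf("consume slot %u: %#x\n", head, csb->status[head]);
	}

	if (head != csb_head)
		csb_head = head;	/* write back only when we made progress */
}

int main(void)
{
	struct csb_mirror csb = { .status = { 0, 0x11, 0x22 }, .write_ptr = 2 };

	drain_csb(&csb);		/* consumes slots 1 and 2 */
	csb.status[3] = 0x33;
	csb.write_ptr = 3;
	drain_csb(&csb);		/* consumes slot 3 only */
	return 0;
}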
@@ -590,13 +741,12 @@ static void intel_lrc_irq_handler(unsigned long data)
* status notifier.
*/
- status = readl(buf + 2 * head);
+ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;
/* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
- port->context_id);
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
rq = port_unpack(port, &count);
GEM_BUG_ON(count == 0);
@@ -608,8 +758,7 @@ static void intel_lrc_irq_handler(unsigned long data)
trace_i915_gem_request_out(rq);
i915_gem_request_put(rq);
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
+ execlists_port_complete(execlists, port);
} else {
port_set(port, port_pack(rq, count));
}
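The open-coded port[0] = port[1] shuffle is replaced here by execlists_port_complete(), which copes with the variable port count introduced elsewhere in this patch. Roughly, such a helper amounts to the following sketch (invented types, not the driver's actual implementation):

#include <stdio.h>
#include <string.h>

struct fake_port { int request; };

/* Retire the head port: shift the remaining ports down and clear the last. */
static void port_complete(struct fake_port *port, unsigned int num_ports)
{
	memmove(port, port + 1, (num_ports - 1) * sizeof(*port));
	memset(port + num_ports - 1, 0, sizeof(*port));
}

int main(void)
{
	struct fake_port ports[2] = { { .request = 1 }, { .request = 2 } };

	port_complete(ports, 2);
	printf("port[0]=%d port[1]=%d\n", ports[0].request, ports[1].request);
	return 0;
}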
@@ -619,78 +768,28 @@ static void intel_lrc_irq_handler(unsigned long data)
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
}
- writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
- csb_mmio);
+ if (head != execlists->csb_head) {
+ execlists->csb_head = head;
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+ dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ }
}
if (execlists_elsp_ready(engine))
execlists_dequeue(engine);
- intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
+ intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
-static bool
-insert_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+static void insert_request(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- struct i915_priolist *p;
- struct rb_node **parent, *rb;
- bool first = true;
+ struct i915_priolist *p = lookup_priolist(engine, pt, prio);
- if (unlikely(engine->no_priolist))
- prio = I915_PRIORITY_NORMAL;
-
-find_priolist:
- /* most positive priority is scheduled first, equal priorities fifo */
- rb = NULL;
- parent = &engine->execlist_queue.rb_node;
- while (*parent) {
- rb = *parent;
- p = rb_entry(rb, typeof(*p), node);
- if (prio > p->priority) {
- parent = &rb->rb_left;
- } else if (prio < p->priority) {
- parent = &rb->rb_right;
- first = false;
- } else {
- list_add_tail(&pt->link, &p->requests);
- return false;
- }
- }
-
- if (prio == I915_PRIORITY_NORMAL) {
- p = &engine->default_priolist;
- } else {
- p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
- /* Convert an allocation failure to a priority bump */
- if (unlikely(!p)) {
- prio = I915_PRIORITY_NORMAL; /* recurses just once */
-
- /* To maintain ordering with all rendering, after an
- * allocation failure we have to disable all scheduling.
- * Requests will then be executed in fifo, and schedule
- * will ensure that dependencies are emitted in fifo.
- * There will be still some reordering with existing
- * requests, so if userspace lied about their
- * dependencies that reordering may be visible.
- */
- engine->no_priolist = true;
- goto find_priolist;
- }
- }
-
- p->priority = prio;
- rb_link_node(&p->node, rb, parent);
- rb_insert_color(&p->node, &engine->execlist_queue);
-
- INIT_LIST_HEAD(&p->requests);
- list_add_tail(&pt->link, &p->requests);
-
- if (first)
- engine->execlist_first = &p->node;
-
- return first;
+ list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
+ if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+ tasklet_hi_schedule(&engine->execlists.irq_tasklet);
}
static void execlists_submit_request(struct drm_i915_gem_request *request)
@@ -701,14 +800,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- if (insert_request(engine,
- &request->priotree,
- request->priotree.priority)) {
- if (execlists_elsp_ready(engine))
- tasklet_hi_schedule(&engine->irq_tasklet);
- }
+ insert_request(engine, &request->priotree, request->priotree.priority);
- GEM_BUG_ON(!engine->execlist_first);
+ GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link));
spin_unlock_irqrestore(&engine->timeline->lock, flags);
@@ -914,27 +1008,14 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
*/
request->reserved_space += EXECLISTS_REQUEST_SIZE;
- if (i915.enable_guc_submission) {
- /*
- * Check that the GuC has space for the request before
- * going any further, as the i915_add_request() call
- * later on mustn't fail ...
- */
- ret = i915_guc_wq_reserve(request);
- if (ret)
- goto err;
- }
-
cs = intel_ring_begin(request, 0);
- if (IS_ERR(cs)) {
- ret = PTR_ERR(cs);
- goto err_unreserve;
- }
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
if (!ce->initialised) {
ret = engine->init_context(request);
if (ret)
- goto err_unreserve;
+ return ret;
ce->initialised = true;
}
@@ -948,12 +1029,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
-
-err_unreserve:
- if (i915.enable_guc_submission)
- i915_guc_wq_unreserve(request);
-err:
- return ret;
}
/*
@@ -1116,13 +1191,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
- *batch++ = MI_BATCH_BUFFER_END;
-
- return batch;
-}
-
#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
@@ -1179,7 +1247,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return 0;
case 9:
wa_bb_fn[0] = gen9_init_indirectctx_bb;
- wa_bb_fn[1] = gen9_init_perctx_bb;
+ wa_bb_fn[1] = NULL;
break;
case 8:
wa_bb_fn[0] = gen8_init_indirectctx_bb;
@@ -1210,7 +1278,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
ret = -EINVAL;
break;
}
- batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+ if (wa_bb_fn[i])
+ batch_ptr = wa_bb_fn[i](engine, batch_ptr);
wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
}
@@ -1234,9 +1303,7 @@ static u8 gtiir[] = {
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct execlist_port *port = engine->execlist_port;
- unsigned int n;
- bool submit;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
int ret;
ret = intel_mocs_init_engine(engine);
@@ -1269,24 +1336,11 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ execlists->csb_head = -1;
/* After a GPU reset, we may have requests to replay */
- submit = false;
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
- if (!port_isset(&port[n]))
- break;
-
- DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
- engine->name, n,
- port_request(&port[n])->global_seqno);
-
- /* Discard the current inflight count */
- port_set(&port[n], port_request(&port[n]));
- submit = true;
- }
-
- if (submit && !i915.enable_guc_submission)
- execlists_submit_ports(engine);
+ if (!i915_modparams.enable_guc_submission && execlists->first)
+ tasklet_schedule(&execlists->irq_tasklet);
return 0;
}
@@ -1327,9 +1381,12 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- struct execlist_port *port = engine->execlist_port;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_gem_request *rq, *rn;
struct intel_context *ce;
- unsigned int n;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->timeline->lock, flags);
/*
* Catch up with any missed context-switch interrupts.
@@ -1340,20 +1397,26 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* guessing the missed context-switch events by looking at what
* requests were completed.
*/
- if (!request) {
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
- i915_gem_request_put(port_request(&port[n]));
- memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
- return;
- }
+ execlist_cancel_port_requests(execlists);
- if (request->ctx != port_request(port)->ctx) {
- i915_gem_request_put(port_request(port));
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
+ /* Push back any incomplete requests for replay after the reset. */
+ list_for_each_entry_safe_reverse(rq, rn,
+ &engine->timeline->requests, link) {
+ struct i915_priolist *p;
+
+ if (i915_gem_request_completed(rq))
+ break;
+
+ __i915_gem_request_unsubmit(rq);
+
+ p = lookup_priolist(engine,
+ &rq->priotree,
+ rq->priotree.priority);
+ list_add(&rq->priotree.link,
+ &ptr_mask_bits(p, 1)->requests);
}
- GEM_BUG_ON(request->ctx != port_request(port)->ctx);
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
/* If the request was innocent, we leave the request in the ELSP
* and will try to replay it on restarting. The context image may
@@ -1365,7 +1428,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* and have to at least restore the RING register in the context
* image back to the expected values to skip over the guilty request.
*/
- if (request->fence.error != -EIO)
+ if (!request || request->fence.error != -EIO)
return;
/* We want a simple context + ring to execute the breadcrumb update.
@@ -1668,8 +1731,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
* Tasklet cannot be active at this point due intel_mark_active/idle
* so this is just for documentation.
*/
- if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->irq_tasklet.state)))
- tasklet_kill(&engine->irq_tasklet);
+ if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->execlists.irq_tasklet.state)))
+ tasklet_kill(&engine->execlists.irq_tasklet);
dev_priv = engine->i915;
@@ -1680,11 +1743,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
if (engine->cleanup)
engine->cleanup(engine);
- if (engine->status_page.vma) {
- i915_gem_object_unpin_map(engine->status_page.vma->obj);
- engine->status_page.vma = NULL;
- }
-
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
@@ -1696,8 +1754,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
static void execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
+ engine->cancel_requests = execlists_cancel_requests;
engine->schedule = execlists_schedule;
- engine->irq_tasklet.func = intel_lrc_irq_handler;
+ engine->execlists.irq_tasklet.func = intel_lrc_irq_handler;
}
static void
@@ -1731,24 +1790,6 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
-static int
-lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma)
-{
- const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE;
- void *hws;
-
- /* The HWSP is part of the default context object in LRC mode. */
- hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(hws))
- return PTR_ERR(hws);
-
- engine->status_page.page_addr = hws + hws_offset;
- engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset;
- engine->status_page.vma = vma;
-
- return 0;
-}
-
static void
logical_ring_setup(struct intel_engine_cs *engine)
{
@@ -1772,32 +1813,23 @@ logical_ring_setup(struct intel_engine_cs *engine)
RING_CONTEXT_STATUS_BUF_BASE(engine),
FW_REG_READ);
- engine->fw_domains = fw_domains;
+ engine->execlists.fw_domains = fw_domains;
- tasklet_init(&engine->irq_tasklet,
+ tasklet_init(&engine->execlists.irq_tasklet,
intel_lrc_irq_handler, (unsigned long)engine);
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
}
-static int
-logical_ring_init(struct intel_engine_cs *engine)
+static int logical_ring_init(struct intel_engine_cs *engine)
{
- struct i915_gem_context *dctx = engine->i915->kernel_context;
int ret;
ret = intel_engine_init_common(engine);
if (ret)
goto error;
- /* And setup the hardware status page. */
- ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
- if (ret) {
- DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret);
- goto error;
- }
-
return 0;
error:
@@ -1955,13 +1987,12 @@ static void execlists_init_reg_state(u32 *regs,
CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+
CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
-
- if (engine->wa_ctx.vma) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+ if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
regs[CTX_RCS_INDIRECT_CTX + 1] =
@@ -1970,6 +2001,11 @@ static void execlists_init_reg_state(u32 *regs,
regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
intel_lr_indirect_ctx_offset(engine) << 6;
+ }
+
+ CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ if (wa_ctx->per_ctx.size) {
+ u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
regs[CTX_BB_PER_CTX_PTR + 1] =
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
@@ -2054,8 +2090,11 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
- /* One extra page as the sharing data between driver and GuC */
- context_size += PAGE_SIZE * LRC_PPHWSP_PN;
+ /*
+ * Before the actual start of the context image, we insert a few pages
+ * for our own use and for sharing with the GuC.
+ */
+ context_size += LRC_HEADER_PAGES * PAGE_SIZE;
ctx_obj = i915_gem_object_create(ctx->i915, context_size);
if (IS_ERR(ctx_obj)) {
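For a sense of the allocation above: it is the engine's context image rounded up to a GTT page, plus the new header pages. A worked example with assumed numbers (PAGE_SIZE and I915_GTT_PAGE_SIZE taken as 4096, LRC_HEADER_PAGES taken as 2, and an arbitrary context size; none of these values come from the patch itself):

#include <stdio.h>

#define PAGE_SIZE		4096u	/* assumed */
#define I915_GTT_PAGE_SIZE	4096u	/* assumed */
#define LRC_HEADER_PAGES	2u	/* assumed for the example */

static unsigned int round_up_to(unsigned int x, unsigned int align)
{
	return (x + align - 1) / align * align;
}

int main(void)
{
	unsigned int engine_context_size = 22 * PAGE_SIZE + 100;	/* made up */
	unsigned int context_size;

	context_size = round_up_to(engine_context_size, I915_GTT_PAGE_SIZE);
	context_size += LRC_HEADER_PAGES * PAGE_SIZE;

	printf("object size: %u bytes (%u pages)\n",
	       context_size, context_size / PAGE_SIZE);
	return 0;
}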