Diffstat (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_engine_cs.c | 288
1 file changed, 222 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1590375f31cb..2d1952849d69 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -25,7 +25,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
@@ -230,6 +229,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 		break;
 	default:
 		MISSING_CASE(class);
+		/* fall through */
 	case VIDEO_DECODE_CLASS:
 	case VIDEO_ENHANCEMENT_CLASS:
 	case COPY_ENGINE_CLASS:
@@ -302,6 +302,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 							   engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
+	if (engine->context_size)
+		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
@@ -456,28 +458,16 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static bool csb_force_mmio(struct drm_i915_private *i915)
-{
-	/* Older GVT emulation depends upon intercepting CSB mmio */
-	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
-		return true;
-
-	return false;
-}
-
 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
-	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
-
 	execlists->port_mask = 1;
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
 	execlists->queue_priority = INT_MIN;
-	execlists->queue = RB_ROOT;
-	execlists->first = NULL;
+	execlists->queue = RB_ROOT_CACHED;
 }
 
 /**
@@ -492,6 +482,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	i915_timeline_init(engine->i915, &engine->timeline, engine->name);
+	lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE);
 
 	intel_engine_init_execlist(engine);
 	intel_engine_init_hangcheck(engine);
@@ -499,7 +490,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_cmd_parser(engine);
 }
 
-int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
+int intel_engine_create_scratch(struct intel_engine_cs *engine,
+				unsigned int size)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
@@ -515,7 +507,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
 		return PTR_ERR(obj);
 	}
 
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unref;
@@ -533,7 +525,7 @@ err_unref:
 	return ret;
 }
 
-static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
+void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
 {
 	i915_vma_unpin_and_release(&engine->scratch);
 }
@@ -585,7 +577,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 	if (ret)
 		goto err;
 
-	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err;
@@ -645,6 +637,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 	return 0;
 }
 
+static void __intel_context_unpin(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine)
+{
+	intel_context_unpin(to_intel_context(ctx, engine));
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -658,7 +656,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
  */
 int intel_engine_init_common(struct intel_engine_cs *engine)
 {
-	struct intel_ring *ring;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
 	int ret;
 
 	engine->set_default_submission(engine);
@@ -670,18 +669,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	 * be available. To avoid this we always pin the default
 	 * context.
 	 */
-	ring = intel_context_pin(engine->i915->kernel_context, engine);
-	if (IS_ERR(ring))
-		return PTR_ERR(ring);
+	ce = intel_context_pin(i915->kernel_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
 	/*
 	 * Similarly the preempt context must always be available so that
 	 * we can interrupt the engine at any time.
 	 */
-	if (engine->i915->preempt_context) {
-		ring = intel_context_pin(engine->i915->preempt_context, engine);
-		if (IS_ERR(ring)) {
-			ret = PTR_ERR(ring);
+	if (i915->preempt_context) {
+		ce = intel_context_pin(i915->preempt_context, engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
 			goto err_unpin_kernel;
 		}
 	}
@@ -690,7 +689,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	if (HWS_NEEDS_PHYSICAL(engine->i915))
+	if (HWS_NEEDS_PHYSICAL(i915))
 		ret = init_phys_status_page(engine);
 	else
 		ret = init_status_page(engine);
@@ -702,10 +701,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+
 err_unpin_kernel:
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 	return ret;
 }
 
@@ -718,6 +718,8 @@ err_unpin_kernel:
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
 	intel_engine_cleanup_scratch(engine);
 
 	if (HWS_NEEDS_PHYSICAL(engine->i915))
@@ -732,9 +734,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->default_state)
 		i915_gem_object_put(engine->default_state);
 
-	if (engine->i915->preempt_context)
-		intel_context_unpin(engine->i915->preempt_context, engine);
-	intel_context_unpin(engine->i915->kernel_context, engine);
+	if (i915->preempt_context)
+		__intel_context_unpin(i915->preempt_context, engine);
+	__intel_context_unpin(i915->kernel_context, engine);
 
 	i915_timeline_fini(&engine->timeline);
 }
@@ -769,6 +771,35 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
 	return bbaddr;
 }
 
+int intel_engine_stop_cs(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+	const i915_reg_t mode = RING_MI_MODE(base);
+	int err;
+
+	if (INTEL_GEN(dev_priv) < 3)
+		return -ENODEV;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
+
+	err = 0;
+	if (__intel_wait_for_register_fw(dev_priv,
+					 mode, MODE_IDLE, MODE_IDLE,
+					 1000, 0,
+					 NULL)) {
+		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+		err = -ETIMEDOUT;
+	}
+
+	/* A final mmio read to let GPU writes be hopefully flushed to memory */
+	POSTING_READ_FW(mode);
+
+	return err;
+}
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 {
 	switch (type) {
@@ -780,12 +811,32 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 	}
 }
 
+u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
+{
+	const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+	u32 mcr_s_ss_select;
+	u32 slice = fls(sseu->slice_mask);
+	u32 subslice = fls(sseu->subslice_mask[slice]);
+
+	if (INTEL_GEN(dev_priv) == 10)
+		mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
+				  GEN8_MCR_SUBSLICE(subslice);
+	else if (INTEL_GEN(dev_priv) >= 11)
+		mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
+				  GEN11_MCR_SUBSLICE(subslice);
+	else
+		mcr_s_ss_select = 0;
+
+	return mcr_s_ss_select;
+}
+
 static inline uint32_t
 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 		  int subslice, i915_reg_t reg)
 {
 	uint32_t mcr_slice_subslice_mask;
 	uint32_t mcr_slice_subslice_select;
+	uint32_t default_mcr_s_ss_select;
 	uint32_t mcr;
 	uint32_t ret;
 	enum forcewake_domains fw_domains;
@@ -802,6 +853,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 					    GEN8_MCR_SUBSLICE(subslice);
 	}
 
+	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
+
 	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
 						    FW_REG_READ);
 	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
@@ -812,11 +865,10 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 
 	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
-	/*
-	 * The HW expects the slice and sublice selectors to be reset to 0
-	 * after reading out the registers.
-	 */
-	WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+
+	WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
+		     default_mcr_s_ss_select);
+
 	mcr &= ~mcr_slice_subslice_mask;
 	mcr |= mcr_slice_subslice_select;
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
@@ -824,6 +876,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 	ret = I915_READ_FW(reg);
 
 	mcr &= ~mcr_slice_subslice_mask;
+	mcr |= default_mcr_s_ss_select;
+
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
 	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
@@ -934,11 +988,24 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return true;
 
 	/* Waiting to drain ELSP? */
-	if (READ_ONCE(engine->execlists.active))
-		return false;
+	if (READ_ONCE(engine->execlists.active)) {
+		struct tasklet_struct *t = &engine->execlists.tasklet;
+
+		local_bh_disable();
+		if (tasklet_trylock(t)) {
+			/* Must wait for any GPU reset in progress. */
+			if (__tasklet_is_enabled(t))
+				t->func(t->data);
+			tasklet_unlock(t);
+		}
+		local_bh_enable();
 
-	/* ELSP is empty, but there are ready requests? */
-	if (READ_ONCE(engine->execlists.first))
+		if (READ_ONCE(engine->execlists.active))
+			return false;
+	}
+
+	/* ELSP is empty, but there are ready requests? E.g. after reset */
+	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
 		return false;
 
 	/* Ring stopped? */
@@ -978,8 +1045,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
  */
 bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 {
-	const struct i915_gem_context * const kernel_context =
-		engine->i915->kernel_context;
+	const struct intel_context *kernel_context =
+		to_intel_context(engine->i915->kernel_context, engine);
 	struct i915_request *rq;
 
 	lockdep_assert_held(&engine->i915->drm.struct_mutex);
@@ -991,7 +1058,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 */
 	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
-		return rq->ctx == kernel_context;
+		return rq->hw_context == kernel_context;
 	else
 		return engine->last_retired_context == kernel_context;
 }
@@ -1006,6 +1073,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 }
 
 /**
+ * intel_engines_sanitize: called after the GPU has lost power
+ * @i915: the i915 device
+ *
+ * Anytime we reset the GPU, either with an explicit GPU reset or through a
+ * PCI power cycle, the GPU loses state and we must reset our state tracking
+ * to match. Note that calling intel_engines_sanitize() if the GPU has not
+ * been reset results in much confusion!
+ */
+void intel_engines_sanitize(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	GEM_TRACE("\n");
+
+	for_each_engine(engine, i915, id) {
+		if (engine->reset.reset)
+			engine->reset.reset(engine, NULL);
+	}
+}
+
+/**
  * intel_engines_park: called when the GT is transitioning from busy->idle
  * @i915: the i915 device
  *
@@ -1043,6 +1132,11 @@ void intel_engines_park(struct drm_i915_private *i915)
 		if (engine->park)
 			engine->park(engine);
 
+		if (engine->pinned_default_state) {
+			i915_gem_object_unpin_map(engine->default_state);
+			engine->pinned_default_state = NULL;
+		}
+
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 		engine->execlists.no_priolist = false;
 	}
@@ -1060,6 +1154,16 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
+		void *map;
+
+		/* Pin the default state for fast resets from atomic context. */
+		map = NULL;
+		if (engine->default_state)
+			map = i915_gem_object_pin_map(engine->default_state,
+						      I915_MAP_WB);
+		if (!IS_ERR_OR_NULL(map))
+			engine->pinned_default_state = map;
+
 		if (engine->unpark)
 			engine->unpark(engine);
 
@@ -1067,6 +1171,26 @@
 	}
 }
 
+/**
+ * intel_engine_lost_context: called when the GPU is reset into unknown state
+ * @engine: the engine
+ *
+ * We have either reset the GPU or otherwise about to lose state tracking of
+ * the current GPU logical state (e.g. suspend). On next use, it is therefore
+ * imperative that we make no presumptions about the current state and load
+ * from scratch.
+ */
+void intel_engine_lost_context(struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
+
+	ce = fetch_and_zero(&engine->last_retired_context);
+	if (ce)
+		intel_context_unpin(ce);
+}
+
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
@@ -1151,7 +1275,7 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
 						rowsize, sizeof(u32),
 						line, sizeof(line),
 						false) >= sizeof(line));
-		drm_printf(m, "%08zx %s\n", pos, line);
+		drm_printf(m, "[%04zx] %s\n", pos, line);
 
 		prev = buf + pos;
 		skip = false;
@@ -1166,6 +1290,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		&engine->execlists;
 	u64 addr;
 
+	if (engine->id == RCS && IS_GEN(dev_priv, 4, 7))
+		drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID));
 	drm_printf(m, "\tRING_START: 0x%08x\n",
 		   I915_READ(RING_START(engine->mmio_base)));
 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
@@ -1232,12 +1358,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
 			   yesno(test_bit(TASKLET_STATE_SCHED,
 					  &engine->execlists.tasklet.state)),
 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1287,6 +1411,39 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 }
 
+static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+{
+	void *ring;
+	int size;
+
+	drm_printf(m,
+		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
+		   rq->head, rq->postfix, rq->tail,
+		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
+		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
+	size = rq->tail - rq->head;
+	if (rq->tail < rq->head)
+		size += rq->ring->size;
+
+	ring = kmalloc(size, GFP_ATOMIC);
+	if (ring) {
+		const void *vaddr = rq->ring->vaddr;
+		unsigned int head = rq->head;
+		unsigned int len = 0;
+
+		if (rq->tail < head) {
+			len = rq->ring->size - head;
+			memcpy(ring, vaddr + head, len);
+			head = 0;
+		}
+		memcpy(ring + len, vaddr + head, size - len);
+
+		hexdump(m, ring, size);
+		kfree(ring);
+	}
+}
+
 void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...)
@@ -1296,6 +1453,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq, *last;
+	unsigned long flags;
 	struct rb_node *rb;
 	int count;
 
@@ -1336,11 +1494,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	rq = i915_gem_find_active_request(engine);
 	if (rq) {
 		print_request(m, rq, "\t\tactive ");
-		drm_printf(m,
-			   "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
-			   rq->head, rq->postfix, rq->tail,
-			   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
-			   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
 		drm_printf(m, "\t\tring->start:  0x%08x\n",
 			   i915_ggtt_offset(rq->ring->vma));
 		drm_printf(m, "\t\tring->head:   0x%08x\n",
@@ -1351,6 +1505,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 			   rq->ring->emit);
 		drm_printf(m, "\t\tring->space:  0x%08x\n",
 			   rq->ring->space);
+
+		print_request_ring(m, rq);
 	}
 
 	rcu_read_unlock();
@@ -1362,7 +1518,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
 
-	spin_lock_irq(&engine->timeline.lock);
+	local_irq_save(flags);
+	spin_lock(&engine->timeline.lock);
 
 	last = NULL;
 	count = 0;
@@ -1384,7 +1541,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	last = NULL;
 	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
-	for (rb = execlists->first; rb; rb = rb_next(rb)) {
+	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
 		struct i915_priolist *p =
 			rb_entry(rb, typeof(*p), node);
 
@@ -1404,22 +1561,21 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		print_request(m, last, "\t\tQ ");
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 
-	spin_lock_irq(&b->rb_lock);
+	spin_lock(&b->rb_lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
 		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
 
 		drm_printf(m, "\t%s [%d] waiting for %x\n",
 			   w->tsk->comm, w->tsk->pid, w->seqno);
 	}
-	spin_unlock_irq(&b->rb_lock);
+	spin_unlock(&b->rb_lock);
+	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
 		   engine->irq_posted,
 		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
@@ -1468,8 +1624,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	tasklet_disable(&execlists->tasklet);
-	write_seqlock_irqsave(&engine->stats.lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	write_seqlock(&engine->stats.lock);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
 		err = -EBUSY;
@@ -1493,8 +1649,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	}
 
 unlock:
-	write_sequnlock_irqrestore(&engine->stats.lock, flags);
-	tasklet_enable(&execlists->tasklet);
+	write_sequnlock(&engine->stats.lock);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 	return err;
 }