Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.h	263
1 file changed, 235 insertions, 28 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2863d5a65187..c5ff203e42d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
 #include "i915_gem_batch_pool.h"
 #include "i915_gem_request.h"
 #include "i915_gem_timeline.h"
+#include "i915_pmu.h"
 #include "i915_selftest.h"
 
 struct drm_printer;
@@ -47,16 +48,6 @@ struct intel_hw_status_page {
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to)			     \
-	(((__from) * I915_NUM_ENGINES  + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to)			     \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from)			     \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
 enum intel_engine_hangcheck_action {
 	ENGINE_IDLE = 0,
 	ENGINE_WAIT,
@@ -166,7 +157,6 @@ struct i915_ctx_workarounds {
 };
 
 struct drm_i915_gem_request;
-struct intel_render_state;
 
 /*
  * Engine IDs definitions.
@@ -195,9 +185,9 @@ struct i915_priolist {
  */
 struct intel_engine_execlists {
 	/**
-	 * @irq_tasklet: softirq tasklet for bottom handler
+	 * @tasklet: softirq tasklet for bottom handler
 	 */
-	struct tasklet_struct irq_tasklet;
+	struct tasklet_struct tasklet;
 
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
@@ -210,6 +200,11 @@ struct intel_engine_execlists {
 	bool no_priolist;
 
 	/**
+	 * @elsp: the ExecList Submission Port register
+	 */
+	u32 __iomem *elsp;
+
+	/**
 	 * @port: execlist port states
 	 *
 	 * For each hardware ELSP (ExecList Submission Port) we keep
@@ -253,6 +248,7 @@ struct intel_engine_execlists {
 	unsigned int active;
 #define EXECLISTS_ACTIVE_USER 0
 #define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -290,11 +286,14 @@ struct intel_engine_execlists {
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
 	char name[INTEL_ENGINE_CS_MAX_NAME];
+
 	enum intel_engine_id id;
-	unsigned int uabi_id;
 	unsigned int hw_id;
 	unsigned int guc_id;
 
+	u8 uabi_id;
+	u8 uabi_class;
+
 	u8 class;
 	u8 instance;
 	u32 context_size;
@@ -304,7 +303,7 @@ struct intel_engine_cs {
 	struct intel_ring *buffer;
 	struct intel_timeline *timeline;
 
-	struct intel_render_state *render_state;
+	struct drm_i915_gem_object *default_state;
 
 	atomic_t irq_count;
 	unsigned long irq_posted;
@@ -340,12 +339,49 @@ struct intel_engine_cs {
 		struct timer_list hangcheck; /* detect missed interrupts */
 
 		unsigned int hangcheck_interrupts;
+		unsigned int irq_enabled;
 
 		bool irq_armed : 1;
-		bool irq_enabled : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;
 
+	struct {
+		/**
+		 * @enable: Bitmask of enable sample events on this engine.
+		 *
+		 * Bits correspond to sample event types, for instance
+		 * I915_SAMPLE_QUEUED is bit 0 etc.
+		 */
+		u32 enable;
+		/**
+		 * @enable_count: Reference count for the enabled samplers.
+		 *
+		 * Index number corresponds to the bit number from @enable.
+		 */
+		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+		/**
+		 * @sample: Counter values for sampling events.
+		 *
+		 * Our internal timer stores the current counters in this field.
+		 */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+		/**
+		 * @busy_stats: Has enablement of engine stats tracking been
+		 * 		requested.
+		 */
+		bool busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Same as with @enable_busy_stats action, with the difference
+		 * that we delay it in case there are rapid enable-disable
+		 * actions, which can happen during tool startup (like perf
+		 * stat).
+		 */
+		struct delayed_work disable_busy_stats;
+	} pmu;
+
 	/*
	 * A pool of objects to use as shadow copies of client batch buffers
 	 * when the command parser is enabled. Prevents the client from
@@ -366,6 +402,9 @@ struct intel_engine_cs {
 	void		(*reset_hw)(struct intel_engine_cs *engine,
 				    struct drm_i915_gem_request *req);
 
+	void		(*park)(struct intel_engine_cs *engine);
+	void		(*unpark)(struct intel_engine_cs *engine);
+
 	void		(*set_default_submission)(struct intel_engine_cs *engine);
 
 	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
@@ -462,18 +501,15 @@ struct intel_engine_cs {
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		union {
 #define GEN6_SEMAPHORE_LAST	VECS_HW
 #define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
 #define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
-			struct {
-				/* our mbox written by others */
-				u32		wait[GEN6_NUM_SEMAPHORES];
-				/* mboxes this ring signals to */
-				i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
-			} mbox;
-			u64		signal_ggtt[I915_NUM_ENGINES];
-		};
+		struct {
+			/* our mbox written by others */
+			u32		wait[GEN6_NUM_SEMAPHORES];
+			/* mboxes this ring signals to */
+			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
+		} mbox;
 
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
@@ -501,13 +537,16 @@ struct intel_engine_cs {
 	 * stream (ring).
 	 */
 	struct i915_gem_context *legacy_active_context;
+	struct i915_hw_ppgtt *legacy_active_ppgtt;
 
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
 	struct intel_engine_hangcheck hangcheck;
 
-	bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
+	unsigned int flags;
 
 	/*
	 * Table of commands the command parser needs to know about
 	 * for this engine.
@@ -532,8 +571,50 @@ struct intel_engine_cs {
 	 * certain bits to encode the command length in the header).
 	 */
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+	struct {
+		/**
+		 * @lock: Lock protecting the below fields.
+		 */
+		spinlock_t lock;
+		/**
+		 * @enabled: Reference count indicating number of listeners.
+		 */
+		unsigned int enabled;
+		/**
+		 * @active: Number of contexts currently scheduled in.
+		 */
+		unsigned int active;
+		/**
+		 * @enabled_at: Timestamp when busy stats were enabled.
+		 */
+		ktime_t enabled_at;
+		/**
+		 * @start: Timestamp of the last idle to active transition.
+		 *
+		 * Idle is defined as active == 0, active is active > 0.
+		 */
+		ktime_t start;
+		/**
+		 * @total: Total time this engine was busy.
+		 *
+		 * Accumulated time not counting the most recent block in cases
+		 * where engine is currently busy (active > 0).
+		 */
+		ktime_t total;
+	} stats;
 };
 
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
@@ -555,6 +636,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists,
 	return test_bit(bit, (unsigned long *)&execlists->active);
 }
 
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
@@ -624,6 +711,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  */
 #define I915_GEM_HWS_INDEX		0x30
 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_PREEMPT_INDEX	0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_SCRATCH_INDEX	0x40
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
@@ -648,6 +737,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
 
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
 				   unsigned int n);
@@ -776,6 +866,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
 }
 
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -846,6 +941,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
 #define ENGINE_WAKEUP_WAITER BIT(0)
 #define ENGINE_WAKEUP_ASLEEP BIT(1)
 
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
 void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -864,14 +962,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 	return batch + 6;
 }
 
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+{
+	/* We're using qword write, offset should be aligned to 8 bytes. */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	/* w/a for post sync ops following a GPGPU operation we
+	 * need a prior CS_STALL, which is emitted by the flush
+	 * following the batch.
+	 */
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_QW_WRITE;
+	*cs++ = gtt_offset;
+	*cs++ = 0;
+	*cs++ = value;
+	/* We're thrashing one dword of HWS. */
+	*cs++ = 0;
+
+	return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+{
+	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+	GEM_BUG_ON(gtt_offset & (1 << 5));
+	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+	*cs++ = 0;
+	*cs++ = value;
+
+	return cs;
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engines_mark_idle(struct drm_i915_private *i915);
+bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 
-void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+		       struct drm_printer *m,
+		       const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		if (engine->stats.active++ == 0)
+			engine->stats.start = ktime_get();
+		GEM_BUG_ON(engine->stats.active == 0);
+	}
+
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		ktime_t last;
+
+		if (engine->stats.active && --engine->stats.active == 0) {
+			/*
+			 * Decrement the active context count and in case GPU
+			 * is now idle add up to the running total.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.start);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		} else if (engine->stats.active == 0) {
+			/*
+			 * After turning on engine stats, context out might be
+			 * the first event in which case we account from the
+			 * time stats gathering was turned on.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}
+
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
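For reference, a minimal sketch (not part of this patch) of how a consumer such as the i915 PMU might drive the busy-stats interface declared above. The function name sample_engine_busy and the 100 ms sampling window are illustrative assumptions; only the intel_engine_supports_stats(), intel_enable_engine_stats(), intel_engine_get_busy_time() and intel_disable_engine_stats() calls come from this header.

/*
 * Illustrative only: measure how long @engine was busy over a ~100 ms
 * window using the busy-stats API added by this patch.
 */
#include <linux/delay.h>
#include <linux/ktime.h>

static ktime_t sample_engine_busy(struct intel_engine_cs *engine)
{
	ktime_t t0, t1;

	/* Only engines flagged with I915_ENGINE_SUPPORTS_STATS can be sampled. */
	if (!intel_engine_supports_stats(engine))
		return ns_to_ktime(0);

	/* Reference-counted enable; fails (e.g. -ENODEV) when unsupported. */
	if (intel_enable_engine_stats(engine))
		return ns_to_ktime(0);

	t0 = intel_engine_get_busy_time(engine);
	msleep(100);	/* sampling period */
	t1 = intel_engine_get_busy_time(engine);

	intel_disable_engine_stats(engine);

	/* Busy time accumulated between the two reads. */
	return ktime_sub(t1, t0);
}

Because enable/disable are reference counted under stats.lock, several such listeners can sample concurrently, which is why the header accumulates into stats.total rather than resetting on enable.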