diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
| -rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 1286 | 
1 files changed, 706 insertions, 580 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 24d77ddcc5f4..fa4ccb346389 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -56,24 +56,42 @@ static void gen9_init_clock_gating(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; -	/* -	 * WaDisableSDEUnitClockGating:skl -	 * This seems to be a pre-production w/a. -	 */ -	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | -		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE); +	/* WaEnableLbsSlaRetryTimerDecrement:skl */ +	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | +		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); +} -	/* -	 * WaDisableDgMirrorFixInHalfSliceChicken5:skl -	 * This is a pre-production w/a. -	 */ -	I915_WRITE(GEN9_HALF_SLICE_CHICKEN5, -		   I915_READ(GEN9_HALF_SLICE_CHICKEN5) & -		   ~GEN9_DG_MIRROR_FIX_ENABLE); +static void skl_init_clock_gating(struct drm_device *dev) +{ +	struct drm_i915_private *dev_priv = dev->dev_private; -	/* Wa4x4STCOptimizationDisable:skl */ -	I915_WRITE(CACHE_MODE_1, -		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); +	gen9_init_clock_gating(dev); + +	if (INTEL_REVID(dev) == SKL_REVID_A0) { +		/* +		 * WaDisableSDEUnitClockGating:skl +		 * WaSetGAPSunitClckGateDisable:skl +		 */ +		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | +			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE | +			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE); +	} + +	if (INTEL_REVID(dev) <= SKL_REVID_D0) { +		/* WaDisableHDCInvalidation:skl */ +		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | +			   BDW_DISABLE_HDC_INVALIDATION); + +		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */ +		I915_WRITE(FF_SLICE_CS_CHICKEN2, +			   I915_READ(FF_SLICE_CS_CHICKEN2) | +			   GEN9_TSG_BARRIER_ACK_DISABLE); +	} + +	if (INTEL_REVID(dev) <= SKL_REVID_E0) +		/* WaDisableLSQCROPERFforOCL:skl */ +		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | +			   GEN8_LQSC_RO_PERF_DIS);  }  static void 
i915_pineview_get_mem_freq(struct drm_device *dev) @@ -245,6 +263,47 @@ static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,  	return NULL;  } +static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) +{ +	u32 val; + +	mutex_lock(&dev_priv->rps.hw_lock); + +	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); +	if (enable) +		val &= ~FORCE_DDR_HIGH_FREQ; +	else +		val |= FORCE_DDR_HIGH_FREQ; +	val &= ~FORCE_DDR_LOW_FREQ; +	val |= FORCE_DDR_FREQ_REQ_ACK; +	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); + +	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & +		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) +		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); + +	mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) +{ +	u32 val; + +	mutex_lock(&dev_priv->rps.hw_lock); + +	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); +	if (enable) +		val |= DSP_MAXFIFO_PM5_ENABLE; +	else +		val &= ~DSP_MAXFIFO_PM5_ENABLE; +	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + +	mutex_unlock(&dev_priv->rps.hw_lock); +} + +#define FW_WM(value, plane) \ +	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK) +  void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)  {  	struct drm_device *dev = dev_priv->dev; @@ -252,6 +311,8 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)  	if (IS_VALLEYVIEW(dev)) {  		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0); +		if (IS_CHERRYVIEW(dev)) +			chv_set_memory_pm5(dev_priv, enable);  	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {  		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);  	} else if (IS_PINEVIEW(dev)) { @@ -274,6 +335,7 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)  		      enable ? 
"enabled" : "disabled");  } +  /*   * Latency for FIFO fetches is dependent on several factors:   *   - memory configuration (speed, channels) @@ -290,6 +352,61 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)   */  static const int pessimal_latency_ns = 5000; +#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ +	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) + +static int vlv_get_fifo_size(struct drm_device *dev, +			      enum pipe pipe, int plane) +{ +	struct drm_i915_private *dev_priv = dev->dev_private; +	int sprite0_start, sprite1_start, size; + +	switch (pipe) { +		uint32_t dsparb, dsparb2, dsparb3; +	case PIPE_A: +		dsparb = I915_READ(DSPARB); +		dsparb2 = I915_READ(DSPARB2); +		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0); +		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4); +		break; +	case PIPE_B: +		dsparb = I915_READ(DSPARB); +		dsparb2 = I915_READ(DSPARB2); +		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8); +		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12); +		break; +	case PIPE_C: +		dsparb2 = I915_READ(DSPARB2); +		dsparb3 = I915_READ(DSPARB3); +		sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16); +		sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20); +		break; +	default: +		return 0; +	} + +	switch (plane) { +	case 0: +		size = sprite0_start; +		break; +	case 1: +		size = sprite1_start - sprite0_start; +		break; +	case 2: +		size = 512 - 1 - sprite1_start; +		break; +	default: +		return 0; +	} + +	DRM_DEBUG_KMS("Pipe %c %s %c FIFO size: %d\n", +		      pipe_name(pipe), plane == 0 ? "primary" : "sprite", +		      plane == 0 ? 
plane_name(pipe) : sprite_name(pipe, plane - 1), +		      size); + +	return size; +} +  static int i9xx_get_fifo_size(struct drm_device *dev, int plane)  {  	struct drm_i915_private *dev_priv = dev->dev_private; @@ -535,7 +652,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)  	crtc = single_enabled_crtc(dev);  	if (crtc) {  		const struct drm_display_mode *adjusted_mode; -		int pixel_size = crtc->primary->fb->bits_per_pixel / 8; +		int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;  		int clock;  		adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; @@ -547,7 +664,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)  					pixel_size, latency->display_sr);  		reg = I915_READ(DSPFW1);  		reg &= ~DSPFW_SR_MASK; -		reg |= wm << DSPFW_SR_SHIFT; +		reg |= FW_WM(wm, SR);  		I915_WRITE(DSPFW1, reg);  		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg); @@ -557,7 +674,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)  					pixel_size, latency->cursor_sr);  		reg = I915_READ(DSPFW3);  		reg &= ~DSPFW_CURSOR_SR_MASK; -		reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT; +		reg |= FW_WM(wm, CURSOR_SR);  		I915_WRITE(DSPFW3, reg);  		/* Display HPLL off SR */ @@ -566,7 +683,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)  					pixel_size, latency->display_hpll_disable);  		reg = I915_READ(DSPFW3);  		reg &= ~DSPFW_HPLL_SR_MASK; -		reg |= wm & DSPFW_HPLL_SR_MASK; +		reg |= FW_WM(wm, HPLL_SR);  		I915_WRITE(DSPFW3, reg);  		/* cursor HPLL off SR */ @@ -575,7 +692,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)  					pixel_size, latency->cursor_hpll_disable);  		reg = I915_READ(DSPFW3);  		reg &= ~DSPFW_HPLL_CURSOR_MASK; -		reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT; +		reg |= FW_WM(wm, HPLL_CURSOR);  		I915_WRITE(DSPFW3, reg);  		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg); @@ -611,7 +728,7 @@ static bool g4x_compute_wm0(struct drm_device *dev,  	clock = 
adjusted_mode->crtc_clock;  	htotal = adjusted_mode->crtc_htotal;  	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; -	pixel_size = crtc->primary->fb->bits_per_pixel / 8; +	pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;  	/* Use the small buffer method to calculate plane watermark */  	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000; @@ -626,7 +743,7 @@ static bool g4x_compute_wm0(struct drm_device *dev,  	/* Use the large buffer method to calculate cursor watermark */  	line_time_us = max(htotal * 1000 / clock, 1);  	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; -	entries = line_count * to_intel_crtc(crtc)->cursor_width * pixel_size; +	entries = line_count * crtc->cursor->state->crtc_w * pixel_size;  	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;  	if (tlb_miss > 0)  		entries += tlb_miss; @@ -698,7 +815,7 @@ static bool g4x_compute_srwm(struct drm_device *dev,  	clock = adjusted_mode->crtc_clock;  	htotal = adjusted_mode->crtc_htotal;  	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; -	pixel_size = crtc->primary->fb->bits_per_pixel / 8; +	pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;  	line_time_us = max(htotal * 1000 / clock, 1);  	line_count = (latency_ns / line_time_us + 1000) / 1000; @@ -712,7 +829,7 @@ static bool g4x_compute_srwm(struct drm_device *dev,  	*display_wm = entries + display->guard_size;  	/* calculate the self-refresh watermark for display cursor */ -	entries = line_count * pixel_size * to_intel_crtc(crtc)->cursor_width; +	entries = line_count * pixel_size * crtc->cursor->state->crtc_w;  	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);  	*cursor_wm = entries + cursor->guard_size; @@ -721,232 +838,234 @@ static bool g4x_compute_srwm(struct drm_device *dev,  			      display, cursor);  } -static bool vlv_compute_drain_latency(struct drm_crtc *crtc, -				      int pixel_size, -				      int *prec_mult, -				      int *drain_latency) -{ -	struct 
drm_device *dev = crtc->dev; -	int entries; -	int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock; +#define FW_WM_VLV(value, plane) \ +	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV) -	if (WARN(clock == 0, "Pixel clock is zero!\n")) -		return false; +static void vlv_write_wm_values(struct intel_crtc *crtc, +				const struct vlv_wm_values *wm) +{ +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); +	enum pipe pipe = crtc->pipe; -	if (WARN(pixel_size == 0, "Pixel size is zero!\n")) -		return false; +	I915_WRITE(VLV_DDL(pipe), +		   (wm->ddl[pipe].cursor << DDL_CURSOR_SHIFT) | +		   (wm->ddl[pipe].sprite[1] << DDL_SPRITE_SHIFT(1)) | +		   (wm->ddl[pipe].sprite[0] << DDL_SPRITE_SHIFT(0)) | +		   (wm->ddl[pipe].primary << DDL_PLANE_SHIFT)); -	entries = DIV_ROUND_UP(clock, 1000) * pixel_size; -	if (IS_CHERRYVIEW(dev)) -		*prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 : -					       DRAIN_LATENCY_PRECISION_16; -	else -		*prec_mult = (entries > 128) ? 
DRAIN_LATENCY_PRECISION_64 : -					       DRAIN_LATENCY_PRECISION_32; -	*drain_latency = (64 * (*prec_mult) * 4) / entries; +	I915_WRITE(DSPFW1, +		   FW_WM(wm->sr.plane, SR) | +		   FW_WM(wm->pipe[PIPE_B].cursor, CURSORB) | +		   FW_WM_VLV(wm->pipe[PIPE_B].primary, PLANEB) | +		   FW_WM_VLV(wm->pipe[PIPE_A].primary, PLANEA)); +	I915_WRITE(DSPFW2, +		   FW_WM_VLV(wm->pipe[PIPE_A].sprite[1], SPRITEB) | +		   FW_WM(wm->pipe[PIPE_A].cursor, CURSORA) | +		   FW_WM_VLV(wm->pipe[PIPE_A].sprite[0], SPRITEA)); +	I915_WRITE(DSPFW3, +		   FW_WM(wm->sr.cursor, CURSOR_SR)); + +	if (IS_CHERRYVIEW(dev_priv)) { +		I915_WRITE(DSPFW7_CHV, +			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) | +			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC)); +		I915_WRITE(DSPFW8_CHV, +			   FW_WM_VLV(wm->pipe[PIPE_C].sprite[1], SPRITEF) | +			   FW_WM_VLV(wm->pipe[PIPE_C].sprite[0], SPRITEE)); +		I915_WRITE(DSPFW9_CHV, +			   FW_WM_VLV(wm->pipe[PIPE_C].primary, PLANEC) | +			   FW_WM(wm->pipe[PIPE_C].cursor, CURSORC)); +		I915_WRITE(DSPHOWM, +			   FW_WM(wm->sr.plane >> 9, SR_HI) | +			   FW_WM(wm->pipe[PIPE_C].sprite[1] >> 8, SPRITEF_HI) | +			   FW_WM(wm->pipe[PIPE_C].sprite[0] >> 8, SPRITEE_HI) | +			   FW_WM(wm->pipe[PIPE_C].primary >> 8, PLANEC_HI) | +			   FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) | +			   FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) | +			   FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) | +			   FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) | +			   FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) | +			   FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI)); +	} else { +		I915_WRITE(DSPFW7, +			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[1], SPRITED) | +			   FW_WM_VLV(wm->pipe[PIPE_B].sprite[0], SPRITEC)); +		I915_WRITE(DSPHOWM, +			   FW_WM(wm->sr.plane >> 9, SR_HI) | +			   FW_WM(wm->pipe[PIPE_B].sprite[1] >> 8, SPRITED_HI) | +			   FW_WM(wm->pipe[PIPE_B].sprite[0] >> 8, SPRITEC_HI) | +			   FW_WM(wm->pipe[PIPE_B].primary >> 8, PLANEB_HI) | +			   
FW_WM(wm->pipe[PIPE_A].sprite[1] >> 8, SPRITEB_HI) | +			   FW_WM(wm->pipe[PIPE_A].sprite[0] >> 8, SPRITEA_HI) | +			   FW_WM(wm->pipe[PIPE_A].primary >> 8, PLANEA_HI)); +	} -	if (*drain_latency > DRAIN_LATENCY_MASK) -		*drain_latency = DRAIN_LATENCY_MASK; +	POSTING_READ(DSPFW1); -	return true; +	dev_priv->wm.vlv = *wm;  } -/* - * Update drain latency registers of memory arbiter - * - * Valleyview SoC has a new memory arbiter and needs drain latency registers - * to be programmed. Each plane has a drain latency multiplier and a drain - * latency value. - */ +#undef FW_WM_VLV -static void vlv_update_drain_latency(struct drm_crtc *crtc) +static uint8_t vlv_compute_drain_latency(struct drm_crtc *crtc, +					 struct drm_plane *plane)  {  	struct drm_device *dev = crtc->dev; -	struct drm_i915_private *dev_priv = dev->dev_private;  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc); -	int pixel_size; -	int drain_latency; -	enum pipe pipe = intel_crtc->pipe; -	int plane_prec, prec_mult, plane_dl; -	const int high_precision = IS_CHERRYVIEW(dev) ? -		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64; +	int entries, prec_mult, drain_latency, pixel_size; +	int clock = intel_crtc->config->base.adjusted_mode.crtc_clock; +	const int high_precision = IS_CHERRYVIEW(dev) ? 16 : 64; -	plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH | -		   DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH | -		   (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT)); +	/* +	 * FIXME the plane might have an fb +	 * but be invisible (eg. 
due to clipping) +	 */ +	if (!intel_crtc->active || !plane->state->fb) +		return 0; -	if (!intel_crtc_active(crtc)) { -		I915_WRITE(VLV_DDL(pipe), plane_dl); -		return; -	} +	if (WARN(clock == 0, "Pixel clock is zero!\n")) +		return 0; -	/* Primary plane Drain Latency */ -	pixel_size = crtc->primary->fb->bits_per_pixel / 8;	/* BPP */ -	if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { -		plane_prec = (prec_mult == high_precision) ? -					   DDL_PLANE_PRECISION_HIGH : -					   DDL_PLANE_PRECISION_LOW; -		plane_dl |= plane_prec | drain_latency; -	} +	pixel_size = drm_format_plane_cpp(plane->state->fb->pixel_format, 0); -	/* Cursor Drain Latency -	 * BPP is always 4 for cursor -	 */ -	pixel_size = 4; +	if (WARN(pixel_size == 0, "Pixel size is zero!\n")) +		return 0; -	/* Program cursor DL only if it is enabled */ -	if (intel_crtc->cursor_base && -	    vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { -		plane_prec = (prec_mult == high_precision) ? -					   DDL_CURSOR_PRECISION_HIGH : -					   DDL_CURSOR_PRECISION_LOW; -		plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT); +	entries = DIV_ROUND_UP(clock, 1000) * pixel_size; + +	prec_mult = high_precision; +	drain_latency = 64 * prec_mult * 4 / entries; + +	if (drain_latency > DRAIN_LATENCY_MASK) { +		prec_mult /= 2; +		drain_latency = 64 * prec_mult * 4 / entries;  	} -	I915_WRITE(VLV_DDL(pipe), plane_dl); -} +	if (drain_latency > DRAIN_LATENCY_MASK) +		drain_latency = DRAIN_LATENCY_MASK; -#define single_plane_enabled(mask) is_power_of_2(mask) +	return drain_latency | (prec_mult == high_precision ? 
+				DDL_PRECISION_HIGH : DDL_PRECISION_LOW); +} -static void valleyview_update_wm(struct drm_crtc *crtc) +static int vlv_compute_wm(struct intel_crtc *crtc, +			  struct intel_plane *plane, +			  int fifo_size)  { -	struct drm_device *dev = crtc->dev; -	static const int sr_latency_ns = 12000; -	struct drm_i915_private *dev_priv = dev->dev_private; -	int planea_wm, planeb_wm, cursora_wm, cursorb_wm; -	int plane_sr, cursor_sr; -	int ignore_plane_sr, ignore_cursor_sr; -	unsigned int enabled = 0; -	bool cxsr_enabled; +	int clock, entries, pixel_size; -	vlv_update_drain_latency(crtc); +	/* +	 * FIXME the plane might have an fb +	 * but be invisible (eg. due to clipping) +	 */ +	if (!crtc->active || !plane->base.state->fb) +		return 0; -	if (g4x_compute_wm0(dev, PIPE_A, -			    &valleyview_wm_info, pessimal_latency_ns, -			    &valleyview_cursor_wm_info, pessimal_latency_ns, -			    &planea_wm, &cursora_wm)) -		enabled |= 1 << PIPE_A; +	pixel_size = drm_format_plane_cpp(plane->base.state->fb->pixel_format, 0); +	clock = crtc->config->base.adjusted_mode.crtc_clock; -	if (g4x_compute_wm0(dev, PIPE_B, -			    &valleyview_wm_info, pessimal_latency_ns, -			    &valleyview_cursor_wm_info, pessimal_latency_ns, -			    &planeb_wm, &cursorb_wm)) -		enabled |= 1 << PIPE_B; +	entries = DIV_ROUND_UP(clock, 1000) * pixel_size; -	if (single_plane_enabled(enabled) && -	    g4x_compute_srwm(dev, ffs(enabled) - 1, -			     sr_latency_ns, -			     &valleyview_wm_info, -			     &valleyview_cursor_wm_info, -			     &plane_sr, &ignore_cursor_sr) && -	    g4x_compute_srwm(dev, ffs(enabled) - 1, -			     2*sr_latency_ns, -			     &valleyview_wm_info, -			     &valleyview_cursor_wm_info, -			     &ignore_plane_sr, &cursor_sr)) { -		cxsr_enabled = true; -	} else { -		cxsr_enabled = false; -		intel_set_memory_cxsr(dev_priv, false); -		plane_sr = cursor_sr = 0; +	/* +	 * Set up the watermark such that we don't start issuing memory +	 * requests until we are within PND's max deadline value 
(256us). +	 * Idea being to be idle as long as possible while still taking +	 * advatange of PND's deadline scheduling. The limit of 8 +	 * cachelines (used when the FIFO will anyway drain in less time +	 * than 256us) should match what we would be done if trickle +	 * feed were enabled. +	 */ +	return fifo_size - clamp(DIV_ROUND_UP(256 * entries, 64), 0, fifo_size - 8); +} + +static bool vlv_compute_sr_wm(struct drm_device *dev, +			      struct vlv_wm_values *wm) +{ +	struct drm_i915_private *dev_priv = to_i915(dev); +	struct drm_crtc *crtc; +	enum pipe pipe = INVALID_PIPE; +	int num_planes = 0; +	int fifo_size = 0; +	struct intel_plane *plane; + +	wm->sr.cursor = wm->sr.plane = 0; + +	crtc = single_enabled_crtc(dev); +	/* maxfifo not supported on pipe C */ +	if (crtc && to_intel_crtc(crtc)->pipe != PIPE_C) { +		pipe = to_intel_crtc(crtc)->pipe; +		num_planes = !!wm->pipe[pipe].primary + +			!!wm->pipe[pipe].sprite[0] + +			!!wm->pipe[pipe].sprite[1]; +		fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;  	} -	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " -		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", -		      planea_wm, cursora_wm, -		      planeb_wm, cursorb_wm, -		      plane_sr, cursor_sr); +	if (fifo_size == 0 || num_planes > 1) +		return false; -	I915_WRITE(DSPFW1, -		   (plane_sr << DSPFW_SR_SHIFT) | -		   (cursorb_wm << DSPFW_CURSORB_SHIFT) | -		   (planeb_wm << DSPFW_PLANEB_SHIFT) | -		   (planea_wm << DSPFW_PLANEA_SHIFT)); -	I915_WRITE(DSPFW2, -		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | -		   (cursora_wm << DSPFW_CURSORA_SHIFT)); -	I915_WRITE(DSPFW3, -		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | -		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); +	wm->sr.cursor = vlv_compute_wm(to_intel_crtc(crtc), +				       to_intel_plane(crtc->cursor), 0x3f); -	if (cxsr_enabled) -		intel_set_memory_cxsr(dev_priv, true); +	list_for_each_entry(plane, &dev->mode_config.plane_list, base.head) { +		if (plane->base.type == 
DRM_PLANE_TYPE_CURSOR) +			continue; + +		if (plane->pipe != pipe) +			continue; + +		wm->sr.plane = vlv_compute_wm(to_intel_crtc(crtc), +					      plane, fifo_size); +		if (wm->sr.plane != 0) +			break; +	} + +	return true;  } -static void cherryview_update_wm(struct drm_crtc *crtc) +static void valleyview_update_wm(struct drm_crtc *crtc)  {  	struct drm_device *dev = crtc->dev; -	static const int sr_latency_ns = 12000;  	struct drm_i915_private *dev_priv = dev->dev_private; -	int planea_wm, planeb_wm, planec_wm; -	int cursora_wm, cursorb_wm, cursorc_wm; -	int plane_sr, cursor_sr; -	int ignore_plane_sr, ignore_cursor_sr; -	unsigned int enabled = 0; +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc); +	enum pipe pipe = intel_crtc->pipe;  	bool cxsr_enabled; +	struct vlv_wm_values wm = dev_priv->wm.vlv; -	vlv_update_drain_latency(crtc); +	wm.ddl[pipe].primary = vlv_compute_drain_latency(crtc, crtc->primary); +	wm.pipe[pipe].primary = vlv_compute_wm(intel_crtc, +					       to_intel_plane(crtc->primary), +					       vlv_get_fifo_size(dev, pipe, 0)); -	if (g4x_compute_wm0(dev, PIPE_A, -			    &valleyview_wm_info, pessimal_latency_ns, -			    &valleyview_cursor_wm_info, pessimal_latency_ns, -			    &planea_wm, &cursora_wm)) -		enabled |= 1 << PIPE_A; +	wm.ddl[pipe].cursor = vlv_compute_drain_latency(crtc, crtc->cursor); +	wm.pipe[pipe].cursor = vlv_compute_wm(intel_crtc, +					      to_intel_plane(crtc->cursor), +					      0x3f); -	if (g4x_compute_wm0(dev, PIPE_B, -			    &valleyview_wm_info, pessimal_latency_ns, -			    &valleyview_cursor_wm_info, pessimal_latency_ns, -			    &planeb_wm, &cursorb_wm)) -		enabled |= 1 << PIPE_B; +	cxsr_enabled = vlv_compute_sr_wm(dev, &wm); -	if (g4x_compute_wm0(dev, PIPE_C, -			    &valleyview_wm_info, pessimal_latency_ns, -			    &valleyview_cursor_wm_info, pessimal_latency_ns, -			    &planec_wm, &cursorc_wm)) -		enabled |= 1 << PIPE_C; +	if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0) +		return; -	if 
(single_plane_enabled(enabled) && -	    g4x_compute_srwm(dev, ffs(enabled) - 1, -			     sr_latency_ns, -			     &valleyview_wm_info, -			     &valleyview_cursor_wm_info, -			     &plane_sr, &ignore_cursor_sr) && -	    g4x_compute_srwm(dev, ffs(enabled) - 1, -			     2*sr_latency_ns, -			     &valleyview_wm_info, -			     &valleyview_cursor_wm_info, -			     &ignore_plane_sr, &cursor_sr)) { -		cxsr_enabled = true; -	} else { -		cxsr_enabled = false; -		intel_set_memory_cxsr(dev_priv, false); -		plane_sr = cursor_sr = 0; -	} +	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " +		      "SR: plane=%d, cursor=%d\n", pipe_name(pipe), +		      wm.pipe[pipe].primary, wm.pipe[pipe].cursor, +		      wm.sr.plane, wm.sr.cursor); -	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " -		      "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, " -		      "SR: plane=%d, cursor=%d\n", -		      planea_wm, cursora_wm, -		      planeb_wm, cursorb_wm, -		      planec_wm, cursorc_wm, -		      plane_sr, cursor_sr); +	/* +	 * FIXME DDR DVFS introduces massive memory latencies which +	 * are not known to system agent so any deadline specified +	 * by the display may not be respected. To support DDR DVFS +	 * the watermark code needs to be rewritten to essentially +	 * bypass deadline mechanism and rely solely on the +	 * watermarks. For now disable DDR DVFS. 
+	 */ +	if (IS_CHERRYVIEW(dev_priv)) +		chv_set_memory_dvfs(dev_priv, false); -	I915_WRITE(DSPFW1, -		   (plane_sr << DSPFW_SR_SHIFT) | -		   (cursorb_wm << DSPFW_CURSORB_SHIFT) | -		   (planeb_wm << DSPFW_PLANEB_SHIFT) | -		   (planea_wm << DSPFW_PLANEA_SHIFT)); -	I915_WRITE(DSPFW2, -		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | -		   (cursora_wm << DSPFW_CURSORA_SHIFT)); -	I915_WRITE(DSPFW3, -		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | -		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); -	I915_WRITE(DSPFW9_CHV, -		   (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK | -					      DSPFW_CURSORC_MASK)) | -		   (planec_wm << DSPFW_PLANEC_SHIFT) | -		   (cursorc_wm << DSPFW_CURSORC_SHIFT)); +	if (!cxsr_enabled) +		intel_set_memory_cxsr(dev_priv, false); + +	vlv_write_wm_values(intel_crtc, &wm);  	if (cxsr_enabled)  		intel_set_memory_cxsr(dev_priv, true); @@ -961,30 +1080,47 @@ static void valleyview_update_sprite_wm(struct drm_plane *plane,  {  	struct drm_device *dev = crtc->dev;  	struct drm_i915_private *dev_priv = dev->dev_private; -	int pipe = to_intel_plane(plane)->pipe; +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc); +	enum pipe pipe = intel_crtc->pipe;  	int sprite = to_intel_plane(plane)->plane; -	int drain_latency; -	int plane_prec; -	int sprite_dl; -	int prec_mult; -	const int high_precision = IS_CHERRYVIEW(dev) ? -		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64; +	bool cxsr_enabled; +	struct vlv_wm_values wm = dev_priv->wm.vlv; -	sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) | -		    (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite))); +	if (enabled) { +		wm.ddl[pipe].sprite[sprite] = +			vlv_compute_drain_latency(crtc, plane); -	if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, -						 &drain_latency)) { -		plane_prec = (prec_mult == high_precision) ? 
-					   DDL_SPRITE_PRECISION_HIGH(sprite) : -					   DDL_SPRITE_PRECISION_LOW(sprite); -		sprite_dl |= plane_prec | -			     (drain_latency << DDL_SPRITE_SHIFT(sprite)); +		wm.pipe[pipe].sprite[sprite] = +			vlv_compute_wm(intel_crtc, +				       to_intel_plane(plane), +				       vlv_get_fifo_size(dev, pipe, sprite+1)); +	} else { +		wm.ddl[pipe].sprite[sprite] = 0; +		wm.pipe[pipe].sprite[sprite] = 0;  	} -	I915_WRITE(VLV_DDL(pipe), sprite_dl); +	cxsr_enabled = vlv_compute_sr_wm(dev, &wm); + +	if (memcmp(&wm, &dev_priv->wm.vlv, sizeof(wm)) == 0) +		return; + +	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: sprite %c=%d, " +		      "SR: plane=%d, cursor=%d\n", pipe_name(pipe), +		      sprite_name(pipe, sprite), +		      wm.pipe[pipe].sprite[sprite], +		      wm.sr.plane, wm.sr.cursor); + +	if (!cxsr_enabled) +		intel_set_memory_cxsr(dev_priv, false); + +	vlv_write_wm_values(intel_crtc, &wm); + +	if (cxsr_enabled) +		intel_set_memory_cxsr(dev_priv, true);  } +#define single_plane_enabled(mask) is_power_of_2(mask) +  static void g4x_update_wm(struct drm_crtc *crtc)  {  	struct drm_device *dev = crtc->dev; @@ -1027,17 +1163,17 @@ static void g4x_update_wm(struct drm_crtc *crtc)  		      plane_sr, cursor_sr);  	I915_WRITE(DSPFW1, -		   (plane_sr << DSPFW_SR_SHIFT) | -		   (cursorb_wm << DSPFW_CURSORB_SHIFT) | -		   (planeb_wm << DSPFW_PLANEB_SHIFT) | -		   (planea_wm << DSPFW_PLANEA_SHIFT)); +		   FW_WM(plane_sr, SR) | +		   FW_WM(cursorb_wm, CURSORB) | +		   FW_WM(planeb_wm, PLANEB) | +		   FW_WM(planea_wm, PLANEA));  	I915_WRITE(DSPFW2,  		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | -		   (cursora_wm << DSPFW_CURSORA_SHIFT)); +		   FW_WM(cursora_wm, CURSORA));  	/* HPLL off in SR has some issues on G4x... 
disable it */  	I915_WRITE(DSPFW3,  		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) | -		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); +		   FW_WM(cursor_sr, CURSOR_SR));  	if (cxsr_enabled)  		intel_set_memory_cxsr(dev_priv, true); @@ -1062,7 +1198,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)  		int clock = adjusted_mode->crtc_clock;  		int htotal = adjusted_mode->crtc_htotal;  		int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; -		int pixel_size = crtc->primary->fb->bits_per_pixel / 8; +		int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;  		unsigned long line_time_us;  		int entries; @@ -1080,7 +1216,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)  			      entries, srwm);  		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * -			pixel_size * to_intel_crtc(crtc)->cursor_width; +			pixel_size * crtc->cursor->state->crtc_w;  		entries = DIV_ROUND_UP(entries,  					  i965_cursor_wm_info.cacheline_size);  		cursor_sr = i965_cursor_wm_info.fifo_size - @@ -1103,19 +1239,21 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)  		      srwm);  	/* 965 has limitations... 
*/ -	I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) | -		   (8 << DSPFW_CURSORB_SHIFT) | -		   (8 << DSPFW_PLANEB_SHIFT) | -		   (8 << DSPFW_PLANEA_SHIFT)); -	I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) | -		   (8 << DSPFW_PLANEC_SHIFT_OLD)); +	I915_WRITE(DSPFW1, FW_WM(srwm, SR) | +		   FW_WM(8, CURSORB) | +		   FW_WM(8, PLANEB) | +		   FW_WM(8, PLANEA)); +	I915_WRITE(DSPFW2, FW_WM(8, CURSORA) | +		   FW_WM(8, PLANEC_OLD));  	/* update cursor SR watermark */ -	I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); +	I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));  	if (cxsr_enabled)  		intel_set_memory_cxsr(dev_priv, true);  } +#undef FW_WM +  static void i9xx_update_wm(struct drm_crtc *unused_crtc)  {  	struct drm_device *dev = unused_crtc->dev; @@ -1139,7 +1277,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)  	crtc = intel_get_crtc_for_plane(dev, 0);  	if (intel_crtc_active(crtc)) {  		const struct drm_display_mode *adjusted_mode; -		int cpp = crtc->primary->fb->bits_per_pixel / 8; +		int cpp = crtc->primary->state->fb->bits_per_pixel / 8;  		if (IS_GEN2(dev))  			cpp = 4; @@ -1161,7 +1299,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)  	crtc = intel_get_crtc_for_plane(dev, 1);  	if (intel_crtc_active(crtc)) {  		const struct drm_display_mode *adjusted_mode; -		int cpp = crtc->primary->fb->bits_per_pixel / 8; +		int cpp = crtc->primary->state->fb->bits_per_pixel / 8;  		if (IS_GEN2(dev))  			cpp = 4; @@ -1184,7 +1322,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)  	if (IS_I915GM(dev) && enabled) {  		struct drm_i915_gem_object *obj; -		obj = intel_fb_obj(enabled->primary->fb); +		obj = intel_fb_obj(enabled->primary->state->fb);  		/* self-refresh seems busted with untiled */  		if (obj->tiling_mode == I915_TILING_NONE) @@ -1208,7 +1346,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)  		int clock = adjusted_mode->crtc_clock;  		int htotal = adjusted_mode->crtc_htotal;  		int hdisplay = 
to_intel_crtc(enabled)->config->pipe_src_w; -		int pixel_size = enabled->primary->fb->bits_per_pixel / 8; +		int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;  		unsigned long line_time_us;  		int entries; @@ -1645,7 +1783,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)  	struct drm_display_mode *mode = &intel_crtc->config->base.adjusted_mode;  	u32 linetime, ips_linetime; -	if (!intel_crtc_active(crtc)) +	if (!intel_crtc->active)  		return 0;  	/* The WM are computed with base on how long it takes to fill a single @@ -1711,6 +1849,8 @@ static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])  				GEN9_MEM_LATENCY_LEVEL_MASK;  		/* +		 * WaWmMemoryReadLatency:skl +		 *  		 * punit doesn't take into account the read latency so we need  		 * to add 2us to the various latency levels we retrieve from  		 * the punit. @@ -1898,19 +2038,31 @@ static void ilk_compute_wm_parameters(struct drm_crtc *crtc,  	enum pipe pipe = intel_crtc->pipe;  	struct drm_plane *plane; -	if (!intel_crtc_active(crtc)) +	if (!intel_crtc->active)  		return;  	p->active = true;  	p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;  	p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc); -	p->pri.bytes_per_pixel = crtc->primary->fb->bits_per_pixel / 8; -	p->cur.bytes_per_pixel = 4; + +	if (crtc->primary->state->fb) { +		p->pri.enabled = true; +		p->pri.bytes_per_pixel = +			crtc->primary->state->fb->bits_per_pixel / 8; +	} else { +		p->pri.enabled = false; +		p->pri.bytes_per_pixel = 0; +	} + +	if (crtc->cursor->state->fb) { +		p->cur.enabled = true; +		p->cur.bytes_per_pixel = 4; +	} else { +		p->cur.enabled = false; +		p->cur.bytes_per_pixel = 0; +	}  	p->pri.horiz_pixels = intel_crtc->config->pipe_src_w; -	p->cur.horiz_pixels = intel_crtc->cursor_width; -	/* TODO: for now, assume primary and cursor planes are always enabled. 
*/ -	p->pri.enabled = true; -	p->cur.enabled = true; +	p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w;  	drm_for_each_legacy_plane(plane, &dev->mode_config.plane_list) {  		struct intel_plane *intel_plane = to_intel_plane(plane); @@ -2410,7 +2562,7 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,  	nth_active_pipe = 0;  	for_each_crtc(dev, crtc) { -		if (!intel_crtc_active(crtc)) +		if (!to_intel_crtc(crtc)->active)  			continue;  		if (crtc == for_crtc) @@ -2443,13 +2595,12 @@ static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,  			  struct skl_ddb_allocation *ddb /* out */)  { -	struct drm_device *dev = dev_priv->dev;  	enum pipe pipe;  	int plane;  	u32 val;  	for_each_pipe(dev_priv, pipe) { -		for_each_plane(pipe, plane) { +		for_each_plane(dev_priv, pipe, plane) {  			val = I915_READ(PLANE_BUF_CFG(pipe, plane));  			skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],  						   val); @@ -2498,10 +2649,12 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  		      struct skl_ddb_allocation *ddb /* out */)  {  	struct drm_device *dev = crtc->dev; +	struct drm_i915_private *dev_priv = dev->dev_private;  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);  	enum pipe pipe = intel_crtc->pipe;  	struct skl_ddb_entry *alloc = &ddb->pipe[pipe];  	uint16_t alloc_size, start, cursor_blocks; +	uint16_t minimum[I915_MAX_PLANES];  	unsigned int total_data_rate;  	int plane; @@ -2520,9 +2673,21 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  	alloc_size -= cursor_blocks;  	alloc->end -= cursor_blocks; +	/* 1. 
Allocate the mininum required blocks for each active plane */ +	for_each_plane(dev_priv, pipe, plane) { +		const struct intel_plane_wm_parameters *p; + +		p = &params->plane[plane]; +		if (!p->enabled) +			continue; + +		minimum[plane] = 8; +		alloc_size -= minimum[plane]; +	} +  	/* -	 * Each active plane get a portion of the remaining space, in -	 * proportion to the amount of data they need to fetch from memory. +	 * 2. Distribute the remaining space in proportion to the amount of +	 * data each plane needs to fetch from memory.  	 *  	 * FIXME: we may not allocate every single block here.  	 */ @@ -2544,8 +2709,9 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,  		 * promote the expression to 64 bits to avoid overflowing, the  		 * result is < available as data_rate / total_data_rate < 1  		 */ -		plane_blocks = div_u64((uint64_t)alloc_size * data_rate, -				       total_data_rate); +		plane_blocks = minimum[plane]; +		plane_blocks += div_u64((uint64_t)alloc_size * data_rate, +					total_data_rate);  		ddb->plane[pipe][plane].start = start;  		ddb->plane[pipe][plane].end = start + plane_blocks; @@ -2575,7 +2741,7 @@ static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,  	if (latency == 0)  		return UINT_MAX; -	wm_intermediate_val = latency * pixel_rate * bytes_per_pixel; +	wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;  	ret = DIV_ROUND_UP(wm_intermediate_val, 1000);  	return ret; @@ -2583,17 +2749,29 @@ static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,  static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,  			       uint32_t horiz_pixels, uint8_t bytes_per_pixel, -			       uint32_t latency) +			       uint64_t tiling, uint32_t latency)  { -	uint32_t ret, plane_bytes_per_line, wm_intermediate_val; +	uint32_t ret; +	uint32_t plane_bytes_per_line, plane_blocks_per_line; +	uint32_t wm_intermediate_val;  	if (latency == 0)  		return UINT_MAX;  	plane_bytes_per_line = 
horiz_pixels * bytes_per_pixel; + +	if (tiling == I915_FORMAT_MOD_Y_TILED || +	    tiling == I915_FORMAT_MOD_Yf_TILED) { +		plane_bytes_per_line *= 4; +		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); +		plane_blocks_per_line /= 4; +	} else { +		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); +	} +  	wm_intermediate_val = latency * pixel_rate;  	ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * -				plane_bytes_per_line; +				plane_blocks_per_line;  	return ret;  } @@ -2624,7 +2802,7 @@ static void skl_compute_wm_global_parameters(struct drm_device *dev,  	struct drm_plane *plane;  	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) -		config->num_pipes_active += intel_crtc_active(crtc); +		config->num_pipes_active += to_intel_crtc(crtc)->active;  	/* FIXME: I don't think we need those two global parameters on SKL */  	list_for_each_entry(plane, &dev->mode_config.plane_list, head) { @@ -2642,26 +2820,40 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);  	enum pipe pipe = intel_crtc->pipe;  	struct drm_plane *plane; +	struct drm_framebuffer *fb;  	int i = 1; /* Index for sprite planes start */ -	p->active = intel_crtc_active(crtc); +	p->active = intel_crtc->active;  	if (p->active) {  		p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;  		p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config); -		/* -		 * For now, assume primary and cursor planes are always enabled. 
-		 */ -		p->plane[0].enabled = true; -		p->plane[0].bytes_per_pixel = -			crtc->primary->fb->bits_per_pixel / 8; +		fb = crtc->primary->state->fb; +		if (fb) { +			p->plane[0].enabled = true; +			p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8; +			p->plane[0].tiling = fb->modifier[0]; +		} else { +			p->plane[0].enabled = false; +			p->plane[0].bytes_per_pixel = 0; +			p->plane[0].tiling = DRM_FORMAT_MOD_NONE; +		}  		p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;  		p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h; - -		p->cursor.enabled = true; -		p->cursor.bytes_per_pixel = 4; -		p->cursor.horiz_pixels = intel_crtc->cursor_width ? -					 intel_crtc->cursor_width : 64; +		p->plane[0].rotation = crtc->primary->state->rotation; + +		fb = crtc->cursor->state->fb; +		if (fb) { +			p->cursor.enabled = true; +			p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8; +			p->cursor.horiz_pixels = crtc->cursor->state->crtc_w; +			p->cursor.vert_pixels = crtc->cursor->state->crtc_h; +		} else { +			p->cursor.enabled = false; +			p->cursor.bytes_per_pixel = 0; +			p->cursor.horiz_pixels = 64; +			p->cursor.vert_pixels = 64; +		}  	}  	list_for_each_entry(plane, &dev->mode_config.plane_list, head) { @@ -2673,41 +2865,74 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,  	}  } -static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p, +static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, +				 struct skl_pipe_wm_parameters *p,  				 struct intel_plane_wm_parameters *p_params,  				 uint16_t ddb_allocation, -				 uint32_t mem_value, +				 int level,  				 uint16_t *out_blocks, /* out */  				 uint8_t *out_lines /* out */)  { -	uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines; -	uint32_t result_bytes; +	uint32_t latency = dev_priv->wm.skl_latency[level]; +	uint32_t method1, method2; +	uint32_t plane_bytes_per_line, plane_blocks_per_line; +	uint32_t res_blocks, res_lines; +	uint32_t 
selected_result; -	if (mem_value == 0 || !p->active || !p_params->enabled) +	if (latency == 0 || !p->active || !p_params->enabled)  		return false;  	method1 = skl_wm_method1(p->pixel_rate,  				 p_params->bytes_per_pixel, -				 mem_value); +				 latency);  	method2 = skl_wm_method2(p->pixel_rate,  				 p->pipe_htotal,  				 p_params->horiz_pixels,  				 p_params->bytes_per_pixel, -				 mem_value); +				 p_params->tiling, +				 latency);  	plane_bytes_per_line = p_params->horiz_pixels *  					p_params->bytes_per_pixel; +	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + +	if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || +	    p_params->tiling == I915_FORMAT_MOD_Yf_TILED) { +		uint32_t min_scanlines = 4; +		uint32_t y_tile_minimum; +		if (intel_rotation_90_or_270(p_params->rotation)) { +			switch (p_params->bytes_per_pixel) { +			case 1: +				min_scanlines = 16; +				break; +			case 2: +				min_scanlines = 8; +				break; +			case 8: +				WARN(1, "Unsupported pixel depth for rotation"); +			} +		} +		y_tile_minimum = plane_blocks_per_line * min_scanlines; +		selected_result = max(method2, y_tile_minimum); +	} else { +		if ((ddb_allocation / plane_blocks_per_line) >= 1) +			selected_result = min(method1, method2); +		else +			selected_result = method1; +	} -	/* For now xtile and linear */ -	if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1) -		result_bytes = min(method1, method2); -	else -		result_bytes = method1; +	res_blocks = selected_result + 1; +	res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line); -	res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1; -	res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line); +	if (level >= 1 && level <= 7) { +		if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || +		    p_params->tiling == I915_FORMAT_MOD_Yf_TILED) +			res_lines += 4; +		else +			res_blocks++; +	} -	if (res_blocks > ddb_allocation || res_lines > 31) +	if (res_blocks >= ddb_allocation || res_lines > 31)  		return false;  	
*out_blocks = res_blocks; @@ -2724,30 +2949,31 @@ static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,  				 int num_planes,  				 struct skl_wm_level *result)  { -	uint16_t latency = dev_priv->wm.skl_latency[level];  	uint16_t ddb_blocks;  	int i;  	for (i = 0; i < num_planes; i++) {  		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); -		result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i], +		result->plane_en[i] = skl_compute_plane_wm(dev_priv, +						p, &p->plane[i],  						ddb_blocks, -						latency, +						level,  						&result->plane_res_b[i],  						&result->plane_res_l[i]);  	}  	ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]); -	result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks, -						 latency, &result->cursor_res_b, +	result->cursor_en = skl_compute_plane_wm(dev_priv, p, &p->cursor, +						 ddb_blocks, level, +						 &result->cursor_res_b,  						 &result->cursor_res_l);  }  static uint32_t  skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)  { -	if (!intel_crtc_active(crtc)) +	if (!to_intel_crtc(crtc)->active)  		return 0;  	return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate); @@ -2921,12 +3147,11 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,  static void  skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)  { -	struct drm_device *dev = dev_priv->dev;  	int plane;  	DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); -	for_each_plane(pipe, plane) { +	for_each_plane(dev_priv, pipe, plane) {  		I915_WRITE(PLANE_SURF(pipe, plane),  			   I915_READ(PLANE_SURF(pipe, plane)));  	} @@ -3133,12 +3358,21 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,  		     int pixel_size, bool enabled, bool scaled)  {  	struct intel_plane *intel_plane = to_intel_plane(plane); +	struct drm_framebuffer *fb = plane->state->fb;  	intel_plane->wm.enabled = enabled;  	intel_plane->wm.scaled = scaled;  
	intel_plane->wm.horiz_pixels = sprite_width;  	intel_plane->wm.vert_pixels = sprite_height;  	intel_plane->wm.bytes_per_pixel = pixel_size; +	intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE; +	/* +	 * Framebuffer can be NULL on plane disable, but it does not +	 * matter for watermarks if we assume no tiling in that case. +	 */ +	if (fb) +		intel_plane->wm.tiling = fb->modifier[0]; +	intel_plane->wm.rotation = plane->state->rotation;  	skl_update_wm(crtc);  } @@ -3287,7 +3521,7 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)  		hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));  	hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe)); -	if (!intel_crtc_active(crtc)) +	if (!intel_crtc->active)  		return;  	hw->dirty[pipe] = true; @@ -3342,7 +3576,7 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)  	if (IS_HASWELL(dev) || IS_BROADWELL(dev))  		hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); -	active->pipe_enabled = intel_crtc_active(crtc); +	active->pipe_enabled = intel_crtc->active;  	if (active->pipe_enabled) {  		u32 tmp = hw->wm_pipe[pipe]; @@ -3456,41 +3690,6 @@ void intel_update_sprite_watermarks(struct drm_plane *plane,  						   pixel_size, enabled, scaled);  } -static struct drm_i915_gem_object * -intel_alloc_context_page(struct drm_device *dev) -{ -	struct drm_i915_gem_object *ctx; -	int ret; - -	WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - -	ctx = i915_gem_alloc_object(dev, 4096); -	if (!ctx) { -		DRM_DEBUG("failed to alloc power context, RC6 disabled\n"); -		return NULL; -	} - -	ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0); -	if (ret) { -		DRM_ERROR("failed to pin power context: %d\n", ret); -		goto err_unref; -	} - -	ret = i915_gem_object_set_to_gtt_domain(ctx, 1); -	if (ret) { -		DRM_ERROR("failed to set-domain on power context: %d\n", ret); -		goto err_unpin; -	} - -	return ctx; - -err_unpin: -	i915_gem_object_ggtt_unpin(ctx); -err_unref: -	drm_gem_object_unreference(&ctx->base); -	return NULL; -} -  
/**   * Lock protecting IPS related data structures   */ @@ -3623,7 +3822,7 @@ static void ironlake_disable_drps(struct drm_device *dev)   * ourselves, instead of doing a rmw cycle (which might result in us clearing   * all limits and the gpu stuck at whatever frequency it is at atm).   */ -static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val) +static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)  {  	u32 limits; @@ -3633,9 +3832,15 @@ static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)  	 * the hw runs at the minimal clock before selecting the desired  	 * frequency, if the down threshold expires in that window we will not  	 * receive a down interrupt. */ -	limits = dev_priv->rps.max_freq_softlimit << 24; -	if (val <= dev_priv->rps.min_freq_softlimit) -		limits |= dev_priv->rps.min_freq_softlimit << 16; +	if (IS_GEN9(dev_priv->dev)) { +		limits = (dev_priv->rps.max_freq_softlimit) << 23; +		if (val <= dev_priv->rps.min_freq_softlimit) +			limits |= (dev_priv->rps.min_freq_softlimit) << 14; +	} else { +		limits = dev_priv->rps.max_freq_softlimit << 24; +		if (val <= dev_priv->rps.min_freq_softlimit) +			limits |= dev_priv->rps.min_freq_softlimit << 16; +	}  	return limits;  } @@ -3643,6 +3848,8 @@ static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)  static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)  {  	int new_power; +	u32 threshold_up = 0, threshold_down = 0; /* in % */ +	u32 ei_up = 0, ei_down = 0;  	new_power = dev_priv->rps.power;  	switch (dev_priv->rps.power) { @@ -3664,9 +3871,9 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)  		break;  	}  	/* Max/min bins are special */ -	if (val == dev_priv->rps.min_freq_softlimit) +	if (val <= dev_priv->rps.min_freq_softlimit)  		new_power = LOW_POWER; -	if (val == dev_priv->rps.max_freq_softlimit) +	if (val >= dev_priv->rps.max_freq_softlimit)  		new_power = HIGH_POWER;  	if (new_power 
== dev_priv->rps.power)  		return; @@ -3675,59 +3882,53 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)  	switch (new_power) {  	case LOW_POWER:  		/* Upclock if more than 95% busy over 16ms */ -		I915_WRITE(GEN6_RP_UP_EI, 12500); -		I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800); +		ei_up = 16000; +		threshold_up = 95;  		/* Downclock if less than 85% busy over 32ms */ -		I915_WRITE(GEN6_RP_DOWN_EI, 25000); -		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250); - -		I915_WRITE(GEN6_RP_CONTROL, -			   GEN6_RP_MEDIA_TURBO | -			   GEN6_RP_MEDIA_HW_NORMAL_MODE | -			   GEN6_RP_MEDIA_IS_GFX | -			   GEN6_RP_ENABLE | -			   GEN6_RP_UP_BUSY_AVG | -			   GEN6_RP_DOWN_IDLE_AVG); +		ei_down = 32000; +		threshold_down = 85;  		break;  	case BETWEEN:  		/* Upclock if more than 90% busy over 13ms */ -		I915_WRITE(GEN6_RP_UP_EI, 10250); -		I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225); +		ei_up = 13000; +		threshold_up = 90;  		/* Downclock if less than 75% busy over 32ms */ -		I915_WRITE(GEN6_RP_DOWN_EI, 25000); -		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750); - -		I915_WRITE(GEN6_RP_CONTROL, -			   GEN6_RP_MEDIA_TURBO | -			   GEN6_RP_MEDIA_HW_NORMAL_MODE | -			   GEN6_RP_MEDIA_IS_GFX | -			   GEN6_RP_ENABLE | -			   GEN6_RP_UP_BUSY_AVG | -			   GEN6_RP_DOWN_IDLE_AVG); +		ei_down = 32000; +		threshold_down = 75;  		break;  	case HIGH_POWER:  		/* Upclock if more than 85% busy over 10ms */ -		I915_WRITE(GEN6_RP_UP_EI, 8000); -		I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800); +		ei_up = 10000; +		threshold_up = 85;  		/* Downclock if less than 60% busy over 32ms */ -		I915_WRITE(GEN6_RP_DOWN_EI, 25000); -		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000); - -		I915_WRITE(GEN6_RP_CONTROL, -			   GEN6_RP_MEDIA_TURBO | -			   GEN6_RP_MEDIA_HW_NORMAL_MODE | -			   GEN6_RP_MEDIA_IS_GFX | -			   GEN6_RP_ENABLE | -			   GEN6_RP_UP_BUSY_AVG | -			   GEN6_RP_DOWN_IDLE_AVG); +		ei_down = 32000; +		threshold_down = 60;  		break;  	} +	I915_WRITE(GEN6_RP_UP_EI, +		GT_INTERVAL_FROM_US(dev_priv, 
ei_up)); +	I915_WRITE(GEN6_RP_UP_THRESHOLD, +		GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100))); + +	I915_WRITE(GEN6_RP_DOWN_EI, +		GT_INTERVAL_FROM_US(dev_priv, ei_down)); +	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, +		GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100))); + +	 I915_WRITE(GEN6_RP_CONTROL, +		    GEN6_RP_MEDIA_TURBO | +		    GEN6_RP_MEDIA_HW_NORMAL_MODE | +		    GEN6_RP_MEDIA_IS_GFX | +		    GEN6_RP_ENABLE | +		    GEN6_RP_UP_BUSY_AVG | +		    GEN6_RP_DOWN_IDLE_AVG); +  	dev_priv->rps.power = new_power;  	dev_priv->rps.last_adj = 0;  } @@ -3737,11 +3938,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)  	u32 mask = 0;  	if (val > dev_priv->rps.min_freq_softlimit) -		mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; +		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;  	if (val < dev_priv->rps.max_freq_softlimit) -		mask |= GEN6_PM_RP_UP_THRESHOLD; +		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; -	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);  	mask &= dev_priv->pm_rps_events;  	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); @@ -3750,13 +3950,13 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)  /* gen6_set_rps is called to update the frequency request, but should also be   * called when the range (min_delay and max_delay) is modified so that we can   * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. 
*/ -void gen6_set_rps(struct drm_device *dev, u8 val) +static void gen6_set_rps(struct drm_device *dev, u8 val)  {  	struct drm_i915_private *dev_priv = dev->dev_private;  	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); -	WARN_ON(val > dev_priv->rps.max_freq_softlimit); -	WARN_ON(val < dev_priv->rps.min_freq_softlimit); +	WARN_ON(val > dev_priv->rps.max_freq); +	WARN_ON(val < dev_priv->rps.min_freq);  	/* min/max delay may still have been modified so be sure to  	 * write the limits value. @@ -3764,7 +3964,10 @@ void gen6_set_rps(struct drm_device *dev, u8 val)  	if (val != dev_priv->rps.cur_freq) {  		gen6_set_rps_thresholds(dev_priv, val); -		if (IS_HASWELL(dev) || IS_BROADWELL(dev)) +		if (IS_GEN9(dev)) +			I915_WRITE(GEN6_RPNSWREQ, +				   GEN9_FREQUENCY(val)); +		else if (IS_HASWELL(dev) || IS_BROADWELL(dev))  			I915_WRITE(GEN6_RPNSWREQ,  				   HSW_FREQUENCY(val));  		else @@ -3777,7 +3980,7 @@ void gen6_set_rps(struct drm_device *dev, u8 val)  	/* Make sure we continue to get interrupts  	 * until we hit the minimum or maximum frequencies.  	 
*/ -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val)); +	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));  	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));  	POSTING_READ(GEN6_RPNSWREQ); @@ -3786,6 +3989,27 @@ void gen6_set_rps(struct drm_device *dev, u8 val)  	trace_intel_gpu_freq_change(val * 50);  } +static void valleyview_set_rps(struct drm_device *dev, u8 val) +{ +	struct drm_i915_private *dev_priv = dev->dev_private; + +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); +	WARN_ON(val > dev_priv->rps.max_freq); +	WARN_ON(val < dev_priv->rps.min_freq); + +	if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), +		      "Odd GPU freq value\n")) +		val &= ~1; + +	if (val != dev_priv->rps.cur_freq) +		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + +	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + +	dev_priv->rps.cur_freq = val; +	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); +} +  /* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down   *   * * If Gfx is Idle, then @@ -3798,10 +4022,11 @@ void gen6_set_rps(struct drm_device *dev, u8 val)  static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)  {  	struct drm_device *dev = dev_priv->dev; +	u32 val = dev_priv->rps.idle_freq;  	/* CHV and latest VLV don't need to force the gfx clock */  	if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) { -		valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); +		valleyview_set_rps(dev_priv->dev, val);  		return;  	} @@ -3809,7 +4034,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)  	 * When we are idle.  Drop to min voltage state.  	 
*/ -	if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit) +	if (dev_priv->rps.cur_freq <= val)  		return;  	/* Mask turbo interrupt so that they will not come in between */ @@ -3818,10 +4043,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)  	vlv_force_gfx_clock(dev_priv, true); -	dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit; +	dev_priv->rps.cur_freq = val; -	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, -					dev_priv->rps.min_freq_softlimit); +	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);  	if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))  				& GENFREQSTATUS) == 0, 100)) @@ -3829,8 +4053,19 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)  	vlv_force_gfx_clock(dev_priv, false); -	I915_WRITE(GEN6_PMINTRMSK, -		   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); +	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); +} + +void gen6_rps_busy(struct drm_i915_private *dev_priv) +{ +	mutex_lock(&dev_priv->rps.hw_lock); +	if (dev_priv->rps.enabled) { +		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) +			gen6_rps_reset_ei(dev_priv); +		I915_WRITE(GEN6_PMINTRMSK, +			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); +	} +	mutex_unlock(&dev_priv->rps.hw_lock);  }  void gen6_rps_idle(struct drm_i915_private *dev_priv) @@ -3842,46 +4077,34 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)  		if (IS_VALLEYVIEW(dev))  			vlv_set_rps_idle(dev_priv);  		else -			gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); +			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);  		dev_priv->rps.last_adj = 0; +		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);  	}  	mutex_unlock(&dev_priv->rps.hw_lock);  }  void gen6_rps_boost(struct drm_i915_private *dev_priv)  { -	struct drm_device *dev = dev_priv->dev; +	u32 val;  	mutex_lock(&dev_priv->rps.hw_lock); -	if (dev_priv->rps.enabled) { -		if (IS_VALLEYVIEW(dev)) -			
valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); -		else -			gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); +	val = dev_priv->rps.max_freq_softlimit; +	if (dev_priv->rps.enabled && +	    dev_priv->mm.busy && +	    dev_priv->rps.cur_freq < val) { +		intel_set_rps(dev_priv->dev, val);  		dev_priv->rps.last_adj = 0;  	}  	mutex_unlock(&dev_priv->rps.hw_lock);  } -void valleyview_set_rps(struct drm_device *dev, u8 val) +void intel_set_rps(struct drm_device *dev, u8 val)  { -	struct drm_i915_private *dev_priv = dev->dev_private; - -	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); -	WARN_ON(val > dev_priv->rps.max_freq_softlimit); -	WARN_ON(val < dev_priv->rps.min_freq_softlimit); - -	if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), -		      "Odd GPU freq value\n")) -		val &= ~1; - -	if (val != dev_priv->rps.cur_freq) -		vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - -	dev_priv->rps.cur_freq = val; -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); +	if (IS_VALLEYVIEW(dev)) +		valleyview_set_rps(dev, val); +	else +		gen6_set_rps(dev, val);  }  static void gen9_disable_rps(struct drm_device *dev) @@ -3995,6 +4218,13 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)  	dev_priv->rps.rp0_freq		= (rp_state_cap >>  0) & 0xff;  	dev_priv->rps.rp1_freq		= (rp_state_cap >>  8) & 0xff;  	dev_priv->rps.min_freq		= (rp_state_cap >> 16) & 0xff; +	if (IS_SKYLAKE(dev)) { +		/* Store the frequency values in 16.66 MHZ units, which is +		   the natural hardware unit for SKL */ +		dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; +		dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; +		dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; +	}  	/* hw_max = RP0 until we check for overclocking */  	dev_priv->rps.max_freq		= dev_priv->rps.rp0_freq; @@ -4011,6 +4241,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)  					dev_priv->rps.max_freq);  	} +	
dev_priv->rps.idle_freq = dev_priv->rps.min_freq; +  	/* Preserve min/max settings in case of re-init */  	if (dev_priv->rps.max_freq_softlimit == 0)  		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; @@ -4035,23 +4267,21 @@ static void gen9_enable_rps(struct drm_device *dev)  	gen6_init_rps_frequencies(dev); -	I915_WRITE(GEN6_RPNSWREQ, 0xc800000); -	I915_WRITE(GEN6_RC_VIDEO_FREQ, 0xc800000); +	/* Program defaults and thresholds for RPS*/ +	I915_WRITE(GEN6_RC_VIDEO_FREQ, +		GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); + +	/* 1 second timeout*/ +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, +		GT_INTERVAL_FROM_US(dev_priv, 1000000)); -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 0x12060000); -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 0xe808); -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 0x3bd08); -	I915_WRITE(GEN6_RP_UP_EI, 0x101d0); -	I915_WRITE(GEN6_RP_DOWN_EI, 0x55730);  	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); -	I915_WRITE(GEN6_PMINTRMSK, 0x6); -	I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | -		   GEN6_RP_MEDIA_HW_MODE | GEN6_RP_MEDIA_IS_GFX | -		   GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG | -		   GEN6_RP_DOWN_IDLE_AVG); -	gen6_enable_rps_interrupts(dev); +	/* Leaning on the below call to gen6_set_rps to program/setup the +	 * Up/Down EI & threshold registers, as well as the RP_CONTROL, +	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ +	dev_priv->rps.power = HIGH_POWER; /* force a reset */ +	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);  } @@ -4179,7 +4409,7 @@ static void gen8_enable_rps(struct drm_device *dev)  	/* 6: Ring frequency + overclocking (our driver does this later */  	dev_priv->rps.power = HIGH_POWER; /* force a reset */ -	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); +	gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);  } @@ -4273,7 +4503,7 @@ static void gen6_enable_rps(struct 
drm_device *dev)  	}  	dev_priv->rps.power = HIGH_POWER; /* force a reset */ -	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); +	gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);  	rc6vids = 0;  	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); @@ -4638,6 +4868,8 @@ static void valleyview_init_gt_powersave(struct drm_device *dev)  			 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),  			 dev_priv->rps.min_freq); +	dev_priv->rps.idle_freq = dev_priv->rps.min_freq; +  	/* Preserve min/max settings in case of re-init */  	if (dev_priv->rps.max_freq_softlimit == 0)  		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; @@ -4713,6 +4945,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)  		   dev_priv->rps.min_freq) & 1,  		  "Odd GPU freq values\n"); +	dev_priv->rps.idle_freq = dev_priv->rps.min_freq; +  	/* Preserve min/max settings in case of re-init */  	if (dev_priv->rps.max_freq_softlimit == 0)  		dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; @@ -4904,124 +5138,6 @@ static void valleyview_enable_rps(struct drm_device *dev)  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);  } -void ironlake_teardown_rc6(struct drm_device *dev) -{ -	struct drm_i915_private *dev_priv = dev->dev_private; - -	if (dev_priv->ips.renderctx) { -		i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); -		drm_gem_object_unreference(&dev_priv->ips.renderctx->base); -		dev_priv->ips.renderctx = NULL; -	} - -	if (dev_priv->ips.pwrctx) { -		i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); -		drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); -		dev_priv->ips.pwrctx = NULL; -	} -} - -static void ironlake_disable_rc6(struct drm_device *dev) -{ -	struct drm_i915_private *dev_priv = dev->dev_private; - -	if (I915_READ(PWRCTXA)) { -		/* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ -		I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); -		wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == 
RSX_STATUS_ON), -			 50); - -		I915_WRITE(PWRCTXA, 0); -		POSTING_READ(PWRCTXA); - -		I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); -		POSTING_READ(RSTDBYCTL); -	} -} - -static int ironlake_setup_rc6(struct drm_device *dev) -{ -	struct drm_i915_private *dev_priv = dev->dev_private; - -	if (dev_priv->ips.renderctx == NULL) -		dev_priv->ips.renderctx = intel_alloc_context_page(dev); -	if (!dev_priv->ips.renderctx) -		return -ENOMEM; - -	if (dev_priv->ips.pwrctx == NULL) -		dev_priv->ips.pwrctx = intel_alloc_context_page(dev); -	if (!dev_priv->ips.pwrctx) { -		ironlake_teardown_rc6(dev); -		return -ENOMEM; -	} - -	return 0; -} - -static void ironlake_enable_rc6(struct drm_device *dev) -{ -	struct drm_i915_private *dev_priv = dev->dev_private; -	struct intel_engine_cs *ring = &dev_priv->ring[RCS]; -	bool was_interruptible; -	int ret; - -	/* rc6 disabled by default due to repeated reports of hanging during -	 * boot and resume. -	 */ -	if (!intel_enable_rc6(dev)) -		return; - -	WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - -	ret = ironlake_setup_rc6(dev); -	if (ret) -		return; - -	was_interruptible = dev_priv->mm.interruptible; -	dev_priv->mm.interruptible = false; - -	/* -	 * GPU can automatically power down the render unit if given a page -	 * to save state. -	 */ -	ret = intel_ring_begin(ring, 6); -	if (ret) { -		ironlake_teardown_rc6(dev); -		dev_priv->mm.interruptible = was_interruptible; -		return; -	} - -	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); -	intel_ring_emit(ring, MI_SET_CONTEXT); -	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) | -			MI_MM_SPACE_GTT | -			MI_SAVE_EXT_STATE_EN | -			MI_RESTORE_EXT_STATE_EN | -			MI_RESTORE_INHIBIT); -	intel_ring_emit(ring, MI_SUSPEND_FLUSH); -	intel_ring_emit(ring, MI_NOOP); -	intel_ring_emit(ring, MI_FLUSH); -	intel_ring_advance(ring); - -	/* -	 * Wait for the command parser to advance past MI_SET_CONTEXT. 
The HW -	 * does an implicit flush, combined with MI_FLUSH above, it should be -	 * safe to assume that renderctx is valid -	 */ -	ret = intel_ring_idle(ring); -	dev_priv->mm.interruptible = was_interruptible; -	if (ret) { -		DRM_ERROR("failed to enable ironlake power savings\n"); -		ironlake_teardown_rc6(dev); -		return; -	} - -	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); -	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); - -	intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); -} -  static unsigned long intel_pxfreq(u32 vidfreq)  {  	unsigned long freq; @@ -5534,12 +5650,7 @@ static void gen6_suspend_rps(struct drm_device *dev)  	flush_delayed_work(&dev_priv->rps.delayed_resume_work); -	/* -	 * TODO: disable RPS interrupts on GEN9+ too once RPS support -	 * is added for it. -	 */ -	if (INTEL_INFO(dev)->gen < 9) -		gen6_disable_rps_interrupts(dev); +	gen6_disable_rps_interrupts(dev);  }  /** @@ -5569,7 +5680,6 @@ void intel_disable_gt_powersave(struct drm_device *dev)  	if (IS_IRONLAKE_M(dev)) {  		ironlake_disable_drps(dev); -		ironlake_disable_rc6(dev);  	} else if (INTEL_INFO(dev)->gen >= 6) {  		intel_suspend_gt_powersave(dev); @@ -5597,12 +5707,7 @@ static void intel_gen6_powersave_work(struct work_struct *work)  	mutex_lock(&dev_priv->rps.hw_lock); -	/* -	 * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is -	 * added for it. 
-	 */ -	if (INTEL_INFO(dev)->gen < 9) -		gen6_reset_rps_interrupts(dev); +	gen6_reset_rps_interrupts(dev);  	if (IS_CHERRYVIEW(dev)) {  		cherryview_enable_rps(dev); @@ -5619,10 +5724,16 @@ static void intel_gen6_powersave_work(struct work_struct *work)  		gen6_enable_rps(dev);  		__gen6_update_ring_freq(dev);  	} + +	WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); +	WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); + +	WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); +	WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); +  	dev_priv->rps.enabled = true; -	if (INTEL_INFO(dev)->gen < 9) -		gen6_enable_rps_interrupts(dev); +	gen6_enable_rps_interrupts(dev);  	mutex_unlock(&dev_priv->rps.hw_lock); @@ -5633,10 +5744,13 @@ void intel_enable_gt_powersave(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; +	/* Powersaving is controlled by the host when inside a VM */ +	if (intel_vgpu_active(dev)) +		return; +  	if (IS_IRONLAKE_M(dev)) {  		mutex_lock(&dev->struct_mutex);  		ironlake_enable_drps(dev); -		ironlake_enable_rc6(dev);  		intel_init_emon(dev);  		mutex_unlock(&dev->struct_mutex);  	} else if (INTEL_INFO(dev)->gen >= 6) { @@ -6169,11 +6283,22 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)  	gen6_check_mch_setup(dev);  } +static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) +{ +	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + +	/* +	 * Disable trickle feed and enable pnd deadline calculation +	 */ +	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); +	I915_WRITE(CBR1_VLV, 0); +} +  static void valleyview_init_clock_gating(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; -	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); +	vlv_init_display_clock_gating(dev_priv);  	/* WaDisableEarlyCull:vlv */  	I915_WRITE(_3D_CHICKEN3, @@ -6221,8 +6346,6 @@ static void 
valleyview_init_clock_gating(struct drm_device *dev)  	I915_WRITE(GEN7_UCGCTL4,  		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); -	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); -  	/*  	 * BSpec says this must be set, even though  	 * WaDisable4x2SubspanOptimization isn't listed for VLV. @@ -6259,9 +6382,7 @@ static void cherryview_init_clock_gating(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; -	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); - -	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); +	vlv_init_display_clock_gating(dev_priv);  	/* WaVSRefCountFullforceMissDisable:chv */  	/* WaDSRefCountFullforceMissDisable:chv */ @@ -6396,7 +6517,8 @@ void intel_init_clock_gating(struct drm_device *dev)  {  	struct drm_i915_private *dev_priv = dev->dev_private; -	dev_priv->display.init_clock_gating(dev); +	if (dev_priv->display.init_clock_gating) +		dev_priv->display.init_clock_gating(dev);  }  void intel_suspend_hw(struct drm_device *dev) @@ -6422,7 +6544,7 @@ void intel_init_pm(struct drm_device *dev)  	if (INTEL_INFO(dev)->gen >= 9) {  		skl_setup_wm_latency(dev); -		dev_priv->display.init_clock_gating = gen9_init_clock_gating; +		dev_priv->display.init_clock_gating = skl_init_clock_gating;  		dev_priv->display.update_wm = skl_update_wm;  		dev_priv->display.update_sprite_wm = skl_update_sprite_wm;  	} else if (HAS_PCH_SPLIT(dev)) { @@ -6450,7 +6572,7 @@ void intel_init_pm(struct drm_device *dev)  		else if (INTEL_INFO(dev)->gen == 8)  			dev_priv->display.init_clock_gating = broadwell_init_clock_gating;  	} else if (IS_CHERRYVIEW(dev)) { -		dev_priv->display.update_wm = cherryview_update_wm; +		dev_priv->display.update_wm = valleyview_update_wm;  		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;  		dev_priv->display.init_clock_gating =  			cherryview_init_clock_gating; @@ -6618,7 +6740,9 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) 
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)  { -	if (IS_CHERRYVIEW(dev_priv->dev)) +	if (IS_GEN9(dev_priv->dev)) +		return (val * GT_FREQUENCY_MULTIPLIER) / GEN9_FREQ_SCALER; +	else if (IS_CHERRYVIEW(dev_priv->dev))  		return chv_gpu_freq(dev_priv, val);  	else if (IS_VALLEYVIEW(dev_priv->dev))  		return byt_gpu_freq(dev_priv, val); @@ -6628,7 +6752,9 @@ int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)  int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)  { -	if (IS_CHERRYVIEW(dev_priv->dev)) +	if (IS_GEN9(dev_priv->dev)) +		return (val * GEN9_FREQ_SCALER) / GT_FREQUENCY_MULTIPLIER; +	else if (IS_CHERRYVIEW(dev_priv->dev))  		return chv_freq_opcode(dev_priv, val);  	else if (IS_VALLEYVIEW(dev_priv->dev))  		return byt_freq_opcode(dev_priv, val);  |