Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_pm.c	570
1 file changed, 303 insertions, 267 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a52986a9e7a6..07f663cd2d1c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -43,6 +43,7 @@
 #include "i915_fixed.h"
 #include "i915_irq.h"
 #include "i915_trace.h"
+#include "display/intel_bw.h"
 #include "intel_pm.h"
 #include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
@@ -3637,10 +3638,6 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv)
 static bool
 intel_has_sagv(struct drm_i915_private *dev_priv)
 {
-	/* HACK! */
-	if (IS_GEN(dev_priv, 12))
-		return false;
-
 	return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) &&
 		dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED;
 }
@@ -3757,42 +3754,120 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-bool intel_can_enable_sagv(struct intel_atomic_state *state)
+void intel_sagv_pre_plane_update(struct intel_atomic_state *state)
 {
-	struct drm_device *dev = state->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *crtc;
-	struct intel_plane *plane;
-	struct intel_crtc_state *crtc_state;
-	enum pipe pipe;
-	int level, latency;
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	const struct intel_bw_state *new_bw_state;
+	const struct intel_bw_state *old_bw_state;
+	u32 new_mask = 0;
 
+	/*
+	 * Just return if we can't control SAGV or don't have it.
+	 * This is different from situation when we have SAGV but just can't
+	 * afford it due to DBuf limitation - in case if SAGV is completely
+	 * disabled in a BIOS, we are not even allowed to send a PCode request,
+	 * as it will throw an error. So have to check it here.
+	 */
 	if (!intel_has_sagv(dev_priv))
-		return false;
+		return;
+
+	new_bw_state = intel_atomic_get_new_bw_state(state);
+	if (!new_bw_state)
+		return;
+
+	if (INTEL_GEN(dev_priv) < 11 && !intel_can_enable_sagv(dev_priv, new_bw_state)) {
+		intel_disable_sagv(dev_priv);
+		return;
+	}
 
+	old_bw_state = intel_atomic_get_old_bw_state(state);
 	/*
-	 * If there are no active CRTCs, no additional checks need be performed
+	 * Nothing to mask
 	 */
-	if (hweight8(state->active_pipes) == 0)
-		return true;
+	if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask)
+		return;
+
+	new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask;
 
 	/*
-	 * SKL+ workaround: bspec recommends we disable SAGV when we have
-	 * more then one pipe enabled
+	 * If new mask is zero - means there is nothing to mask,
+	 * we can only unmask, which should be done in unmask.
 	 */
-	if (hweight8(state->active_pipes) > 1)
+	if (!new_mask)
+		return;
+
+	/*
+	 * Restrict required qgv points before updating the configuration.
+	 * According to BSpec we can't mask and unmask qgv points at the same
+	 * time. Also masking should be done before updating the configuration
+	 * and unmasking afterwards.
+	 */
+	icl_pcode_restrict_qgv_points(dev_priv, new_mask);
+}
+
+void intel_sagv_post_plane_update(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	const struct intel_bw_state *new_bw_state;
+	const struct intel_bw_state *old_bw_state;
+	u32 new_mask = 0;
+
+	/*
+	 * Just return if we can't control SAGV or don't have it.
+	 * This is different from situation when we have SAGV but just can't
+	 * afford it due to DBuf limitation - in case if SAGV is completely
+	 * disabled in a BIOS, we are not even allowed to send a PCode request,
+	 * as it will throw an error. So have to check it here.
+	 */
+	if (!intel_has_sagv(dev_priv))
+		return;
+
+	new_bw_state = intel_atomic_get_new_bw_state(state);
+	if (!new_bw_state)
+		return;
+
+	if (INTEL_GEN(dev_priv) < 11 && intel_can_enable_sagv(dev_priv, new_bw_state)) {
+		intel_enable_sagv(dev_priv);
+		return;
+	}
+
+	old_bw_state = intel_atomic_get_old_bw_state(state);
+	/*
+	 * Nothing to unmask
+	 */
+	if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask)
+		return;
+
+	new_mask = new_bw_state->qgv_points_mask;
+
+	/*
+	 * Allow required qgv points after updating the configuration.
+	 * According to BSpec we can't mask and unmask qgv points at the same
+	 * time. Also masking should be done before updating the configuration
+	 * and unmasking afterwards.
+	 */
+	icl_pcode_restrict_qgv_points(dev_priv, new_mask);
+}
+
+static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_plane *plane;
+	const struct intel_plane_state *plane_state;
+	int level, latency;
+
+	if (!intel_has_sagv(dev_priv))
 		return false;
 
-	/* Since we're now guaranteed to only have one active CRTC... */
-	pipe = ffs(state->active_pipes) - 1;
-	crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
-	crtc_state = to_intel_crtc_state(crtc->base.state);
+	if (!crtc_state->hw.active)
+		return true;
 
 	if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
 		return false;
 
-	for_each_intel_plane_on_crtc(dev, crtc, plane) {
-		struct skl_plane_wm *wm =
+	intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) {
+		const struct skl_plane_wm *wm =
 			&crtc_state->wm.skl.optimal.planes[plane->id];
 
 		/* Skip this plane if it's not enabled */
@@ -3807,7 +3882,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
 		latency = dev_priv->wm.skl_latency[level];
 
 		if (skl_needs_memory_bw_wa(dev_priv) &&
-		    plane->base.state->fb->modifier ==
+		    plane_state->uapi.fb->modifier ==
 		    I915_FORMAT_MOD_X_TILED)
 			latency += 15;
 
@@ -3823,6 +3898,112 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state)
 	return true;
 }
 
+static bool tgl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	enum plane_id plane_id;
+
+	if (!crtc_state->hw.active)
+		return true;
+
+	for_each_plane_id_on_crtc(crtc, plane_id) {
+		const struct skl_ddb_entry *plane_alloc =
+			&crtc_state->wm.skl.plane_ddb_y[plane_id];
+		const struct skl_plane_wm *wm =
+			&crtc_state->wm.skl.optimal.planes[plane_id];
+
+		if (skl_ddb_entry_size(plane_alloc) < wm->sagv_wm0.min_ddb_alloc)
+			return false;
+	}
+
+	return true;
+}
+
+static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	if (INTEL_GEN(dev_priv) >= 12)
+		return tgl_crtc_can_enable_sagv(crtc_state);
+	else
+		return skl_crtc_can_enable_sagv(crtc_state);
+}
+
+bool intel_can_enable_sagv(struct drm_i915_private *dev_priv,
+			   const struct intel_bw_state *bw_state)
+{
+	if (INTEL_GEN(dev_priv) < 11 &&
+	    bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes))
+		return false;
+
+	return bw_state->pipe_sagv_reject == 0;
+}
+
+static int intel_compute_sagv_mask(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	int ret;
+	struct intel_crtc *crtc;
+	struct intel_crtc_state *new_crtc_state;
+	struct intel_bw_state *new_bw_state = NULL;
+	const struct intel_bw_state *old_bw_state = NULL;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc,
+					 new_crtc_state, i) {
+		new_bw_state = intel_atomic_get_bw_state(state);
+		if (IS_ERR(new_bw_state))
+			return PTR_ERR(new_bw_state);
+
+		old_bw_state = intel_atomic_get_old_bw_state(state);
+
+		if (intel_crtc_can_enable_sagv(new_crtc_state))
+			new_bw_state->pipe_sagv_reject &= ~BIT(crtc->pipe);
+		else
+			new_bw_state->pipe_sagv_reject |= BIT(crtc->pipe);
+	}
+
+	if (!new_bw_state)
+		return 0;
+
+	new_bw_state->active_pipes =
+		intel_calc_active_pipes(state, old_bw_state->active_pipes);
+
+	if (new_bw_state->active_pipes != old_bw_state->active_pipes) {
+		ret = intel_atomic_lock_global_state(&new_bw_state->base);
+		if (ret)
+			return ret;
+	}
+
+	for_each_new_intel_crtc_in_state(state, crtc,
+					 new_crtc_state, i) {
+		struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal;
+
+		/*
+		 * We store use_sagv_wm in the crtc state rather than relying on
+		 * that bw state since we have no convenient way to get at the
+		 * latter from the plane commit hooks (especially in the legacy
+		 * cursor case)
+		 */
+		pipe_wm->use_sagv_wm = INTEL_GEN(dev_priv) >= 12 &&
+				       intel_can_enable_sagv(dev_priv, new_bw_state);
+	}
+
+	if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
+	    intel_can_enable_sagv(dev_priv, old_bw_state)) {
+		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
+		if (ret)
+			return ret;
+	} else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
+		ret = intel_atomic_lock_global_state(&new_bw_state->base);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /*
  * Calculate initial DBuf slice offset, based on slice size
  * and mask(i.e if slice size is 1024 and second slice is enabled
@@ -4016,6 +4197,7 @@ static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
 				 int color_plane);
 static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
 				 int level,
+				 unsigned int latency,
 				 const struct skl_wm_params *wp,
 				 const struct skl_wm_level *result_prev,
 				 struct skl_wm_level *result /* out */);
@@ -4038,7 +4220,9 @@ skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
 	drm_WARN_ON(&dev_priv->drm, ret);
 
 	for (level = 0; level <= max_level; level++) {
-		skl_compute_plane_wm(crtc_state, level, &wp, &wm, &wm);
+		unsigned int latency = dev_priv->wm.skl_latency[level];
+
+		skl_compute_plane_wm(crtc_state, level, latency, &wp, &wm, &wm);
 		if (wm.min_ddb_alloc == U16_MAX)
 			break;
@@ -4544,6 +4728,20 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
 	return total_data_rate;
 }
 
+static const struct skl_wm_level *
+skl_plane_wm_level(const struct intel_crtc_state *crtc_state,
+		   enum plane_id plane_id,
+		   int level)
+{
+	const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal;
+	const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id];
+
+	if (level == 0 && pipe_wm->use_sagv_wm)
+		return &wm->sagv_wm0;
+
+	return &wm->wm[level];
+}
+
 static int
 skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state)
 {
@@ -4580,7 +4778,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state)
 							 plane_data_rate,
 							 uv_plane_data_rate);
 
-
 	skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate,
 					   alloc, &num_active);
 	alloc_size = skl_ddb_entry_size(alloc);
@@ -4780,7 +4977,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate,
 	wm_intermediate_val = latency * pixel_rate * cpp;
 	ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
 
-	if (INTEL_GEN(dev_priv) >= 10)
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		ret = add_fixed16_u32(ret, 1);
 
 	return ret;
@@ -4915,18 +5112,19 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
 					   wp->y_min_scanlines,
 					   wp->dbuf_block_size);
 
-		if (INTEL_GEN(dev_priv) >= 10)
+		if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 			interm_pbpl++;
 
 		wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
 							wp->y_min_scanlines);
-	} else if (wp->x_tiled && IS_GEN(dev_priv, 9)) {
-		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
-					   wp->dbuf_block_size);
-		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	} else {
 		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
-					   wp->dbuf_block_size) + 1;
+					   wp->dbuf_block_size);
+
+		if (!wp->x_tiled ||
+		    INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+			interm_pbpl++;
+
 		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	}
@@ -4972,12 +5170,12 @@ static bool skl_wm_has_lines(struct drm_i915_private *dev_priv, int level)
 
 static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
 				 int level,
+				 unsigned int latency,
 				 const struct skl_wm_params *wp,
 				 const struct skl_wm_level *result_prev,
 				 struct skl_wm_level *result /* out */)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
-	u32 latency = dev_priv->wm.skl_latency[level];
 	uint_fixed_16_16_t method1, method2;
 	uint_fixed_16_16_t selected_result;
 	u32 res_blocks, res_lines, min_ddb_alloc = 0;
@@ -5106,14 +5304,29 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state,
 
 	for (level = 0; level <= max_level; level++) {
 		struct skl_wm_level *result = &levels[level];
+		unsigned int latency = dev_priv->wm.skl_latency[level];
 
-		skl_compute_plane_wm(crtc_state, level, wm_params,
-				     result_prev, result);
+		skl_compute_plane_wm(crtc_state, level, latency,
+				     wm_params, result_prev, result);
 
 		result_prev = result;
 	}
 }
 
+static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state,
+				const struct skl_wm_params *wm_params,
+				struct skl_plane_wm *plane_wm)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct skl_wm_level *sagv_wm = &plane_wm->sagv_wm0;
+	struct skl_wm_level *levels = plane_wm->wm;
+	unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us;
+
+	skl_compute_plane_wm(crtc_state, 0, latency,
+			     wm_params, &levels[0],
+			     sagv_wm);
+}
+
 static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
 				      const struct skl_wm_params *wp,
 				      struct skl_plane_wm *wm)
@@ -5166,10 +5379,6 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state,
 				trans_offset_b;
 	} else {
 		res_blocks = wm0_sel_res_b + trans_offset_b;
-
-		/* WA BUG:1938466 add one block for non y-tile planes */
-		if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
-			res_blocks += 1;
 	}
 
 	/*
@@ -5185,6 +5394,8 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
 				     const struct intel_plane_state *plane_state,
 				     enum plane_id plane_id, int color_plane)
 {
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id];
 	struct skl_wm_params wm_params;
 	int ret;
@@ -5195,6 +5406,10 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state,
 		return ret;
 
 	skl_compute_wm_levels(crtc_state, &wm_params, wm->wm);
+
+	if (INTEL_GEN(dev_priv) >= 12)
+		tgl_compute_sagv_wm(crtc_state, &wm_params, wm);
+
 	skl_compute_transition_wm(crtc_state, &wm_params, wm);
 
 	return 0;
@@ -5354,8 +5569,12 @@ void skl_write_plane_wm(struct intel_plane *plane,
 		&crtc_state->wm.skl.plane_ddb_uv[plane_id];
 
 	for (level = 0; level <= max_level; level++) {
+		const struct skl_wm_level *wm_level;
+
+		wm_level = skl_plane_wm_level(crtc_state, plane_id, level);
+
 		skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
-				   &wm->wm[level]);
+				   wm_level);
 	}
 	skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
 			   &wm->trans_wm);
@@ -5388,8 +5607,12 @@ void skl_write_cursor_wm(struct intel_plane *plane,
 		&crtc_state->wm.skl.plane_ddb_y[plane_id];
 
 	for (level = 0; level <= max_level; level++) {
+		const struct skl_wm_level *wm_level;
+
+		wm_level = skl_plane_wm_level(crtc_state, plane_id, level);
+
 		skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
-				   &wm->wm[level]);
+				   wm_level);
 	}
 	skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
@@ -5424,8 +5647,8 @@ static bool skl_plane_wm_equals(struct drm_i915_private *dev_priv,
 	return skl_wm_level_equals(&wm1->trans_wm, &wm2->trans_wm);
 }
 
-static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
-					   const struct skl_ddb_entry *b)
+static bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
+				    const struct skl_ddb_entry *b)
 {
 	return a->start < b->end && b->start < a->end;
 }
@@ -5553,23 +5776,25 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 				continue;
 
 			drm_dbg_kms(&dev_priv->drm,
-				    "[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm"
-				    " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n",
+				    "[PLANE:%d:%s]   level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm"
+				    " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm\n",
 				    plane->base.base.id, plane->base.name,
 				    enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en),
 				    enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en),
 				    enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en),
 				    enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en),
 				    enast(old_wm->trans_wm.plane_en),
+				    enast(old_wm->sagv_wm0.plane_en),
 				    enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en),
 				    enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en),
 				    enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en),
 				    enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en),
-				    enast(new_wm->trans_wm.plane_en));
+				    enast(new_wm->trans_wm.plane_en),
+				    enast(new_wm->sagv_wm0.plane_en));
 
 			drm_dbg_kms(&dev_priv->drm,
-				    "[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
-				      " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
+				    "[PLANE:%d:%s]   lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d"
+				      " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n",
 				    plane->base.base.id, plane->base.name,
 				    enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l,
 				    enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l,
@@ -5580,6 +5805,7 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 				    enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l,
 				    enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l,
 				    enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l,
+				    enast(old_wm->sagv_wm0.ignore_lines), old_wm->sagv_wm0.plane_res_l,
 
 				    enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l,
 				    enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l,
@@ -5589,37 +5815,42 @@ skl_print_wm_changes(struct intel_atomic_state *state)
 				    enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l,
 				    enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l,
 				    enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l,
-				    enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l);
+				    enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l,
+				    enast(new_wm->sagv_wm0.ignore_lines), new_wm->sagv_wm0.plane_res_l);
 
 			drm_dbg_kms(&dev_priv->drm,
-				    "[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
-				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+				    "[PLANE:%d:%s]  blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
 				    plane->base.base.id, plane->base.name,
 				    old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b,
 				    old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b,
 				    old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b,
 				    old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b,
 				    old_wm->trans_wm.plane_res_b,
+				    old_wm->sagv_wm0.plane_res_b,
 				    new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b,
 				    new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b,
 				    new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b,
 				    new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b,
-				    new_wm->trans_wm.plane_res_b);
+				    new_wm->trans_wm.plane_res_b,
+				    new_wm->sagv_wm0.plane_res_b);
 
 			drm_dbg_kms(&dev_priv->drm,
-				    "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
-				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
+				    "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d"
+				    " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n",
 				    plane->base.base.id, plane->base.name,
 				    old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc,
 				    old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc,
 				    old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc,
 				    old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc,
 				    old_wm->trans_wm.min_ddb_alloc,
+				    old_wm->sagv_wm0.min_ddb_alloc,
 				    new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc,
 				    new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc,
 				    new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc,
 				    new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc,
-				    new_wm->trans_wm.min_ddb_alloc);
+				    new_wm->trans_wm.min_ddb_alloc,
+				    new_wm->sagv_wm0.min_ddb_alloc);
 		}
 	}
 }
@@ -5780,6 +6011,10 @@ skl_compute_wm(struct intel_atomic_state *state)
 	if (ret)
 		return ret;
 
+	ret = intel_compute_sagv_mask(state);
+	if (ret)
+		return ret;
+
 	/*
 	 * skl_compute_ddb() will have adjusted the final watermarks
 	 * based on how much ddb is available. Now we can actually
@@ -5876,8 +6111,7 @@ static void ilk_optimize_watermarks(struct intel_atomic_state *state,
 	mutex_unlock(&dev_priv->wm.wm_mutex);
 }
 
-static inline void skl_wm_level_from_reg_val(u32 val,
-					     struct skl_wm_level *level)
+static void skl_wm_level_from_reg_val(u32 val, struct skl_wm_level *level)
 {
 	level->plane_en = val & PLANE_WM_EN;
 	level->ignore_lines = val & PLANE_WM_IGNORE_LINES;
@@ -5909,6 +6143,9 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
 			skl_wm_level_from_reg_val(val, &wm->wm[level]);
 		}
 
+		if (INTEL_GEN(dev_priv) >= 12)
+			wm->sagv_wm0 = wm->wm[0];
+
 		if (plane_id != PLANE_CURSOR)
 			val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
 		else
@@ -6593,16 +6830,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
 		   ILK_ELPIN_409_SELECT);
-	I915_WRITE(_3D_CHICKEN2,
-		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
-		   _3D_CHICKEN2_WM_READ_PIPELINED);
-
-	/* WaDisableRenderCachePipelinedFlush:ilk */
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:ilk */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
 	g4x_disable_trickle_feed(dev_priv);
@@ -6665,27 +6892,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
 		   ILK_ELPIN_409_SELECT);
 
-	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
-	I915_WRITE(_3D_CHICKEN,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
-
-	/* WaDisable_RenderCache_OperationalFlush:snb */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	/*
-	 * BSpec recoomends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN6_GT_MODE,
-		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-
 	I915_WRITE(GEN6_UCGCTL1,
 		   I915_READ(GEN6_UCGCTL1) |
 		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
@@ -6708,18 +6914,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
 		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-	/* WaStripsFansDisableFastClipPerformanceFix:snb */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
-
-	/*
-	 * Bspec says:
-	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
-	 * 3DSTATE_SF number of SF output attributes is more than 16."
-	 */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
-
 	/*
 	 * According to the spec the following bits should be
 	 * set in order to enable memory self-refresh and fbc:
@@ -6749,24 +6943,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
 	gen6_check_mch_setup(dev_priv);
 }
 
-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
-{
-	u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
-
-	/*
-	 * WaVSThreadDispatchOverride:ivb,vlv
-	 *
-	 * This actually overrides the dispatch
-	 * mode for all thread types.
-	 */
-	reg &= ~GEN7_FF_SCHED_MASK;
-	reg |= GEN7_FF_TS_SCHED_HW;
-	reg |= GEN7_FF_VS_SCHED_HW;
-	reg |= GEN7_FF_DS_SCHED_HW;
-
-	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
-}
-
 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
 {
 	/*
@@ -6850,6 +7026,10 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
 	if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0))
 		I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) |
 			   TGL_VRH_GATING_DIS);
+
+	/* Wa_14011059788:tgl */
+	intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN,
+			 0, DFR_DISABLE);
 }
 
 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -6882,9 +7062,6 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
 	val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
 	/* ReadHitWriteOnlyDisable:cnl */
 	val |= RCCUNIT_CLKGATE_DIS;
-	/* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
-		val |= SARBUNIT_CLKGATE_DIS;
 	I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
 
 	/* Wa_2201832410:cnl */
@@ -6992,45 +7169,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	/* L3 caching of data atomics doesn't work -- disable it. */
-	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
-	I915_WRITE(HSW_ROW_CHICKEN3,
-		   _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
-
 	/* This is required by WaCatErrorRejectionIssue:hsw */
 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-			GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
-
-	/* WaVSRefCountFullforceMissDisable:hsw */
-	I915_WRITE(GEN7_FF_THREAD_MODE,
-		   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
-
-	/* WaDisable_RenderCache_OperationalFlush:hsw */
-	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	/* enable HiZ Raw Stall Optimization */
-	I915_WRITE(CACHE_MODE_0_GEN7,
-		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
-
-	/* WaDisable4x2SubspanOptimization:hsw */
-	I915_WRITE(CACHE_MODE_1,
-		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN7_GT_MODE,
-		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
-	/* WaSampleCChickenBitEnable:hsw */
-	I915_WRITE(HALF_SLICE_CHICKEN3,
-		   _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
+		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
 	/* WaSwitchSolVfFArbitrationPriority:hsw */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -7044,32 +7186,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
 
-	/* WaDisableEarlyCull:ivb */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
-
 	/* WaDisableBackToBackFlipFix:ivb */
 	I915_WRITE(IVB_CHICKEN3,
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
-	/* WaDisablePSDDualDispatchEnable:ivb */
-	if (IS_IVB_GT1(dev_priv))
-		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:ivb */
-	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
-	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
-		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
-
-	/* WaApplyL3ControlAndL3ChickenMode:ivb */
-	I915_WRITE(GEN7_L3CNTLREG1,
-			GEN7_WA_FOR_GEN7_L3_CONTROL);
-	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
-		   GEN7_WA_L3_CHICKEN_MODE);
 	if (IS_IVB_GT1(dev_priv))
 		I915_WRITE(GEN7_ROW_CHICKEN2,
 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
@@ -7081,10 +7202,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 	}
 
-	/* WaForceL3Serialization:ivb */
-	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
-		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
-
 	/*
 	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
 	 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
@@ -7099,29 +7216,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
 
 	g4x_disable_trickle_feed(dev_priv);
 
-	gen7_setup_fixed_func_scheduler(dev_priv);
-
-	if (0) { /* causes HiZ corruption on ivb:gt1 */
-		/* enable HiZ Raw Stall Optimization */
-		I915_WRITE(CACHE_MODE_0_GEN7,
-			   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
-	}
-
-	/* WaDisable4x2SubspanOptimization:ivb */
-	I915_WRITE(CACHE_MODE_1,
-		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN7_GT_MODE,
-		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 	snpcr &= ~GEN6_MBC_SNPCR_MASK;
 	snpcr |= GEN6_MBC_SNPCR_MED;
@@ -7135,28 +7229,11 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	/* WaDisableEarlyCull:vlv */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
-
 	/* WaDisableBackToBackFlipFix:vlv */
 	I915_WRITE(IVB_CHICKEN3,
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
-	/* WaPsdDispatchEnable:vlv */
-	/* WaDisablePSDDualDispatchEnable:vlv */
-	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
-				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:vlv */
-	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	/* WaForceL3Serialization:vlv */
-	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
-		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
-
 	/* WaDisableDopClockGating:vlv */
 	I915_WRITE(GEN7_ROW_CHICKEN2,
 		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
@@ -7166,8 +7243,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
 		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
 		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
-	gen7_setup_fixed_func_scheduler(dev_priv);
-
 	/*
 	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
 	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
@@ -7182,30 +7257,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
 		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
 
 	/*
-	 * BSpec says this must be set, even though
-	 * WaDisable4x2SubspanOptimization isn't listed for VLV.
-	 */
-	I915_WRITE(CACHE_MODE_1,
-		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN7_GT_MODE,
-		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
-	/*
-	 * WaIncreaseL3CreditsForVLVB0:vlv
-	 * This is the hardware default actually.
-	 */
-	I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
-
-	/*
 	 * WaDisableVLVClockGating_VBIIssue:vlv
 	 * Disable clock gating on th GCFG unit to prevent a delay
 	 * in the reporting of vblank events.
@@ -7257,13 +7308,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
 		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
 	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
 
-	/* WaDisableRenderCachePipelinedFlush */
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:g4x */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
 	g4x_disable_trickle_feed(dev_priv);
 }
@@ -7279,11 +7323,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
 	intel_uncore_write(uncore,
 			   MI_ARB_STATE,
 			   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:gen4 */
-	intel_uncore_write(uncore,
-			   CACHE_MODE_0,
-			   _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -7296,9 +7335,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
 	I915_WRITE(RENCLK_GATE_D2, 0);
 	I915_WRITE(MI_ARB_STATE,
 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:gen4 */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
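For orientation, the ordering rule the new SAGV hooks enforce can be seen in isolation: per the comments in intel_sagv_pre_plane_update()/intel_sagv_post_plane_update(), PCode cannot mask and unmask QGV points in one request, so the pre-plane step restricts to the union of the old and new masks and the post-plane step relaxes to the new mask alone. The following is a minimal standalone sketch, not driver code; qgv_restrict() and update_display_config() are illustrative stand-ins for icl_pcode_restrict_qgv_points() and the actual plane/watermark update.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the PCode request (icl_pcode_restrict_qgv_points()). */
static void qgv_restrict(uint32_t mask)
{
	printf("restrict qgv points: 0x%x\n", (unsigned int)mask);
}

/* Stand-in for writing the new plane/watermark configuration. */
static void update_display_config(void)
{
	printf("update planes and watermarks\n");
}

static void commit(uint32_t old_mask, uint32_t new_mask)
{
	/* Pre-plane update: only widen the restriction (mask, never unmask). */
	if (old_mask != new_mask && (old_mask | new_mask))
		qgv_restrict(old_mask | new_mask);

	update_display_config();

	/* Post-plane update: now safe to drop points no longer needed. */
	if (old_mask != new_mask)
		qgv_restrict(new_mask);
}

int main(void)
{
	commit(0x3, 0x6); /* restricts 0x7 before the update, relaxes to 0x6 after */
	return 0;
}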