author | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600
---|---|---
committer | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600
commit | cc9b94029e9ef51787af908e9856b1eed314bc00 (patch) |
tree | 9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /drivers/gpu/drm/i915/intel_pm.c |
parent | 13bed58ce8748d430a26e353a09b89f9d613a71f (diff) |
parent | 1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff) |
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_pm.c | 2470 |
1 file changed, 1530 insertions, 940 deletions
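Most of the intel_pm.c churn below moves the gen9/SKL display data buffer (DDB) allocation into the atomic check phase and splits each pipe's block budget between planes in proportion to their relative data rates (the div_u64((uint64_t)alloc_size * data_rate, total_data_rate) step in skl_allocate_pipe_ddb()). As a quick orientation before the raw hunks, here is a minimal standalone sketch of that proportional split; the helper name, the example plane data rates, and the 892-block budget (the 896-block SKL DDB minus the 4-block bypass allocation, before the cursor reservation) are illustrative only and are not driver code.

```c
/* Illustrative sketch only -- not part of intel_pm.c. */
#include <stdint.h>
#include <stdio.h>

/*
 * Proportional share of the pipe's DDB block budget for one plane,
 * mirroring the div_u64((uint64_t)alloc_size * data_rate,
 * total_data_rate) step in the reworked skl_allocate_pipe_ddb().
 */
static uint16_t ddb_share(uint16_t alloc_size, uint64_t data_rate,
			  uint64_t total_data_rate)
{
	if (total_data_rate == 0)
		return 0;
	return (uint16_t)(alloc_size * data_rate / total_data_rate);
}

int main(void)
{
	/* Hypothetical planes: 1920x1080 and 640x480, both 4 bytes/pixel. */
	uint64_t rate[2] = { 1920ULL * 1080 * 4, 640ULL * 480 * 4 };
	uint64_t total = rate[0] + rate[1];
	uint16_t alloc_size = 892;	/* 896-block DDB minus 4 bypass blocks */
	int i;

	for (i = 0; i < 2; i++)
		printf("plane %d gets %u of %u blocks\n", i,
		       (unsigned int)ddb_share(alloc_size, rate[i], total),
		       (unsigned int)alloc_size);
	return 0;
}
```

The real code in the hunks below additionally reserves the cursor blocks up front (skl_cursor_allocation()) and leaves planes with zero data rate at a (0, 0) DDB entry.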
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a7ef45da0a9e..a2f751cd187a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -26,6 +26,7 @@ */ #include <linux/cpufreq.h> +#include <drm/drm_plane_helper.h> #include "i915_drv.h" #include "intel_drv.h" #include "../../../platform/x86/intel_ips.h" @@ -54,10 +55,38 @@ #define INTEL_RC6p_ENABLE (1<<1) #define INTEL_RC6pp_ENABLE (1<<2) -static void bxt_init_clock_gating(struct drm_device *dev) +static void gen9_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); + + I915_WRITE(GEN8_CONFIG0, + I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); + + /* WaEnableChickenDCPR:skl,bxt,kbl */ + I915_WRITE(GEN8_CHICKEN_DCPR_1, + I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); + + /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl */ + I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | + DISP_FBC_WM_DIS | + DISP_FBC_MEMORY_WAKE); + + /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_DISABLE_DUMMY0); +} + +static void bxt_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + gen9_init_clock_gating(dev); + /* WaDisableSDEUnitClockGating:bxt */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); @@ -80,7 +109,7 @@ static void bxt_init_clock_gating(struct drm_device *dev) static void i915_pineview_get_mem_freq(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp; tmp = I915_READ(CLKCFG); @@ -119,7 +148,7 @@ static void i915_pineview_get_mem_freq(struct drm_device *dev) static void i915_ironlake_get_mem_freq(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); u16 ddrpll, csipll; ddrpll = I915_READ16(DDRMPLL1); @@ -290,7 +319,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) { - struct drm_device *dev = dev_priv->dev; + struct drm_device *dev = &dev_priv->drm; u32 val; if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { @@ -311,6 +340,11 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) I915_WRITE(FW_BLC_SELF, val); POSTING_READ(FW_BLC_SELF); } else if (IS_I915GM(dev)) { + /* + * FIXME can't find a bit like this for 915G, and + * and yet it does have the related watermark in + * FW_BLC_SELF. What's going on? + */ val = enable ? 
_MASKED_BIT_ENABLE(INSTPM_SELF_EN) : _MASKED_BIT_DISABLE(INSTPM_SELF_EN); I915_WRITE(INSTPM, val); @@ -346,7 +380,7 @@ static const int pessimal_latency_ns = 5000; static int vlv_get_fifo_size(struct drm_device *dev, enum pipe pipe, int plane) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); int sprite0_start, sprite1_start, size; switch (pipe) { @@ -397,7 +431,7 @@ static int vlv_get_fifo_size(struct drm_device *dev, static int i9xx_get_fifo_size(struct drm_device *dev, int plane) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dsparb = I915_READ(DSPARB); int size; @@ -413,7 +447,7 @@ static int i9xx_get_fifo_size(struct drm_device *dev, int plane) static int i830_get_fifo_size(struct drm_device *dev, int plane) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dsparb = I915_READ(DSPARB); int size; @@ -430,7 +464,7 @@ static int i830_get_fifo_size(struct drm_device *dev, int plane) static int i845_get_fifo_size(struct drm_device *dev, int plane) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dsparb = I915_READ(DSPARB); int size; @@ -608,7 +642,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) static void pineview_update_wm(struct drm_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; const struct cxsr_latency *latency; u32 reg; @@ -905,7 +939,7 @@ static unsigned int vlv_wm_method2(unsigned int pixel_rate, static void vlv_setup_wm_latency(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); /* all latencies in usec */ dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3; @@ -931,7 +965,7 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane, if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!state->visible) + if (!state->base.visible) return 0; cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0); @@ -973,7 +1007,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_CURSOR) continue; - if (state->visible) { + if (state->base.visible) { wm_state->num_active_planes++; total_rate += drm_format_plane_cpp(state->base.fb->pixel_format, 0); } @@ -989,7 +1023,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) continue; } - if (!state->visible) { + if (!state->base.visible) { plane->wm.fifo_size = 0; continue; } @@ -1089,7 +1123,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - if (!state->visible) + if (!state->base.visible) continue; /* normal watermarks */ @@ -1296,7 +1330,7 @@ static void vlv_merge_wm(struct drm_device *dev, static void vlv_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; struct vlv_wm_values wm = {}; @@ -1352,7 +1386,7 @@ static void g4x_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; static const int sr_latency_ns = 12000; - struct drm_i915_private *dev_priv = 
dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); int planea_wm, planeb_wm, cursora_wm, cursorb_wm; int plane_sr, cursor_sr; unsigned int enabled = 0; @@ -1409,7 +1443,7 @@ static void g4x_update_wm(struct drm_crtc *crtc) static void i965_update_wm(struct drm_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; int srwm = 1; int cursor_sr = 16; @@ -1483,7 +1517,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc) static void i9xx_update_wm(struct drm_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); const struct intel_watermark_params *wm_info; uint32_t fwater_lo; uint32_t fwater_hi; @@ -1551,7 +1585,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) obj = intel_fb_obj(enabled->primary->state->fb); /* self-refresh seems busted with untiled */ - if (obj->tiling_mode == I915_TILING_NONE) + if (!i915_gem_object_is_tiled(obj)) enabled = NULL; } @@ -1575,6 +1609,9 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) unsigned long line_time_us; int entries; + if (IS_I915GM(dev) || IS_I945GM(dev)) + cpp = 4; + line_time_us = max(htotal * 1000 / clock, 1); /* Use ns/us then divide to preserve precision */ @@ -1589,7 +1626,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) if (IS_I945G(dev) || IS_I945GM(dev)) I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); - else if (IS_I915GM(dev)) + else I915_WRITE(FW_BLC_SELF, srwm & 0x3f); } @@ -1613,7 +1650,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) static void i845_update_wm(struct drm_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; const struct drm_display_mode *adjusted_mode; uint32_t fwater_lo; @@ -1738,7 +1775,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); @@ -1748,7 +1785,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); @@ -1766,13 +1803,13 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; uint32_t method1, method2; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), cstate->base.adjusted_mode.crtc_htotal, - drm_rect_width(&pstate->dst), + drm_rect_width(&pstate->base.dst), cpp, mem_value); return min(method1, method2); } @@ -1791,7 +1828,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, * this is necessary to avoid flickering. */ int cpp = 4; - int width = pstate->visible ? 
pstate->base.crtc_w : 64; + int width = pstate->base.visible ? pstate->base.crtc_w : 64; if (!cstate->base.active) return 0; @@ -1809,10 +1846,10 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, int cpp = pstate->base.fb ? drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0; - if (!cstate->base.active || !pstate->visible) + if (!cstate->base.active || !pstate->base.visible) return 0; - return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp); + return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); } static unsigned int ilk_display_fifo_size(const struct drm_device *dev) @@ -2012,10 +2049,10 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, } static uint32_t -hsw_compute_linetime_wm(struct drm_device *dev, - struct intel_crtc_state *cstate) +hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) { - struct drm_i915_private *dev_priv = dev->dev_private; + const struct intel_atomic_state *intel_state = + to_intel_atomic_state(cstate->base.state); const struct drm_display_mode *adjusted_mode = &cstate->base.adjusted_mode; u32 linetime, ips_linetime; @@ -2024,7 +2061,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; - if (WARN_ON(dev_priv->cdclk_freq == 0)) + if (WARN_ON(intel_state->cdclk == 0)) return 0; /* The WM are computed with base on how long it takes to fill a single @@ -2033,7 +2070,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, adjusted_mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - dev_priv->cdclk_freq); + intel_state->cdclk); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); @@ -2041,7 +2078,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); if (IS_GEN9(dev)) { uint32_t val; @@ -2090,32 +2127,34 @@ static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) GEN9_MEM_LATENCY_LEVEL_MASK; /* + * If a level n (n > 1) has a 0us latency, all levels m (m >= n) + * need to be disabled. We make sure to sanitize the values out + * of the punit to satisfy this requirement. + */ + for (level = 1; level <= max_level; level++) { + if (wm[level] == 0) { + for (i = level + 1; i <= max_level; i++) + wm[i] = 0; + break; + } + } + + /* * WaWmMemoryReadLatency:skl * * punit doesn't take into account the read latency so we need - * to add 2us to the various latency levels we retrieve from - * the punit. - * - W0 is a bit special in that it's the only level that - * can't be disabled if we want to have display working, so - * we always add 2us there. - * - For levels >=1, punit returns 0us latency when they are - * disabled, so we respect that and don't add 2us then - * - * Additionally, if a level n (n > 1) has a 0us latency, all - * levels m (m >= n) need to be disabled. We make sure to - * sanitize the values out of the punit to satisfy this - * requirement. + * to add 2us to the various latency levels we retrieve from the + * punit when level 0 response data us 0us. 
*/ - wm[0] += 2; - for (level = 1; level <= max_level; level++) - if (wm[level] != 0) + if (wm[0] == 0) { + wm[0] += 2; + for (level = 1; level <= max_level; level++) { + if (wm[level] == 0) + break; wm[level] += 2; - else { - for (i = level + 1; i <= max_level; i++) - wm[i] = 0; - - break; } + } + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { uint64_t sskpd = I915_READ64(MCH_SSKPD); @@ -2146,14 +2185,14 @@ static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5]) { /* ILK sprite LP0 latency is 1300 ns */ - if (INTEL_INFO(dev)->gen == 5) + if (IS_GEN5(dev)) wm[0] = 13; } static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) { /* ILK cursor LP0 latency is 1300 ns */ - if (INTEL_INFO(dev)->gen == 5) + if (IS_GEN5(dev)) wm[0] = 13; /* WaDoubleCursorLP3Latency:ivb */ @@ -2207,7 +2246,7 @@ static void intel_print_wm_latency(struct drm_device *dev, static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, uint16_t wm[5], uint16_t min) { - int level, max_level = ilk_wm_max_level(dev_priv->dev); + int level, max_level = ilk_wm_max_level(&dev_priv->drm); if (wm[0] >= min) return false; @@ -2221,7 +2260,7 @@ static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, static void snb_wm_latency_quirk(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); bool changed; /* @@ -2243,7 +2282,7 @@ static void snb_wm_latency_quirk(struct drm_device *dev) static void ilk_setup_wm_latency(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); intel_read_wm_latency(dev, dev_priv->wm.pri_latency); @@ -2265,7 +2304,7 @@ static void ilk_setup_wm_latency(struct drm_device *dev) static void skl_setup_wm_latency(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); intel_read_wm_latency(dev, dev_priv->wm.skl_latency); intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); @@ -2301,7 +2340,7 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct intel_pipe_wm *pipe_wm; struct drm_device *dev = state->dev; - const struct drm_i915_private *dev_priv = dev->dev_private; + const struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane; struct intel_plane_state *pristate = NULL; struct intel_plane_state *sprstate = NULL; @@ -2309,7 +2348,7 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) int level, max_level = ilk_wm_max_level(dev), usable_level; struct ilk_wm_maximums max; - pipe_wm = &cstate->wm.optimal.ilk; + pipe_wm = &cstate->wm.ilk.optimal; for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { struct intel_plane_state *ps; @@ -2329,10 +2368,10 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) pipe_wm->pipe_enabled = cstate->base.active; if (sprstate) { - pipe_wm->sprites_enabled = sprstate->visible; - pipe_wm->sprites_scaled = sprstate->visible && - (drm_rect_width(&sprstate->dst) != drm_rect_width(&sprstate->src) >> 16 || - drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16); + pipe_wm->sprites_enabled = sprstate->base.visible; + pipe_wm->sprites_scaled = sprstate->base.visible && + (drm_rect_width(&sprstate->base.dst) != 
drm_rect_width(&sprstate->base.src) >> 16 || + drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); } usable_level = max_level; @@ -2352,7 +2391,7 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) pipe_wm->wm[0] = pipe_wm->raw_wm[0]; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - pipe_wm->linetime = hsw_compute_linetime_wm(dev, cstate); + pipe_wm->linetime = hsw_compute_linetime_wm(cstate); if (!ilk_validate_pipe_wm(dev, pipe_wm)) return -EINVAL; @@ -2391,7 +2430,7 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc *intel_crtc, struct intel_crtc_state *newstate) { - struct intel_pipe_wm *a = &newstate->wm.intermediate; + struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; int level, max_level = ilk_wm_max_level(dev); @@ -2400,7 +2439,7 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * currently active watermarks to get values that are safe both before * and after the vblank. */ - *a = newstate->wm.optimal.ilk; + *a = newstate->wm.ilk.optimal; a->pipe_enabled |= b->pipe_enabled; a->sprites_enabled |= b->sprites_enabled; a->sprites_scaled |= b->sprites_scaled; @@ -2429,7 +2468,7 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.optimal.ilk, sizeof(*a)) == 0) + if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) == 0) newstate->wm.need_postvbl_update = false; return 0; @@ -2476,7 +2515,7 @@ static void ilk_wm_merge(struct drm_device *dev, const struct ilk_wm_maximums *max, struct intel_pipe_wm *merged) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); int level, max_level = ilk_wm_max_level(dev); int last_enabled_level = max_level; @@ -2536,7 +2575,7 @@ static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) /* The value we need to program into the WM_LPx latency field */ static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); if (IS_HASWELL(dev) || IS_BROADWELL(dev)) return 2 * level; @@ -2736,7 +2775,7 @@ static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv, static void ilk_write_wm_values(struct drm_i915_private *dev_priv, struct ilk_wm_values *results) { - struct drm_device *dev = dev_priv->dev; + struct drm_device *dev = &dev_priv->drm; struct ilk_wm_values *previous = &dev_priv->wm.hw; unsigned int dirty; uint32_t val; @@ -2811,18 +2850,12 @@ static void ilk_write_wm_values(struct drm_i915_private *dev_priv, bool ilk_disable_lp_wm(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } -/* - * On gen9, we need to allocate Display Data Buffer (DDB) portions to the - * different active planes. - */ - -#define SKL_DDB_SIZE 896 /* in blocks */ -#define BXT_DDB_SIZE 512 +#define SKL_SAGV_BLOCK_TIME 30 /* µs */ /* * Return the index of a plane in the SKL DDB and wm result arrays. 
Primary @@ -2846,49 +2879,227 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +static bool +intel_has_sagv(struct drm_i915_private *dev_priv) +{ + if (IS_KABYLAKE(dev_priv)) + return true; + + if (IS_SKYLAKE(dev_priv) && + dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED) + return true; + + return false; +} + +/* + * SAGV dynamically adjusts the system agent voltage and clock frequencies + * depending on power and performance requirements. The display engine access + * to system memory is blocked during the adjustment time. Because of the + * blocking time, having this enabled can cause full system hangs and/or pipe + * underruns if we don't meet all of the following requirements: + * + * - <= 1 pipe enabled + * - All planes can enable watermarks for latencies >= SAGV engine block time + * - We're not using an interlaced display configuration + */ +int +intel_enable_sagv(struct drm_i915_private *dev_priv) +{ + int ret; + + if (!intel_has_sagv(dev_priv)) + return 0; + + if (dev_priv->sagv_status == I915_SAGV_ENABLED) + return 0; + + DRM_DEBUG_KMS("Enabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_ENABLE); + + /* We don't need to wait for the SAGV when enabling */ + mutex_unlock(&dev_priv->rps.hw_lock); + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. + */ + if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; + return 0; + } else if (ret < 0) { + DRM_ERROR("Failed to enable the SAGV\n"); + return ret; + } + + dev_priv->sagv_status = I915_SAGV_ENABLED; + return 0; +} + +static int +intel_do_sagv_disable(struct drm_i915_private *dev_priv) +{ + int ret; + uint32_t temp = GEN9_SAGV_DISABLE; + + ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, + &temp); + if (ret) + return ret; + else + return temp & GEN9_SAGV_IS_DISABLED; +} + +int +intel_disable_sagv(struct drm_i915_private *dev_priv) +{ + int ret, result; + + if (!intel_has_sagv(dev_priv)) + return 0; + + if (dev_priv->sagv_status == I915_SAGV_DISABLED) + return 0; + + DRM_DEBUG_KMS("Disabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + /* bspec says to keep retrying for at least 1 ms */ + ret = wait_for(result = intel_do_sagv_disable(dev_priv), 1); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret == -ETIMEDOUT) { + DRM_ERROR("Request to disable SAGV timed out\n"); + return -ETIMEDOUT; + } + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. 
+ */ + if (IS_SKYLAKE(dev_priv) && result == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; + return 0; + } else if (result < 0) { + DRM_ERROR("Failed to disable the SAGV\n"); + return result; + } + + dev_priv->sagv_status = I915_SAGV_DISABLED; + return 0; +} + +bool intel_can_enable_sagv(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_crtc *crtc; + enum pipe pipe; + int level, plane; + + if (!intel_has_sagv(dev_priv)) + return false; + + /* + * SKL workaround: bspec recommends we disable the SAGV when we have + * more then one pipe enabled + * + * If there are no active CRTCs, no additional checks need be performed + */ + if (hweight32(intel_state->active_crtcs) == 0) + return true; + else if (hweight32(intel_state->active_crtcs) > 1) + return false; + + /* Since we're now guaranteed to only have one active CRTC... */ + pipe = ffs(intel_state->active_crtcs) - 1; + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + + if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE) + return false; + + for_each_plane(dev_priv, pipe, plane) { + /* Skip this plane if it's not enabled */ + if (intel_state->wm_results.plane[pipe][plane][0] == 0) + continue; + + /* Find the highest enabled wm level for this plane */ + for (level = ilk_wm_max_level(dev); + intel_state->wm_results.plane[pipe][plane][level] == 0; --level) + { } + + /* + * If any of the planes on this pipe don't enable wm levels + * that incur memory latencies higher then 30µs we can't enable + * the SAGV + */ + if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME) + return false; + } + + return true; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, - const struct intel_wm_config *config, - struct skl_ddb_entry *alloc /* out */) + struct skl_ddb_entry *alloc, /* out */ + int *num_active /* out */) { + struct drm_atomic_state *state = cstate->base.state; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *for_crtc = cstate->base.crtc; - struct drm_crtc *crtc; unsigned int pipe_size, ddb_size; int nth_active_pipe; + int pipe = to_intel_crtc(for_crtc)->pipe; - if (!cstate->base.active) { + if (WARN_ON(!state) || !cstate->base.active) { alloc->start = 0; alloc->end = 0; + *num_active = hweight32(dev_priv->active_crtcs); return; } - if (IS_BROXTON(dev)) - ddb_size = BXT_DDB_SIZE; + if (intel_state->active_pipe_changes) + *num_active = hweight32(intel_state->active_crtcs); else - ddb_size = SKL_DDB_SIZE; + *num_active = hweight32(dev_priv->active_crtcs); - ddb_size -= 4; /* 4 blocks for bypass path allocation */ - - nth_active_pipe = 0; - for_each_crtc(dev, crtc) { - if (!to_intel_crtc(crtc)->active) - continue; + ddb_size = INTEL_INFO(dev_priv)->ddb_size; + WARN_ON(ddb_size == 0); - if (crtc == for_crtc) - break; + ddb_size -= 4; /* 4 blocks for bypass path allocation */ - nth_active_pipe++; + /* + * If the state doesn't change the active CRTC's, then there's + * no need to recalculate; the existing pipe allocation limits + * should remain unchanged. Note that we're safe from racing + * commits since any racing commit that changes the active CRTC + * list would need to grab _all_ crtc locks, including the one + * we currently hold. 
+ */ + if (!intel_state->active_pipe_changes) { + *alloc = dev_priv->wm.skl_hw.ddb.pipe[pipe]; + return; } - pipe_size = ddb_size / config->num_pipes_active; - alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active; + nth_active_pipe = hweight32(intel_state->active_crtcs & + (drm_crtc_mask(for_crtc) - 1)); + pipe_size = ddb_size / hweight32(intel_state->active_crtcs); + alloc->start = nth_active_pipe * ddb_size / *num_active; alloc->end = alloc->start + pipe_size; } -static unsigned int skl_cursor_allocation(const struct intel_wm_config *config) +static unsigned int skl_cursor_allocation(int num_active) { - if (config->num_pipes_active == 1) + if (num_active == 1) return 32; return 8; @@ -2932,6 +3143,46 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, } } +/* + * Determines the downscale amount of a plane for the purposes of watermark calculations. + * The bspec defines downscale amount as: + * + * """ + * Horizontal down scale amount = maximum[1, Horizontal source size / + * Horizontal destination size] + * Vertical down scale amount = maximum[1, Vertical source size / + * Vertical destination size] + * Total down scale amount = Horizontal down scale amount * + * Vertical down scale amount + * """ + * + * Return value is provided in 16.16 fixed point form to retain fractional part. + * Caller should take care of dividing & rounding off the value. + */ +static uint32_t +skl_plane_downscale_amount(const struct intel_plane_state *pstate) +{ + uint32_t downscale_h, downscale_w; + uint32_t src_w, src_h, dst_w, dst_h; + + if (WARN_ON(!pstate->base.visible)) + return DRM_PLANE_HELPER_NO_SCALING; + + /* n.b., src is 16.16 fixed point, dst is whole integer */ + src_w = drm_rect_width(&pstate->base.src); + src_h = drm_rect_height(&pstate->base.src); + dst_w = drm_rect_width(&pstate->base.dst); + dst_h = drm_rect_height(&pstate->base.dst); + if (intel_rotation_90_or_270(pstate->base.rotation)) + swap(dst_w, dst_h); + + downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); + downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); + + /* Provide result in 16.16 fixed point */ + return (uint64_t)downscale_w * downscale_h >> 16; +} + static unsigned int skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, @@ -2939,26 +3190,39 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, { struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); struct drm_framebuffer *fb = pstate->fb; + uint32_t down_scale_amount, data_rate; uint32_t width = 0, height = 0; + unsigned format = fb ? 
fb->pixel_format : DRM_FORMAT_XRGB8888; - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + if (!intel_pstate->base.visible) + return 0; + if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR) + return 0; + if (y && format != DRM_FORMAT_NV12) + return 0; + + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); /* for planar format */ - if (fb->pixel_format == DRM_FORMAT_NV12) { + if (format == DRM_FORMAT_NV12) { if (y) /* y-plane data rate */ - return width * height * - drm_format_plane_cpp(fb->pixel_format, 0); + data_rate = width * height * + drm_format_plane_cpp(format, 0); else /* uv-plane data rate */ - return (width / 2) * (height / 2) * - drm_format_plane_cpp(fb->pixel_format, 1); + data_rate = (width / 2) * (height / 2) * + drm_format_plane_cpp(format, 1); + } else { + /* for packed formats */ + data_rate = width * height * drm_format_plane_cpp(format, 0); } - /* for packed formats */ - return width * height * drm_format_plane_cpp(fb->pixel_format, 0); + down_scale_amount = skl_plane_downscale_amount(intel_pstate); + + return (uint64_t)data_rate * down_scale_amount >> 16; } /* @@ -2967,86 +3231,186 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, * 3 * 4096 * 8192 * 4 < 2^32 */ static unsigned int -skl_get_total_relative_data_rate(const struct intel_crtc_state *cstate) +skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) { - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); - struct drm_device *dev = intel_crtc->base.dev; + struct drm_crtc_state *cstate = &intel_cstate->base; + struct drm_atomic_state *state = cstate->state; + struct drm_crtc *crtc = cstate->crtc; + struct drm_device *dev = crtc->dev; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + const struct drm_plane *plane; const struct intel_plane *intel_plane; - unsigned int total_data_rate = 0; + struct drm_plane_state *pstate; + unsigned int rate, total_data_rate = 0; + int id; + int i; - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - const struct drm_plane_state *pstate = intel_plane->base.state; + if (WARN_ON(!state)) + return 0; - if (pstate->fb == NULL) - continue; + /* Calculate and cache data rate for each plane */ + for_each_plane_in_state(state, plane, pstate, i) { + id = skl_wm_plane_id(to_intel_plane(plane)); + intel_plane = to_intel_plane(plane); - if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR) + if (intel_plane->pipe != intel_crtc->pipe) continue; /* packed/uv */ - total_data_rate += skl_plane_relative_data_rate(cstate, - pstate, - 0); + rate = skl_plane_relative_data_rate(intel_cstate, + pstate, 0); + intel_cstate->wm.skl.plane_data_rate[id] = rate; - if (pstate->fb->pixel_format == DRM_FORMAT_NV12) - /* y-plane */ - total_data_rate += skl_plane_relative_data_rate(cstate, - pstate, - 1); + /* y-plane */ + rate = skl_plane_relative_data_rate(intel_cstate, + pstate, 1); + intel_cstate->wm.skl.plane_y_data_rate[id] = rate; + } + + /* Calculate CRTC's total data rate from cached values */ + for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { + int id = skl_wm_plane_id(intel_plane); + + /* packed/uv */ + total_data_rate += intel_cstate->wm.skl.plane_data_rate[id]; + total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; } return total_data_rate; } -static void +static uint16_t +skl_ddb_min_alloc(const struct drm_plane_state *pstate, 
+ const int y) +{ + struct drm_framebuffer *fb = pstate->fb; + struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); + uint32_t src_w, src_h; + uint32_t min_scanlines = 8; + uint8_t plane_bpp; + + if (WARN_ON(!fb)) + return 0; + + /* For packed formats, no y-plane, return 0 */ + if (y && fb->pixel_format != DRM_FORMAT_NV12) + return 0; + + /* For Non Y-tile return 8-blocks */ + if (fb->modifier[0] != I915_FORMAT_MOD_Y_TILED && + fb->modifier[0] != I915_FORMAT_MOD_Yf_TILED) + return 8; + + src_w = drm_rect_width(&intel_pstate->base.src) >> 16; + src_h = drm_rect_height(&intel_pstate->base.src) >> 16; + + if (intel_rotation_90_or_270(pstate->rotation)) + swap(src_w, src_h); + + /* Halve UV plane width and height for NV12 */ + if (fb->pixel_format == DRM_FORMAT_NV12 && !y) { + src_w /= 2; + src_h /= 2; + } + + if (fb->pixel_format == DRM_FORMAT_NV12 && !y) + plane_bpp = drm_format_plane_cpp(fb->pixel_format, 1); + else + plane_bpp = drm_format_plane_cpp(fb->pixel_format, 0); + + if (intel_rotation_90_or_270(pstate->rotation)) { + switch (plane_bpp) { + case 1: + min_scanlines = 32; + break; + case 2: + min_scanlines = 16; + break; + case 4: + min_scanlines = 8; + break; + case 8: + min_scanlines = 4; + break; + default: + WARN(1, "Unsupported pixel depth %u for rotation", + plane_bpp); + min_scanlines = 32; + } + } + + return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3; +} + +static int skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct skl_ddb_allocation *ddb /* out */) { + struct drm_atomic_state *state = cstate->base.state; struct drm_crtc *crtc = cstate->base.crtc; struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_wm_config *config = &dev_priv->wm.config; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_plane *intel_plane; + struct drm_plane *plane; + struct drm_plane_state *pstate; enum pipe pipe = intel_crtc->pipe; struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; uint16_t alloc_size, start, cursor_blocks; - uint16_t minimum[I915_MAX_PLANES]; - uint16_t y_minimum[I915_MAX_PLANES]; + uint16_t *minimum = cstate->wm.skl.minimum_blocks; + uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks; unsigned int total_data_rate; + int num_active; + int id, i; + + if (WARN_ON(!state)) + return 0; - skl_ddb_get_pipe_allocation_limits(dev, cstate, config, alloc); + if (!cstate->base.active) { + ddb->pipe[pipe].start = ddb->pipe[pipe].end = 0; + memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); + memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); + return 0; + } + + skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) { memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); - memset(&ddb->plane[pipe][PLANE_CURSOR], 0, - sizeof(ddb->plane[pipe][PLANE_CURSOR])); - return; + return 0; } - cursor_blocks = skl_cursor_allocation(config); + cursor_blocks = skl_cursor_allocation(num_active); ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks; ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; alloc_size -= cursor_blocks; - alloc->end -= cursor_blocks; /* 1. 
Allocate the mininum required blocks for each active plane */ - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - struct drm_plane *plane = &intel_plane->base; - struct drm_framebuffer *fb = plane->state->fb; - int id = skl_wm_plane_id(intel_plane); + for_each_plane_in_state(state, plane, pstate, i) { + intel_plane = to_intel_plane(plane); + id = skl_wm_plane_id(intel_plane); - if (!to_intel_plane_state(plane->state)->visible) + if (intel_plane->pipe != pipe) continue; - if (plane->type == DRM_PLANE_TYPE_CURSOR) + if (!to_intel_plane_state(pstate)->base.visible) { + minimum[id] = 0; + y_minimum[id] = 0; continue; + } + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + minimum[id] = 0; + y_minimum[id] = 0; + continue; + } - minimum[id] = 8; - alloc_size -= minimum[id]; - y_minimum[id] = (fb->pixel_format == DRM_FORMAT_NV12) ? 8 : 0; - alloc_size -= y_minimum[id]; + minimum[id] = skl_ddb_min_alloc(pstate, 0); + y_minimum[id] = skl_ddb_min_alloc(pstate, 1); + } + + for (i = 0; i < PLANE_CURSOR; i++) { + alloc_size -= minimum[i]; + alloc_size -= y_minimum[i]; } /* @@ -3056,21 +3420,16 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * FIXME: we may not allocate every single block here. */ total_data_rate = skl_get_total_relative_data_rate(cstate); + if (total_data_rate == 0) + return 0; start = alloc->start; for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - struct drm_plane *plane = &intel_plane->base; - struct drm_plane_state *pstate = intel_plane->base.state; unsigned int data_rate, y_data_rate; uint16_t plane_blocks, y_plane_blocks = 0; int id = skl_wm_plane_id(intel_plane); - if (!to_intel_plane_state(pstate)->visible) - continue; - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; - - data_rate = skl_plane_relative_data_rate(cstate, pstate, 0); + data_rate = cstate->wm.skl.plane_data_rate[id]; /* * allocation for (packed formats) or (uv-plane part of planar format): @@ -3081,30 +3440,32 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, plane_blocks += div_u64((uint64_t)alloc_size * data_rate, total_data_rate); - ddb->plane[pipe][id].start = start; - ddb->plane[pipe][id].end = start + plane_blocks; + /* Leave disabled planes at (0,0) */ + if (data_rate) { + ddb->plane[pipe][id].start = start; + ddb->plane[pipe][id].end = start + plane_blocks; + } start += plane_blocks; /* * allocation for y_plane part of planar format: */ - if (pstate->fb->pixel_format == DRM_FORMAT_NV12) { - y_data_rate = skl_plane_relative_data_rate(cstate, - pstate, - 1); - y_plane_blocks = y_minimum[id]; - y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, - total_data_rate); + y_data_rate = cstate->wm.skl.plane_y_data_rate[id]; + y_plane_blocks = y_minimum[id]; + y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, + total_data_rate); + + if (y_data_rate) { ddb->y_plane[pipe][id].start = start; ddb->y_plane[pipe][id].end = start + y_plane_blocks; - - start += y_plane_blocks; } + start += y_plane_blocks; } + return 0; } static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config) @@ -3133,27 +3494,14 @@ static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latenc } static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, - uint32_t horiz_pixels, uint8_t cpp, - uint64_t tiling, uint32_t latency) + uint32_t latency, uint32_t plane_blocks_per_line) { uint32_t ret; - uint32_t plane_bytes_per_line, plane_blocks_per_line; uint32_t wm_intermediate_val; if (latency == 0) return UINT_MAX; - plane_bytes_per_line = 
horiz_pixels * cpp; - - if (tiling == I915_FORMAT_MOD_Y_TILED || - tiling == I915_FORMAT_MOD_Yf_TILED) { - plane_bytes_per_line *= 4; - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line /= 4; - } else { - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); - } - wm_intermediate_val = latency * pixel_rate; ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * plane_blocks_per_line; @@ -3161,35 +3509,41 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, return ret; } -static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, - const struct intel_crtc *intel_crtc) +static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, + struct intel_plane_state *pstate) { - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + uint64_t adjusted_pixel_rate; + uint64_t downscale_amount; + uint64_t pixel_rate; + + /* Shouldn't reach here on disabled planes... */ + if (WARN_ON(!pstate->base.visible)) + return 0; /* - * If ddb allocation of pipes changed, it may require recalculation of - * watermarks + * Adjusted plane pixel rate is just the pipe's adjusted pixel rate + * with additional adjustments for plane-specific scaling. */ - if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe))) - return true; + adjusted_pixel_rate = skl_pipe_pixel_rate(cstate); + downscale_amount = skl_plane_downscale_amount(pstate); - return false; + pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; + WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); + + return pixel_rate; } -static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, - struct intel_crtc_state *cstate, - struct intel_plane *intel_plane, - uint16_t ddb_allocation, - int level, - uint16_t *out_blocks, /* out */ - uint8_t *out_lines /* out */) +static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, + struct intel_crtc_state *cstate, + struct intel_plane_state *intel_pstate, + uint16_t ddb_allocation, + int level, + uint16_t *out_blocks, /* out */ + uint8_t *out_lines, /* out */ + bool *enabled /* out */) { - struct drm_plane *plane = &intel_plane->base; - struct drm_framebuffer *fb = plane->state->fb; - struct intel_plane_state *intel_pstate = - to_intel_plane_state(plane->state); + struct drm_plane_state *pstate = &intel_pstate->base; + struct drm_framebuffer *fb = pstate->fb; uint32_t latency = dev_priv->wm.skl_latency[level]; uint32_t method1, method2; uint32_t plane_bytes_per_line, plane_blocks_per_line; @@ -3197,50 +3551,68 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t selected_result; uint8_t cpp; uint32_t width = 0, height = 0; + uint32_t plane_pixel_rate; + uint32_t y_tile_minimum, y_min_scanlines; - if (latency == 0 || !cstate->base.active || !intel_pstate->visible) - return false; + if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { + *enabled = false; + return 0; + } - width = drm_rect_width(&intel_pstate->src) >> 16; - height = drm_rect_height(&intel_pstate->src) >> 16; + width = drm_rect_width(&intel_pstate->base.src) >> 16; + height = drm_rect_height(&intel_pstate->base.src) >> 16; - if (intel_rotation_90_or_270(plane->state->rotation)) + if (intel_rotation_90_or_270(pstate->rotation)) swap(width, height); cpp = drm_format_plane_cpp(fb->pixel_format, 0); - method1 = 
skl_wm_method1(skl_pipe_pixel_rate(cstate), - cpp, latency); - method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate), - cstate->base.adjusted_mode.crtc_htotal, - width, - cpp, - fb->modifier[0], - latency); + plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); + + if (intel_rotation_90_or_270(pstate->rotation)) { + int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(fb->pixel_format, 1) : + drm_format_plane_cpp(fb->pixel_format, 0); + + switch (cpp) { + case 1: + y_min_scanlines = 16; + break; + case 2: + y_min_scanlines = 8; + break; + default: + WARN(1, "Unsupported pixel depth for rotation"); + case 4: + y_min_scanlines = 4; + break; + } + } else { + y_min_scanlines = 4; + } plane_bytes_per_line = width * cpp; - plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { + plane_blocks_per_line = + DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); + plane_blocks_per_line /= y_min_scanlines; + } else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + + 1; + } else { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); + } + + method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); + method2 = skl_wm_method2(plane_pixel_rate, + cstate->base.adjusted_mode.crtc_htotal, + latency, + plane_blocks_per_line); + + y_tile_minimum = plane_blocks_per_line * y_min_scanlines; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { - uint32_t min_scanlines = 4; - uint32_t y_tile_minimum; - if (intel_rotation_90_or_270(plane->state->rotation)) { - int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? - drm_format_plane_cpp(fb->pixel_format, 1) : - drm_format_plane_cpp(fb->pixel_format, 0); - - switch (cpp) { - case 1: - min_scanlines = 16; - break; - case 2: - min_scanlines = 8; - break; - case 8: - WARN(1, "Unsupported pixel depth for rotation"); - } - } - y_tile_minimum = plane_blocks_per_line * min_scanlines; selected_result = max(method2, y_tile_minimum); } else { if ((ddb_allocation / plane_blocks_per_line) >= 1) @@ -3254,46 +3626,108 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (level >= 1 && level <= 7) { if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) - res_lines += 4; - else + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { + res_blocks += y_tile_minimum; + res_lines += y_min_scanlines; + } else { res_blocks++; + } } - if (res_blocks >= ddb_allocation || res_lines > 31) - return false; + if (res_blocks >= ddb_allocation || res_lines > 31) { + *enabled = false; + + /* + * If there are no valid level 0 watermarks, then we can't + * support this display configuration. 
+ */ + if (level) { + return 0; + } else { + DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); + DRM_DEBUG_KMS("Plane %d.%d: blocks required = %u/%u, lines required = %u/31\n", + to_intel_crtc(cstate->base.crtc)->pipe, + skl_wm_plane_id(to_intel_plane(pstate->plane)), + res_blocks, ddb_allocation, res_lines); + + return -EINVAL; + } + } *out_blocks = res_blocks; *out_lines = res_lines; + *enabled = true; - return true; + return 0; } -static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, - struct skl_ddb_allocation *ddb, - struct intel_crtc_state *cstate, - int level, - struct skl_wm_level *result) +static int +skl_compute_wm_level(const struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb, + struct intel_crtc_state *cstate, + int level, + struct skl_wm_level *result) { - struct drm_device *dev = dev_priv->dev; + struct drm_atomic_state *state = cstate->base.state; struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); + struct drm_plane *plane; struct intel_plane *intel_plane; + struct intel_plane_state *intel_pstate; uint16_t ddb_blocks; enum pipe pipe = intel_crtc->pipe; + int ret; - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { + /* + * We'll only calculate watermarks for planes that are actually + * enabled, so make sure all other planes are set as disabled. + */ + memset(result, 0, sizeof(*result)); + + for_each_intel_plane_mask(&dev_priv->drm, + intel_plane, + cstate->base.plane_mask) { int i = skl_wm_plane_id(intel_plane); + plane = &intel_plane->base; + intel_pstate = NULL; + if (state) + intel_pstate = + intel_atomic_get_existing_plane_state(state, + intel_plane); + + /* + * Note: If we start supporting multiple pending atomic commits + * against the same planes/CRTC's in the future, plane->state + * will no longer be the correct pre-state to use for the + * calculations here and we'll need to change where we get the + * 'unchanged' plane data from. + * + * For now this is fine because we only allow one queued commit + * against a CRTC. Even if the plane isn't modified by this + * transaction and we don't have a plane lock, we still have + * the CRTC's lock, so we know that no other transactions are + * racing with us to update it. 
+ */ + if (!intel_pstate) + intel_pstate = to_intel_plane_state(plane->state); + + WARN_ON(!intel_pstate->base.fb); + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); - result->plane_en[i] = skl_compute_plane_wm(dev_priv, - cstate, - intel_plane, - ddb_blocks, - level, - &result->plane_res_b[i], - &result->plane_res_l[i]); + ret = skl_compute_plane_wm(dev_priv, + cstate, + intel_pstate, + ddb_blocks, + level, + &result->plane_res_b[i], + &result->plane_res_l[i], + &result->plane_en[i]); + if (ret) + return ret; } + + return 0; } static uint32_t @@ -3327,21 +3761,26 @@ static void skl_compute_transition_wm(struct intel_crtc_state *cstate, } } -static void skl_compute_pipe_wm(struct intel_crtc_state *cstate, - struct skl_ddb_allocation *ddb, - struct skl_pipe_wm *pipe_wm) +static int skl_build_pipe_wm(struct intel_crtc_state *cstate, + struct skl_ddb_allocation *ddb, + struct skl_pipe_wm *pipe_wm) { struct drm_device *dev = cstate->base.crtc->dev; - const struct drm_i915_private *dev_priv = dev->dev_private; + const struct drm_i915_private *dev_priv = to_i915(dev); int level, max_level = ilk_wm_max_level(dev); + int ret; for (level = 0; level <= max_level; level++) { - skl_compute_wm_level(dev_priv, ddb, cstate, - level, &pipe_wm->wm[level]); + ret = skl_compute_wm_level(dev_priv, ddb, cstate, + level, &pipe_wm->wm[level]); + if (ret) + return ret; } pipe_wm->linetime = skl_compute_linetime_wm(cstate); skl_compute_transition_wm(cstate, &pipe_wm->trans_wm); + + return 0; } static void skl_compute_wm_results(struct drm_device *dev, @@ -3411,293 +3850,345 @@ static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, I915_WRITE(reg, 0); } -static void skl_write_wm_values(struct drm_i915_private *dev_priv, - const struct skl_wm_values *new) +void skl_write_plane_wm(struct intel_crtc *intel_crtc, + const struct skl_wm_values *wm, + int plane) { - struct drm_device *dev = dev_priv->dev; - struct intel_crtc *crtc; - - for_each_intel_crtc(dev, crtc) { - int i, level, max_level = ilk_wm_max_level(dev); - enum pipe pipe = crtc->pipe; + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + int level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = intel_crtc->pipe; - if (!new->dirty[pipe]) - continue; + for (level = 0; level <= max_level; level++) { + I915_WRITE(PLANE_WM(pipe, plane, level), + wm->plane[pipe][plane][level]); + } + I915_WRITE(PLANE_WM_TRANS(pipe, plane), wm->plane_trans[pipe][plane]); - I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); + skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane), + &wm->ddb.plane[pipe][plane]); + skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane), + &wm->ddb.y_plane[pipe][plane]); +} - for (level = 0; level <= max_level; level++) { - for (i = 0; i < intel_num_planes(crtc); i++) - I915_WRITE(PLANE_WM(pipe, i, level), - new->plane[pipe][i][level]); - I915_WRITE(CUR_WM(pipe, level), - new->plane[pipe][PLANE_CURSOR][level]); - } - for (i = 0; i < intel_num_planes(crtc); i++) - I915_WRITE(PLANE_WM_TRANS(pipe, i), - new->plane_trans[pipe][i]); - I915_WRITE(CUR_WM_TRANS(pipe), - new->plane_trans[pipe][PLANE_CURSOR]); - - for (i = 0; i < intel_num_planes(crtc); i++) { - skl_ddb_entry_write(dev_priv, - PLANE_BUF_CFG(pipe, i), - &new->ddb.plane[pipe][i]); - skl_ddb_entry_write(dev_priv, - PLANE_NV12_BUF_CFG(pipe, i), - &new->ddb.y_plane[pipe][i]); - } +void skl_write_cursor_wm(struct intel_crtc *intel_crtc, + const struct skl_wm_values *wm) +{ + 
struct drm_crtc *crtc = &intel_crtc->base; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + int level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = intel_crtc->pipe; - skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), - &new->ddb.plane[pipe][PLANE_CURSOR]); + for (level = 0; level <= max_level; level++) { + I915_WRITE(CUR_WM(pipe, level), + wm->plane[pipe][PLANE_CURSOR][level]); } + I915_WRITE(CUR_WM_TRANS(pipe), wm->plane_trans[pipe][PLANE_CURSOR]); + + skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), + &wm->ddb.plane[pipe][PLANE_CURSOR]); } -/* - * When setting up a new DDB allocation arrangement, we need to correctly - * sequence the times at which the new allocations for the pipes are taken into - * account or we'll have pipes fetching from space previously allocated to - * another pipe. - * - * Roughly the sequence looks like: - * 1. re-allocate the pipe(s) with the allocation being reduced and not - * overlapping with a previous light-up pipe (another way to put it is: - * pipes with their new allocation strickly included into their old ones). - * 2. re-allocate the other pipes that get their allocation reduced - * 3. allocate the pipes having their allocation increased - * - * Steps 1. and 2. are here to take care of the following case: - * - Initially DDB looks like this: - * | B | C | - * - enable pipe A. - * - pipe B has a reduced DDB allocation that overlaps with the old pipe C - * allocation - * | A | B | C | - * - * We need to sequence the re-allocation: C, B, A (and not B, C, A). - */ +bool skl_ddb_allocation_equals(const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe) +{ + return new->pipe[pipe].start == old->pipe[pipe].start && + new->pipe[pipe].end == old->pipe[pipe].end; +} -static void -skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass) +static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, + const struct skl_ddb_entry *b) { - int plane; + return a->start < b->end && b->start < a->end; +} - DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); +bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, + const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe) +{ + struct drm_device *dev = state->dev; + struct intel_crtc *intel_crtc; + enum pipe otherp; - for_each_plane(dev_priv, pipe, plane) { - I915_WRITE(PLANE_SURF(pipe, plane), - I915_READ(PLANE_SURF(pipe, plane))); + for_each_intel_crtc(dev, intel_crtc) { + otherp = intel_crtc->pipe; + + if (otherp == pipe) + continue; + + if (skl_ddb_entries_overlap(&new->pipe[pipe], + &old->pipe[otherp])) + return true; } - I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); + + return false; } -static bool -skl_ddb_allocation_included(const struct skl_ddb_allocation *old, - const struct skl_ddb_allocation *new, - enum pipe pipe) +static int skl_update_pipe_wm(struct drm_crtc_state *cstate, + struct skl_ddb_allocation *ddb, /* out */ + struct skl_pipe_wm *pipe_wm, /* out */ + bool *changed /* out */) { - uint16_t old_size, new_size; + struct intel_crtc *intel_crtc = to_intel_crtc(cstate->crtc); + struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); + int ret; - old_size = skl_ddb_entry_size(&old->pipe[pipe]); - new_size = skl_ddb_entry_size(&new->pipe[pipe]); + ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm); + if (ret) + return ret; - return old_size != new_size && - new->pipe[pipe].start >= old->pipe[pipe].start 
&& - new->pipe[pipe].end <= old->pipe[pipe].end; + if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm))) + *changed = false; + else + *changed = true; + + return 0; } -static void skl_flush_wm_values(struct drm_i915_private *dev_priv, - struct skl_wm_values *new_values) +static uint32_t +pipes_modified(struct drm_atomic_state *state) { - struct drm_device *dev = dev_priv->dev; - struct skl_ddb_allocation *cur_ddb, *new_ddb; - bool reallocated[I915_MAX_PIPES] = {}; - struct intel_crtc *crtc; - enum pipe pipe; + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + uint32_t i, ret = 0; - new_ddb = &new_values->ddb; - cur_ddb = &dev_priv->wm.skl_hw.ddb; + for_each_crtc_in_state(state, crtc, cstate, i) + ret |= drm_crtc_mask(crtc); - /* - * First pass: flush the pipes with the new allocation contained into - * the old space. - * - * We'll wait for the vblank on those pipes to ensure we can safely - * re-allocate the freed space without this pipe fetching from it. - */ - for_each_intel_crtc(dev, crtc) { - if (!crtc->active) - continue; + return ret; +} - pipe = crtc->pipe; +int +skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) +{ + struct drm_atomic_state *state = cstate->base.state; + struct drm_device *dev = state->dev; + struct drm_crtc *crtc = cstate->base.crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; + struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_plane_state *plane_state; + struct drm_plane *plane; + enum pipe pipe = intel_crtc->pipe; + int id; - if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe)) - continue; + WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); + + drm_for_each_plane_mask(plane, dev, crtc->state->plane_mask) { + id = skl_wm_plane_id(to_intel_plane(plane)); - skl_wm_flush_pipe(dev_priv, pipe, 1); - intel_wait_for_vblank(dev, pipe); + if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][id], + &new_ddb->plane[pipe][id]) && + skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][id], + &new_ddb->y_plane[pipe][id])) + continue; - reallocated[pipe] = true; + plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); } + return 0; +} + +static int +skl_compute_ddb(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct intel_crtc *intel_crtc; + struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb; + uint32_t realloc_pipes = pipes_modified(state); + int ret; /* - * Second pass: flush the pipes that are having their allocation - * reduced, but overlapping with a previous allocation. - * - * Here as well we need to wait for the vblank to make sure the freed - * space is not used anymore. + * If this is our first atomic update following hardware readout, + * we can't trust the DDB that the BIOS programmed for us. Let's + * pretend that all pipes switched active status so that we'll + * ensure a full DDB recompute. 
*/ - for_each_intel_crtc(dev, crtc) { - if (!crtc->active) - continue; - - pipe = crtc->pipe; + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; - if (reallocated[pipe]) - continue; + intel_state->active_pipe_changes = ~0; - if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) < - skl_ddb_entry_size(&cur_ddb->pipe[pipe])) { - skl_wm_flush_pipe(dev_priv, pipe, 2); - intel_wait_for_vblank(dev, pipe); - reallocated[pipe] = true; - } + /* + * We usually only initialize intel_state->active_crtcs if we + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too. + */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; } /* - * Third pass: flush the pipes that got more space allocated. + * If the modeset changes which CRTC's are active, we need to + * recompute the DDB allocation for *all* active pipes, even + * those that weren't otherwise being modified in any way by this + * atomic commit. Due to the shrinking of the per-pipe allocations + * when new active CRTC's are added, it's possible for a pipe that + * we were already using and aren't changing at all here to suddenly + * become invalid if its DDB needs exceeds its new allocation. * - * We don't need to actively wait for the update here, next vblank - * will just get more DDB space with the correct WM values. + * Note that if we wind up doing a full DDB recompute, we can't let + * any other display updates race with this transaction, so we need + * to grab the lock on *all* CRTC's. */ - for_each_intel_crtc(dev, crtc) { - if (!crtc->active) - continue; + if (intel_state->active_pipe_changes) { + realloc_pipes = ~0; + intel_state->wm_results.dirty_pipes = ~0; + } - pipe = crtc->pipe; + for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { + struct intel_crtc_state *cstate; - /* - * At this point, only the pipes more space than before are - * left to re-allocate. 
- */ - if (reallocated[pipe]) - continue; + cstate = intel_atomic_get_crtc_state(state, intel_crtc); + if (IS_ERR(cstate)) + return PTR_ERR(cstate); - skl_wm_flush_pipe(dev_priv, pipe, 3); + ret = skl_allocate_pipe_ddb(cstate, ddb); + if (ret) + return ret; + + ret = skl_ddb_add_affected_planes(cstate); + if (ret) + return ret; } + + return 0; } -static bool skl_update_pipe_wm(struct drm_crtc *crtc, - struct skl_ddb_allocation *ddb, /* out */ - struct skl_pipe_wm *pipe_wm /* out */) +static void +skl_copy_wm_for_pipe(struct skl_wm_values *dst, + struct skl_wm_values *src, + enum pipe pipe) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); - - skl_allocate_pipe_ddb(cstate, ddb); - skl_compute_pipe_wm(cstate, ddb, pipe_wm); + dst->wm_linetime[pipe] = src->wm_linetime[pipe]; + memcpy(dst->plane[pipe], src->plane[pipe], + sizeof(dst->plane[pipe])); + memcpy(dst->plane_trans[pipe], src->plane_trans[pipe], + sizeof(dst->plane_trans[pipe])); - if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm))) - return false; - - intel_crtc->wm.active.skl = *pipe_wm; - - return true; + dst->ddb.pipe[pipe] = src->ddb.pipe[pipe]; + memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], + sizeof(dst->ddb.y_plane[pipe])); + memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], + sizeof(dst->ddb.plane[pipe])); } -static void skl_update_other_pipe_wm(struct drm_device *dev, - struct drm_crtc *crtc, - struct skl_wm_values *r) +static int +skl_compute_wm(struct drm_atomic_state *state) { - struct intel_crtc *intel_crtc; - struct intel_crtc *this_crtc = to_intel_crtc(crtc); + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_wm_values *results = &intel_state->wm_results; + struct skl_pipe_wm *pipe_wm; + bool changed = false; + int ret, i; /* - * If the WM update hasn't changed the allocation for this_crtc (the - * crtc we are currently computing the new WM values for), other - * enabled crtcs will keep the same allocation and we don't need to - * recompute anything for them. + * If this transaction isn't actually touching any CRTC's, don't + * bother with watermark calculation. Note that if we pass this + * test, we're guaranteed to hold at least one CRTC state mutex, + * which means we can safely use values like dev_priv->active_crtcs + * since any racing commits that want to update them would need to + * hold _all_ CRTC state mutexes. */ - if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) - return; + for_each_crtc_in_state(state, crtc, cstate, i) + changed = true; + if (!changed) + return 0; + + /* Clear all dirty flags */ + results->dirty_pipes = 0; + + ret = skl_compute_ddb(state); + if (ret) + return ret; /* - * Otherwise, because of this_crtc being freshly enabled/disabled, the - * other active pipes need new DDB allocation and WM values. + * Calculate WM's for all pipes that are part of this transaction. + * Note that the DDB allocation above may have added more CRTC's that + * weren't otherwise being modified (and set bits in dirty_pipes) if + * pipe allocations had to change. + * + * FIXME: Now that we're doing this in the atomic check phase, we + * should allow skl_update_pipe_wm() to return failure in cases where + * no suitable watermark values can be found. 
*/ - for_each_intel_crtc(dev, intel_crtc) { - struct skl_pipe_wm pipe_wm = {}; - bool wm_changed; + for_each_crtc_in_state(state, crtc, cstate, i) { + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc_state *intel_cstate = + to_intel_crtc_state(cstate); + + pipe_wm = &intel_cstate->wm.skl.optimal; + ret = skl_update_pipe_wm(cstate, &results->ddb, pipe_wm, + &changed); + if (ret) + return ret; - if (this_crtc->pipe == intel_crtc->pipe) - continue; + if (changed) + results->dirty_pipes |= drm_crtc_mask(crtc); - if (!intel_crtc->active) + if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) + /* This pipe's WM's did not change */ continue; - wm_changed = skl_update_pipe_wm(&intel_crtc->base, - &r->ddb, &pipe_wm); - - /* - * If we end up re-computing the other pipe WM values, it's - * because it was really needed, so we expect the WM values to - * be different. - */ - WARN_ON(!wm_changed); - - skl_compute_wm_results(dev, &pipe_wm, r, intel_crtc); - r->dirty[intel_crtc->pipe] = true; + intel_cstate->update_wm_pre = true; + skl_compute_wm_results(crtc->dev, pipe_wm, results, intel_crtc); } -} - -static void skl_clear_wm(struct skl_wm_values *watermarks, enum pipe pipe) -{ - watermarks->wm_linetime[pipe] = 0; - memset(watermarks->plane[pipe], 0, - sizeof(uint32_t) * 8 * I915_MAX_PLANES); - memset(watermarks->plane_trans[pipe], - 0, sizeof(uint32_t) * I915_MAX_PLANES); - watermarks->plane_trans[pipe][PLANE_CURSOR] = 0; - - /* Clear ddb entries for pipe */ - memset(&watermarks->ddb.pipe[pipe], 0, sizeof(struct skl_ddb_entry)); - memset(&watermarks->ddb.plane[pipe], 0, - sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); - memset(&watermarks->ddb.y_plane[pipe], 0, - sizeof(struct skl_ddb_entry) * I915_MAX_PLANES); - memset(&watermarks->ddb.plane[pipe][PLANE_CURSOR], 0, - sizeof(struct skl_ddb_entry)); + return 0; } static void skl_update_wm(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); - struct skl_pipe_wm *pipe_wm = &cstate->wm.optimal.skl; + struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; + enum pipe pipe = intel_crtc->pipe; + if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) + return; - /* Clear all dirty flags */ - memset(results->dirty, 0, sizeof(bool) * I915_MAX_PIPES); + intel_crtc->wm.active.skl = *pipe_wm; - skl_clear_wm(results, intel_crtc->pipe); + mutex_lock(&dev_priv->wm.wm_mutex); - if (!skl_update_pipe_wm(crtc, &results->ddb, pipe_wm)) - return; + /* + * If this pipe isn't active already, we're going to be enabling it + * very soon. Since it's safe to update a pipe's ddb allocation while + * the pipe's shut off, just do so here. Already active pipes will have + * their watermarks updated once we update their planes. 
+ */ + if (crtc->state->active_changed) { + int plane; - skl_compute_wm_results(dev, pipe_wm, results, intel_crtc); - results->dirty[intel_crtc->pipe] = true; + for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) + skl_write_plane_wm(intel_crtc, results, plane); + + skl_write_cursor_wm(intel_crtc, results); + } - skl_update_other_pipe_wm(dev, crtc, results); - skl_write_wm_values(dev_priv, results); - skl_flush_wm_values(dev_priv, results); + skl_copy_wm_for_pipe(hw_vals, results, pipe); - /* store the new configuration */ - dev_priv->wm.skl_hw = *results; + mutex_unlock(&dev_priv->wm.wm_mutex); } static void ilk_compute_wm_config(struct drm_device *dev, @@ -3720,7 +4211,7 @@ static void ilk_compute_wm_config(struct drm_device *dev, static void ilk_program_watermarks(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; + struct drm_device *dev = &dev_priv->drm; struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; struct ilk_wm_maximums max; struct intel_wm_config config = {}; @@ -3757,7 +4248,7 @@ static void ilk_initial_watermarks(struct intel_crtc_state *cstate) struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); mutex_lock(&dev_priv->wm.wm_mutex); - intel_crtc->wm.active.ilk = cstate->wm.intermediate; + intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate; ilk_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -3769,7 +4260,7 @@ static void ilk_optimize_watermarks(struct intel_crtc_state *cstate) mutex_lock(&dev_priv->wm.wm_mutex); if (cstate->wm.need_postvbl_update) { - intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk; + intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal; ilk_program_watermarks(dev_priv); } mutex_unlock(&dev_priv->wm.wm_mutex); @@ -3822,11 +4313,11 @@ static void skl_pipe_wm_active_state(uint32_t val, static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *hw = &dev_priv->wm.skl_hw; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); - struct skl_pipe_wm *active = &cstate->wm.optimal.skl; + struct skl_pipe_wm *active = &cstate->wm.skl.optimal; enum pipe pipe = intel_crtc->pipe; int level, i, max_level; uint32_t temp; @@ -3849,7 +4340,7 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) if (!intel_crtc->active) return; - hw->dirty[pipe] = true; + hw->dirty_pipes |= drm_crtc_mask(crtc); active->linetime = hw->wm_linetime[pipe]; @@ -3876,23 +4367,31 @@ static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) void skl_wm_get_hw_state(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; struct drm_crtc *crtc; skl_ddb_get_hw_state(dev_priv, ddb); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) skl_pipe_wm_get_hw_state(crtc); + + if (dev_priv->active_crtcs) { + /* Fully recompute DDB on first atomic commit */ + dev_priv->wm.distrust_bios_wm = true; + } else { + /* Easy/common case; just sanitize DDB now if everything off */ + memset(ddb, 0, sizeof(*ddb)); + } } static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values 
*hw = &dev_priv->wm.hw; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); - struct intel_pipe_wm *active = &cstate->wm.optimal.ilk; + struct intel_pipe_wm *active = &cstate->wm.ilk.optimal; enum pipe pipe = intel_crtc->pipe; static const i915_reg_t wm0_pipe_reg[] = { [PIPE_A] = WM0_PIPEA_ILK, @@ -4092,7 +4591,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) void ilk_wm_get_hw_state(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; struct drm_crtc *crtc; @@ -4154,7 +4653,7 @@ void ilk_wm_get_hw_state(struct drm_device *dev) */ void intel_update_watermarks(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); if (dev_priv->display.update_wm) dev_priv->display.update_wm(crtc); @@ -4169,9 +4668,8 @@ DEFINE_SPINLOCK(mchdev_lock); * mchdev_lock. */ static struct drm_i915_private *i915_mch_dev; -bool ironlake_set_drps(struct drm_device *dev, u8 val) +bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { - struct drm_i915_private *dev_priv = dev->dev_private; u16 rgvswctl; assert_spin_locked(&mchdev_lock); @@ -4193,9 +4691,8 @@ bool ironlake_set_drps(struct drm_device *dev, u8 val) return true; } -static void ironlake_enable_drps(struct drm_device *dev) +static void ironlake_enable_drps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; u32 rgvmodectl; u8 fmax, fmin, fstart, vstart; @@ -4252,7 +4749,7 @@ static void ironlake_enable_drps(struct drm_device *dev) DRM_ERROR("stuck trying to change perf mode\n"); mdelay(1); - ironlake_set_drps(dev, fstart); + ironlake_set_drps(dev_priv, fstart); dev_priv->ips.last_count1 = I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC); @@ -4263,9 +4760,8 @@ static void ironlake_enable_drps(struct drm_device *dev) spin_unlock_irq(&mchdev_lock); } -static void ironlake_disable_drps(struct drm_device *dev) +static void ironlake_disable_drps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; u16 rgvswctl; spin_lock_irq(&mchdev_lock); @@ -4280,7 +4776,7 @@ static void ironlake_disable_drps(struct drm_device *dev) I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); /* Go back to the starting frequency */ - ironlake_set_drps(dev, dev_priv->ips.fstart); + ironlake_set_drps(dev_priv, dev_priv->ips.fstart); mdelay(1); rgvswctl |= MEMCTL_CMD_STS; I915_WRITE(MEMSWCTL, rgvswctl); @@ -4326,19 +4822,23 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) new_power = dev_priv->rps.power; switch (dev_priv->rps.power) { case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) + if (val > dev_priv->rps.efficient_freq + 1 && + val > dev_priv->rps.cur_freq) new_power = BETWEEN; break; case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) + if (val <= dev_priv->rps.efficient_freq && + val < dev_priv->rps.cur_freq) new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) + else if (val >= dev_priv->rps.rp0_freq && + val > dev_priv->rps.cur_freq) new_power = HIGH_POWER; break; case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) + if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 
&& + val < dev_priv->rps.cur_freq) new_power = BETWEEN; break; } @@ -4384,22 +4884,24 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) } I915_WRITE(GEN6_RP_UP_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_up)); + GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100))); + GT_INTERVAL_FROM_US(dev_priv, + ei_up * threshold_up / 100)); I915_WRITE(GEN6_RP_DOWN_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_down)); + GT_INTERVAL_FROM_US(dev_priv, ei_down)); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100))); + GT_INTERVAL_FROM_US(dev_priv, + ei_down * threshold_down / 100)); - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; @@ -4424,12 +4926,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) /* gen6_set_rps is called to update the frequency request, but should also be * called when the range (min_delay and max_delay) is modified so that we can * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static void gen6_set_rps(struct drm_device *dev, u8 val) +static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - struct drm_i915_private *dev_priv = dev->dev_private; - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) return; WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -4442,10 +4942,10 @@ static void gen6_set_rps(struct drm_device *dev, u8 val) if (val != dev_priv->rps.cur_freq) { gen6_set_rps_thresholds(dev_priv, val); - if (IS_GEN9(dev)) + if (IS_GEN9(dev_priv)) I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val)); - else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(val)); else @@ -4467,15 +4967,13 @@ static void gen6_set_rps(struct drm_device *dev, u8 val) trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); } -static void valleyview_set_rps(struct drm_device *dev, u8 val) +static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { - struct drm_i915_private *dev_priv = dev->dev_private; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); WARN_ON(val < dev_priv->rps.min_freq); - if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), + if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), "Odd GPU freq value\n")) val &= ~1; @@ -4508,7 +5006,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) /* Wake up the media well, as that takes a lot less * power than the Render well. 
*/ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - valleyview_set_rps(dev_priv->dev, val); + valleyview_set_rps(dev_priv, val); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); } @@ -4520,22 +5018,36 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + + gen6_enable_rps_interrupts(dev_priv); + + /* Ensure we start at the user's desired frequency */ + intel_set_rps(dev_priv, + clamp(dev_priv->rps.cur_freq, + dev_priv->rps.min_freq_softlimit, + dev_priv->rps.max_freq_softlimit)); } mutex_unlock(&dev_priv->rps.hw_lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; + /* Flush our bottom-half so that it does not race with us + * setting the idle frequency and so that it is bounded by + * our rpm wakeref. And then disable the interrupts to stop any + * futher RPS reclocking whilst we are asleep. + */ + gen6_disable_rps_interrupts(dev_priv); mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); else - gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); + gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); dev_priv->rps.last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); @@ -4552,9 +5064,9 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!(dev_priv->mm.busy && + if (!(dev_priv->gt.awake && dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) return; /* Force a RPS boost (and don't count it against the client) if @@ -4568,7 +5080,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->rps.interrupts_enabled) { dev_priv->rps.client_boost = true; - queue_work(dev_priv->wq, &dev_priv->rps.work); + schedule_work(&dev_priv->rps.work); } spin_unlock_irq(&dev_priv->irq_lock); @@ -4581,49 +5093,39 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->rps.client_lock); } -void intel_set_rps(struct drm_device *dev, u8 val) +void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) - valleyview_set_rps(dev, val); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + valleyview_set_rps(dev_priv, val); else - gen6_set_rps(dev, val); + gen6_set_rps(dev_priv, val); } -static void gen9_disable_rc6(struct drm_device *dev) +static void gen9_disable_rc6(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(GEN6_RC_CONTROL, 0); I915_WRITE(GEN9_PG_ENABLE, 0); } -static void gen9_disable_rps(struct drm_device *dev) +static void gen9_disable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_device *dev) +static void gen6_disable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(GEN6_RC_CONTROL, 0); I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct 
drm_device *dev) +static void cherryview_disable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_device *dev) +static void valleyview_disable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - /* we're doing forcewake before Disabling RC6, * This what the BIOS expects when going into suspend */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -4633,34 +5135,45 @@ static void valleyview_disable_rps(struct drm_device *dev) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void intel_print_rc6_info(struct drm_device *dev, u32 mode) +static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) { - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) mode = GEN6_RC_CTL_RC6_ENABLE; else mode = 0; } - if (HAS_RC6p(dev)) - DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", - onoff(mode & GEN6_RC_CTL_RC6_ENABLE), - onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), - onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); + if (HAS_RC6p(dev_priv)) + DRM_DEBUG_DRIVER("Enabling RC6 states: " + "RC6 %s RC6p %s RC6pp %s\n", + onoff(mode & GEN6_RC_CTL_RC6_ENABLE), + onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), + onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); else - DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", - onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); + DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n", + onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); } -static bool bxt_check_bios_rc6_setup(const struct drm_device *dev) +static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; bool enable_rc6 = true; unsigned long rc6_ctx_base; + u32 rc_ctl; + int rc_sw_target; + + rc_ctl = I915_READ(GEN6_RC_CONTROL); + rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> + RC_SW_TARGET_STATE_SHIFT; + DRM_DEBUG_DRIVER("BIOS enabled RC states: " + "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", + onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), + onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), + rc_sw_target); if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { - DRM_DEBUG_KMS("RC6 Base location not set properly.\n"); + DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); enable_rc6 = false; } @@ -4672,7 +5185,7 @@ static bool bxt_check_bios_rc6_setup(const struct drm_device *dev) if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + ggtt->stolen_reserved_size))) { - DRM_DEBUG_KMS("RC6 Base address not as expected.\n"); + DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); enable_rc6 = false; } @@ -4680,31 +5193,40 @@ static bool bxt_check_bios_rc6_setup(const struct drm_device *dev) ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { - DRM_DEBUG_KMS("Engine Idle wait time not set properly.\n"); + DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + enable_rc6 = false; + } + + if (!I915_READ(GEN8_PUSHBUS_CONTROL) || + !I915_READ(GEN8_PUSHBUS_ENABLE) || + !I915_READ(GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); enable_rc6 = false; } - if (!(I915_READ(GEN6_RC_CONTROL) & 
(GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_HW_ENABLE)) && - ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) || - !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) { - DRM_DEBUG_KMS("HW/SW RC6 is not enabled by BIOS.\n"); + if (!I915_READ(GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!I915_READ(GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); enable_rc6 = false; } return enable_rc6; } -int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) +int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) { /* No RC6 before Ironlake and code is gone for ilk. */ - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_INFO(dev_priv)->gen < 6) return 0; if (!enable_rc6) return 0; - if (IS_BROXTON(dev) && !bxt_check_bios_rc6_setup(dev)) { + if (IS_BROXTON(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) { DRM_INFO("RC6 disabled by BIOS\n"); return 0; } @@ -4713,62 +5235,53 @@ int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) if (enable_rc6 >= 0) { int mask; - if (HAS_RC6p(dev)) + if (HAS_RC6p(dev_priv)) mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | INTEL_RC6pp_ENABLE; else mask = INTEL_RC6_ENABLE; if ((enable_rc6 & mask) != enable_rc6) - DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", - enable_rc6 & mask, enable_rc6, mask); + DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d " + "(requested %d, valid %d)\n", + enable_rc6 & mask, enable_rc6, mask); return enable_rc6 & mask; } - if (IS_IVYBRIDGE(dev)) + if (IS_IVYBRIDGE(dev_priv)) return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); return INTEL_RC6_ENABLE; } -int intel_enable_rc6(const struct drm_device *dev) +static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { - return i915.enable_rc6; -} - -static void gen6_init_rps_frequencies(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t rp_state_cap; - u32 ddcc_status = 0; - int ret; - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ - if (IS_BROXTON(dev)) { - rp_state_cap = I915_READ(BXT_RP_STATE_CAP); + if (IS_BROXTON(dev_priv)) { + u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; } else { - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; } - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; - if (IS_HASWELL(dev) || IS_BROADWELL(dev) || - IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { - ret = sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status); - if (0 == ret) + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || + IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status) == 0) dev_priv->rps.efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), @@ -4776,44 +5289,37 @@ static void gen6_init_rps_frequencies(struct drm_device *dev) dev_priv->rps.max_freq); } - if 
(IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is - the natural hardware unit for SKL */ + * the natural hardware unit for SKL + */ dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; } +} - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; +static void reset_rps(struct drm_i915_private *dev_priv, + void (*set)(struct drm_i915_private *, u8)) +{ + u8 freq = dev_priv->rps.cur_freq; - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + /* force a reset */ + dev_priv->rps.power = -1; + dev_priv->rps.cur_freq = -1; - if (dev_priv->rps.min_freq_softlimit == 0) { - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - dev_priv->rps.min_freq_softlimit = - max_t(int, dev_priv->rps.efficient_freq, - intel_freq_opcode(dev_priv, 450)); - else - dev_priv->rps.min_freq_softlimit = - dev_priv->rps.min_freq; - } + set(dev_priv, freq); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ -static void gen9_enable_rps(struct drm_device *dev) +static void gen9_enable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - gen6_init_rps_frequencies(dev); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { /* * BIOS could leave the Hw Turbo enabled, so need to explicitly * clear out the Control register just to avoid inconsitency @@ -4823,7 +5329,7 @@ static void gen9_enable_rps(struct drm_device *dev) * if the Turbo is left enabled in the Control register, as the * Up/Down interrupts would remain masked. 
*/ - gen9_disable_rps(dev); + gen9_disable_rps(dev_priv); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return; } @@ -4841,15 +5347,13 @@ static void gen9_enable_rps(struct drm_device *dev) /* Leaning on the below call to gen6_set_rps to program/setup the * Up/Down EI & threshold registers, as well as the RP_CONTROL, * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen9_enable_rc6(struct drm_device *dev) +static void gen9_enable_rc6(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; uint32_t rc6_mask = 0; @@ -4866,7 +5370,7 @@ static void gen9_enable_rc6(struct drm_device *dev) /* 2b: Program RC6 thresholds.*/ /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ - if (IS_SKYLAKE(dev)) + if (IS_SKYLAKE(dev_priv)) I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); else I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); @@ -4875,7 +5379,7 @@ static void gen9_enable_rc6(struct drm_device *dev) for_each_engine(engine, dev_priv) I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - if (HAS_GUC_UCODE(dev)) + if (HAS_GUC(dev_priv)) I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); I915_WRITE(GEN6_RC_SLEEP, 0); @@ -4885,12 +5389,12 @@ static void gen9_enable_rc6(struct drm_device *dev) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); /* WaRsUseTimeoutMode */ - if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) || - IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { + if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | GEN7_RC_CTL_TO_MODE | @@ -4906,19 +5410,17 @@ static void gen9_enable_rc6(struct drm_device *dev) * 3b: Enable Coarse Power Gating only when RC6 is enabled. * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. */ - if (NEEDS_WaRsDisableCoarsePowerGating(dev)) + if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) I915_WRITE(GEN9_PG_ENABLE, 0); else I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - } -static void gen8_enable_rps(struct drm_device *dev) +static void gen8_enable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; uint32_t rc6_mask = 0; @@ -4932,9 +5434,6 @@ static void gen8_enable_rps(struct drm_device *dev) /* 2a: Disable RC states. 
*/ I915_WRITE(GEN6_RC_CONTROL, 0); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev); - /* 2b: Program RC6 thresholds.*/ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ @@ -4942,16 +5441,16 @@ static void gen8_enable_rps(struct drm_device *dev) for_each_engine(engine, dev_priv) I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev)) + if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ else I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ /* 3: Enable RC6 */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - intel_print_rc6_info(dev, rc6_mask); - if (IS_BROADWELL(dev)) + intel_print_rc6_info(dev_priv, rc6_mask); + if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | GEN7_RC_CTL_TO_MODE | rc6_mask); @@ -4991,17 +5490,15 @@ static void gen8_enable_rps(struct drm_device *dev) /* 6: Ring frequency + overclocking (our driver does this later */ - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen6_enable_rps(struct drm_device *dev) +static void gen6_enable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 rc6vids, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; int ret; @@ -5025,9 +5522,6 @@ static void gen6_enable_rps(struct drm_device *dev) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Initialize rps frequencies */ - gen6_init_rps_frequencies(dev); - /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@ -5042,7 +5536,7 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev)) + if (IS_IVYBRIDGE(dev_priv)) I915_WRITE(GEN6_RC6_THRESHOLD, 125000); else I915_WRITE(GEN6_RC6_THRESHOLD, 50000); @@ -5050,12 +5544,12 @@ static void gen6_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(dev_priv->dev); + rc6_mode = intel_enable_rc6(); if (rc6_mode & INTEL_RC6_ENABLE) rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; /* We don't use those on Haswell */ - if (!IS_HASWELL(dev)) { + if (!IS_HASWELL(dev_priv)) { if (rc6_mode & INTEL_RC6p_ENABLE) rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; @@ -5063,7 +5557,7 @@ static void gen6_enable_rps(struct drm_device *dev) rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; } - intel_print_rc6_info(dev, rc6_mask); + intel_print_rc6_info(dev_priv, rc6_mask); I915_WRITE(GEN6_RC_CONTROL, rc6_mask | @@ -5078,22 +5572,13 @@ static void gen6_enable_rps(struct drm_device *dev) if (ret) DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. 
Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } - - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); + reset_rps(dev_priv, gen6_set_rps); rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - if (IS_GEN6(dev) && ret) { + if (IS_GEN6(dev_priv) && ret) { DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); rc6vids &= 0xffff00; @@ -5106,9 +5591,8 @@ static void gen6_enable_rps(struct drm_device *dev) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void __gen6_update_ring_freq(struct drm_device *dev) +static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; int min_freq = 15; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; @@ -5137,7 +5621,7 @@ static void __gen6_update_ring_freq(struct drm_device *dev) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -5155,16 +5639,16 @@ static void __gen6_update_ring_freq(struct drm_device *dev) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. */ ring_freq = gpu_freq; - } else if (INTEL_INFO(dev)->gen >= 8) { + } else if (INTEL_INFO(dev_priv)->gen >= 8) { /* max(2 * GT, DDR). 
NB: GT is 50MHz units */ ring_freq = max(min_ring_freq, gpu_freq); - } else if (IS_HASWELL(dev)) { + } else if (IS_HASWELL(dev_priv)) { ring_freq = mult_frac(gpu_freq, 5, 4); ring_freq = max(min_ring_freq, ring_freq); /* leave ia_freq as the default, chosen by cpufreq */ @@ -5191,26 +5675,13 @@ static void __gen6_update_ring_freq(struct drm_device *dev) } } -void gen6_update_ring_freq(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!HAS_CORE_RING_FREQ(dev)) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev); - mutex_unlock(&dev_priv->rps.hw_lock); -} - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; u32 val, rp0; val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - switch (INTEL_INFO(dev)->eu_total) { + switch (INTEL_INFO(dev_priv)->sseu.eu_total) { case 8: /* (2 * 4) config */ rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); @@ -5321,9 +5792,8 @@ static void cherryview_check_pctx(struct drm_i915_private *dev_priv) WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); } -static void cherryview_setup_pctx(struct drm_device *dev) +static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long pctx_paddr, paddr; u32 pcbr; @@ -5342,23 +5812,20 @@ static void cherryview_setup_pctx(struct drm_device *dev) DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); } -static void valleyview_setup_pctx(struct drm_device *dev) +static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *pctx; unsigned long pctx_paddr; u32 pcbr; int pctx_size = 24*1024; - mutex_lock(&dev->struct_mutex); - pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ int pcbr_offset; pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, + pctx = i915_gem_object_create_stolen_for_preallocated(&dev_priv->drm, pcbr_offset, I915_GTT_OFFSET_NONE, pctx_size); @@ -5375,7 +5842,7 @@ static void valleyview_setup_pctx(struct drm_device *dev) * overlap with other ranges, such as the frame buffer, protected * memory, or any other relevant ranges. 
*/ - pctx = i915_gem_object_create_stolen(dev, pctx_size); + pctx = i915_gem_object_create_stolen(&dev_priv->drm, pctx_size); if (!pctx) { DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); goto out; @@ -5387,17 +5854,14 @@ static void valleyview_setup_pctx(struct drm_device *dev) out: DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); dev_priv->vlv_pctx = pctx; - mutex_unlock(&dev->struct_mutex); } -static void valleyview_cleanup_pctx(struct drm_device *dev) +static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - if (WARN_ON(!dev_priv->vlv_pctx)) return; - drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base); + i915_gem_object_put_unlocked(dev_priv->vlv_pctx); dev_priv->vlv_pctx = NULL; } @@ -5412,17 +5876,14 @@ static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) dev_priv->rps.gpll_ref_freq); } -static void valleyview_init_gt_powersave(struct drm_device *dev) +static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; u32 val; - valleyview_setup_pctx(dev); + valleyview_setup_pctx(dev_priv); vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); switch ((val >> 6) & 3) { case 0: @@ -5458,30 +5919,16 @@ static void valleyview_init_gt_powersave(struct drm_device *dev) DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } -static void cherryview_init_gt_powersave(struct drm_device *dev) +static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; u32 val; - cherryview_setup_pctx(dev); + cherryview_setup_pctx(dev_priv); vlv_init_gpll_ref_freq(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); mutex_unlock(&dev_priv->sb_lock); @@ -5523,27 +5970,15 @@ static void cherryview_init_gt_powersave(struct drm_device *dev) dev_priv->rps.rp1_freq | dev_priv->rps.min_freq) & 1, "Odd GPU freq values\n"); - - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); } -static void valleyview_cleanup_gt_powersave(struct drm_device *dev) +static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - valleyview_cleanup_pctx(dev); + valleyview_cleanup_pctx(dev_priv); } -static void cherryview_enable_rps(struct drm_device *dev) +static void cherryview_enable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; u32 gtfifodbg, val, rc6_mode = 0, pcbr; @@ -5588,8 +6023,8 @@ static void cherryview_enable_rps(struct drm_device *dev) pcbr = I915_READ(VLV_PCBR); /* 3: Enable RC6 */ - if 
((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && - (pcbr >> VLV_PCBR_ADDR_SHIFT)) + if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && + (pcbr >> VLV_PCBR_ADDR_SHIFT)) rc6_mode = GEN7_RC_CTL_TO_MODE; I915_WRITE(GEN6_RC_CONTROL, rc6_mode); @@ -5625,23 +6060,13 @@ static void cherryview_enable_rps(struct drm_device *dev) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_device *dev) +static void valleyview_enable_rps(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *engine; u32 gtfifodbg, val, rc6_mode = 0; @@ -5694,10 +6119,10 @@ static void valleyview_enable_rps(struct drm_device *dev) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; - intel_print_rc6_info(dev, rc6_mode); + intel_print_rc6_info(dev_priv, rc6_mode); I915_WRITE(GEN6_RC_CONTROL, rc6_mode); @@ -5715,16 +6140,7 @@ static void valleyview_enable_rps(struct drm_device *dev) DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - dev_priv->rps.idle_freq); - - valleyview_set_rps(dev_priv->dev, dev_priv->rps.idle_freq); + reset_rps(dev_priv, valleyview_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } @@ -5814,10 +6230,9 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; unsigned long val; - if (INTEL_INFO(dev)->gen != 5) + if (INTEL_INFO(dev_priv)->gen != 5) return 0; spin_lock_irq(&mchdev_lock); @@ -5857,11 +6272,10 @@ static int _pxvid_to_vd(u8 pxvid) static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) { - struct drm_device *dev = dev_priv->dev; const int vd = _pxvid_to_vd(pxvid); const int vm = vd - 1125; - if (INTEL_INFO(dev)->is_mobile) + if (INTEL_INFO(dev_priv)->is_mobile) return vm > 0 ? 
vm : 0; return vd; @@ -5902,9 +6316,7 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) void i915_update_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; - - if (INTEL_INFO(dev)->gen != 5) + if (INTEL_INFO(dev_priv)->gen != 5) return; spin_lock_irq(&mchdev_lock); @@ -5953,10 +6365,9 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; unsigned long val; - if (INTEL_INFO(dev)->gen != 5) + if (INTEL_INFO(dev_priv)->gen != 5) return 0; spin_lock_irq(&mchdev_lock); @@ -6058,19 +6469,11 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower); */ bool i915_gpu_busy(void) { - struct drm_i915_private *dev_priv; - struct intel_engine_cs *engine; bool ret = false; spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; - - for_each_engine(engine, dev_priv) - ret |= !list_empty(&engine->request_list); - -out_unlock: + if (i915_mch_dev) + ret = i915_mch_dev->gt.awake; spin_unlock_irq(&mchdev_lock); return ret; @@ -6097,7 +6500,7 @@ bool i915_gpu_turbo_disable(void) dev_priv->ips.max_delay = dev_priv->ips.fstart; - if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) + if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart)) ret = false; out_unlock: @@ -6145,9 +6548,8 @@ void intel_gpu_ips_teardown(void) spin_unlock_irq(&mchdev_lock); } -static void intel_init_emon(struct drm_device *dev) +static void intel_init_emon(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; u32 lcfuse; u8 pxw[16]; int i; @@ -6216,10 +6618,8 @@ static void intel_init_emon(struct drm_device *dev) dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); } -void intel_init_gt_powersave(struct drm_device *dev) +void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - /* * RPM depends on RC6 to save restore the GT HW context, so make RC6 a * requirement. 
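Aside from centralizing the RPS limit setup in intel_init_gt_powersave() (the following hunk folds in the idle-frequency, soft-limit and overclock initialization that gen6_init_rps_frequencies(), valleyview_init_gt_powersave() and cherryview_init_gt_powersave() each used to do on their own), the bulk of these hunks converts helpers from taking struct drm_device * to struct drm_i915_private *. A minimal sketch of that conversion pattern follows, assuming only i915_drv.h for the types; example_old()/example_new() are hypothetical names, while to_i915(), &dev_priv->drm and INTEL_GEN() are the helpers the diff itself relies on:

	/* Illustrative sketch only, not part of the patch. */
	#include "i915_drv.h"

	/* Old style: every helper took the DRM device and re-derived dev_priv. */
	static int example_old(struct drm_device *dev)
	{
		struct drm_i915_private *dev_priv = to_i915(dev);

		return INTEL_GEN(dev_priv);	/* device/register access goes via dev_priv */
	}

	/* New style: callers pass dev_priv directly; the drm_device is
	 * recovered only where a DRM core interface really needs it. */
	static int example_new(struct drm_i915_private *dev_priv)
	{
		struct drm_device *dev = &dev_priv->drm;

		return dev->mode_config.num_crtc;	/* DRM core data stays reachable */
	}

Pushing the conversion out to the callers keeps the to_i915()/&dev_priv->drm translation at the few remaining boundaries that still deal in struct drm_device, instead of repeating it at the top of every power-management helper.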
@@ -6229,106 +6629,145 @@ void intel_init_gt_powersave(struct drm_device *dev) intel_runtime_pm_get(dev_priv); } - if (IS_CHERRYVIEW(dev)) - cherryview_init_gt_powersave(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_init_gt_powersave(dev); -} + mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->rps.hw_lock); -void intel_cleanup_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + /* Initialize RPS limits (for userspace) */ + if (IS_CHERRYVIEW(dev_priv)) + cherryview_init_gt_powersave(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_init_gt_powersave(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_init_rps_frequencies(dev_priv); - if (IS_CHERRYVIEW(dev)) - return; - else if (IS_VALLEYVIEW(dev)) - valleyview_cleanup_gt_powersave(dev); + /* Derive initial user preferences/limits from the hardware limits */ + dev_priv->rps.idle_freq = dev_priv->rps.min_freq; + dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + dev_priv->rps.min_freq_softlimit = + max_t(int, + dev_priv->rps.efficient_freq, + intel_freq_opcode(dev_priv, 450)); + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN6(dev_priv) || + IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { + u32 params = 0; + + sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, ¶ms); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (dev_priv->rps.max_freq & 0xff) * 50, + (params & 0xff) * 50); + dev_priv->rps.max_freq = params & 0xff; + } + } - if (!i915.enable_rc6) - intel_runtime_pm_put(dev_priv); + /* Finally allow us to boost to max by default */ + dev_priv->rps.boost_freq = dev_priv->rps.max_freq; + + mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_autoenable_gt_powersave(dev_priv); } -static void gen6_suspend_rps(struct drm_device *dev) +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + if (IS_VALLEYVIEW(dev_priv)) + valleyview_cleanup_gt_powersave(dev_priv); - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - gen6_disable_rps_interrupts(dev); + if (!i915.enable_rc6) + intel_runtime_pm_put(dev_priv); } /** * intel_suspend_gt_powersave - suspend PM work and helper threads - * @dev: drm device + * @dev_priv: i915 device * * We don't want to disable RC6 or other features here, we just want * to make sure any work we've queued has finished and won't bother * us while we're suspended. 
*/ -void intel_suspend_gt_powersave(struct drm_device *dev) +void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - if (INTEL_INFO(dev)->gen < 6) + if (INTEL_GEN(dev_priv) < 6) return; - gen6_suspend_rps(dev); + if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) + intel_runtime_pm_put(dev_priv); - /* Force GPU to min freq during suspend */ - gen6_rps_idle(dev_priv); + /* gen6_rps_idle() will be called later to disable interrupts */ } -void intel_disable_gt_powersave(struct drm_device *dev) +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + dev_priv->rps.enabled = true; /* force disabling */ + intel_disable_gt_powersave(dev_priv); - if (IS_IRONLAKE_M(dev)) { - ironlake_disable_drps(dev); - } else if (INTEL_INFO(dev)->gen >= 6) { - intel_suspend_gt_powersave(dev); + gen6_reset_rps_interrupts(dev_priv); +} - mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_INFO(dev)->gen >= 9) { - gen9_disable_rc6(dev); - gen9_disable_rps(dev); - } else if (IS_CHERRYVIEW(dev)) - cherryview_disable_rps(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_disable_rps(dev); - else - gen6_disable_rps(dev); +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) +{ + if (!READ_ONCE(dev_priv->rps.enabled)) + return; - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->rps.hw_lock); + + if (INTEL_GEN(dev_priv) >= 9) { + gen9_disable_rc6(dev_priv); + gen9_disable_rps(dev_priv); + } else if (IS_CHERRYVIEW(dev_priv)) { + cherryview_disable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_disable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_disable_rps(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_disable_drps(dev_priv); } + + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } -static void intel_gen6_powersave_work(struct work_struct *work) +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); - struct drm_device *dev = dev_priv->dev; + /* We shouldn't be disabling as we submit, so this should be less + * racy than it appears! 
+ */ + if (READ_ONCE(dev_priv->rps.enabled)) + return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; - gen6_reset_rps_interrupts(dev); + mutex_lock(&dev_priv->rps.hw_lock); - if (IS_CHERRYVIEW(dev)) { - cherryview_enable_rps(dev); - } else if (IS_VALLEYVIEW(dev)) { - valleyview_enable_rps(dev); - } else if (INTEL_INFO(dev)->gen >= 9) { - gen9_enable_rc6(dev); - gen9_enable_rps(dev); - if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) - __gen6_update_ring_freq(dev); - } else if (IS_BROADWELL(dev)) { - gen8_enable_rps(dev); - __gen6_update_ring_freq(dev); - } else { - gen6_enable_rps(dev); - __gen6_update_ring_freq(dev); + if (IS_CHERRYVIEW(dev_priv)) { + cherryview_enable_rps(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv)) { + valleyview_enable_rps(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 9) { + gen9_enable_rc6(dev_priv); + gen9_enable_rps(dev_priv); + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + gen6_update_ring_freq(dev_priv); + } else if (IS_BROADWELL(dev_priv)) { + gen8_enable_rps(dev_priv); + gen6_update_ring_freq(dev_priv); + } else if (INTEL_GEN(dev_priv) >= 6) { + gen6_enable_rps(dev_priv); + gen6_update_ring_freq(dev_priv); + } else if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); } WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); @@ -6338,28 +6777,53 @@ static void intel_gen6_powersave_work(struct work_struct *work) WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); dev_priv->rps.enabled = true; + mutex_unlock(&dev_priv->rps.hw_lock); +} - gen6_enable_rps_interrupts(dev); +static void __intel_autoenable_gt_powersave(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), rps.autoenable_work.work); + struct intel_engine_cs *rcs; + struct drm_i915_gem_request *req; - mutex_unlock(&dev_priv->rps.hw_lock); + if (READ_ONCE(dev_priv->rps.enabled)) + goto out; + + rcs = &dev_priv->engine[RCS]; + if (rcs->last_context) + goto out; + + if (!rcs->init_context) + goto out; + mutex_lock(&dev_priv->drm.struct_mutex); + + req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); + if (IS_ERR(req)) + goto unlock; + + if (!i915.enable_execlists && i915_switch_context(req) == 0) + rcs->init_context(req); + + /* Mark the device busy, calling intel_enable_gt_powersave() */ + i915_add_request_no_flush(req); + +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); +out: intel_runtime_pm_put(dev_priv); } -void intel_enable_gt_powersave(struct drm_device *dev) +void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev)) + if (READ_ONCE(dev_priv->rps.enabled)) return; - if (IS_IRONLAKE_M(dev)) { - ironlake_enable_drps(dev); - mutex_lock(&dev->struct_mutex); - intel_init_emon(dev); - mutex_unlock(&dev->struct_mutex); - } else if (INTEL_INFO(dev)->gen >= 6) { + if (IS_IRONLAKE_M(dev_priv)) { + ironlake_enable_drps(dev_priv); + intel_init_emon(dev_priv); + } else if (INTEL_INFO(dev_priv)->gen >= 6) { /* * PCU communication is slow and this doesn't need to be * done at any specific time, so do this out of our fast path @@ -6372,26 +6836,16 @@ void intel_enable_gt_powersave(struct drm_device *dev) * paths, so the _noresume version is enough (and in case of * runtime resume it's necessary). 
*/ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) + if (queue_delayed_work(dev_priv->wq, + &dev_priv->rps.autoenable_work, + round_jiffies_up_relative(HZ))) intel_runtime_pm_get_noresume(dev_priv); } } -void intel_reset_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (INTEL_INFO(dev)->gen < 6) - return; - - gen6_suspend_rps(dev); - dev_priv->rps.enabled = false; -} - static void ibx_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); /* * On Ibex Peak and Cougar Point, we need to disable clock @@ -6403,7 +6857,7 @@ static void ibx_init_clock_gating(struct drm_device *dev) static void g4x_disable_trickle_feed(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; for_each_pipe(dev_priv, pipe) { @@ -6418,7 +6872,7 @@ static void g4x_disable_trickle_feed(struct drm_device *dev) static void ilk_init_lp_watermarks(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); @@ -6432,7 +6886,7 @@ static void ilk_init_lp_watermarks(struct drm_device *dev) static void ironlake_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; /* @@ -6506,7 +6960,7 @@ static void ironlake_init_clock_gating(struct drm_device *dev) static void cpt_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); int pipe; uint32_t val; @@ -6543,7 +6997,7 @@ static void cpt_init_clock_gating(struct drm_device *dev) static void gen6_check_mch_setup(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t tmp; tmp = I915_READ(MCH_SSKPD); @@ -6554,7 +7008,7 @@ static void gen6_check_mch_setup(struct drm_device *dev) static void gen6_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); @@ -6669,7 +7123,7 @@ static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) static void lpt_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); /* * TODO: this bit should only be enabled when really needed, then @@ -6688,7 +7142,7 @@ static void lpt_init_clock_gating(struct drm_device *dev) static void lpt_suspend_hw(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); if (HAS_PCH_LPT_LP(dev)) { uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); @@ -6698,11 +7152,69 @@ static void lpt_suspend_hw(struct drm_device *dev) } } -static void broadwell_init_clock_gating(struct drm_device *dev) +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, + int general_prio_credits, + int high_prio_credits) +{ + u32 misccpctl; + + /* 
WaTempDisableDOPClkGating:bdw */ + misccpctl = I915_READ(GEN7_MISCCPCTL); + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); + + I915_WRITE(GEN8_L3SQCREG1, + L3_GENERAL_PRIO_CREDITS(general_prio_credits) | + L3_HIGH_PRIO_CREDITS(high_prio_credits)); + + /* + * Wait at least 100 clocks before re-enabling clock gating. + * See the definition of L3SQCREG1 in BSpec. + */ + POSTING_READ(GEN8_L3SQCREG1); + udelay(1); + I915_WRITE(GEN7_MISCCPCTL, misccpctl); +} + +static void kabylake_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen9_init_clock_gating(dev); + + /* WaDisableSDEUnitClockGating:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableGamClockGating:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | + GEN6_GAMUNIT_CLOCK_GATE_DISABLE); + + /* WaFbcNukeOnHostModify:kbl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_NUKE_ON_ANY_MODIFICATION); +} + +static void skylake_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + + gen9_init_clock_gating(dev); + + /* WAC6entrylatency:skl */ + I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | + FBC_LLC_FULLY_OPEN); + + /* WaFbcNukeOnHostModify:skl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_NUKE_ON_ANY_MODIFICATION); +} + +static void broadwell_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; - uint32_t misccpctl; ilk_init_lp_watermarks(dev); @@ -6733,20 +7245,8 @@ static void broadwell_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* - * WaProgramL3SqcReg1Default:bdw - * WaTempDisableDOPClkGating:bdw - */ - misccpctl = I915_READ(GEN7_MISCCPCTL); - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); - /* - * Wait at least 100 clocks before re-enabling clock gating. See - * the definition of L3SQCREG1 in BSpec. 
- */ - POSTING_READ(GEN8_L3SQCREG1); - udelay(1); - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + /* WaProgramL3SqcReg1Default:bdw */ + gen8_set_l3sqc_credits(dev_priv, 30, 2); /* * WaGttCachingOffByDefault:bdw @@ -6755,12 +7255,16 @@ static void broadwell_init_clock_gating(struct drm_device *dev) */ I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + /* WaKVMNotificationOnConfigChange:bdw */ + I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) + | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); + lpt_init_clock_gating(dev); } static void haswell_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); ilk_init_lp_watermarks(dev); @@ -6816,7 +7320,7 @@ static void haswell_init_clock_gating(struct drm_device *dev) static void ivybridge_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t snpcr; ilk_init_lp_watermarks(dev); @@ -6914,7 +7418,7 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) static void valleyview_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); /* WaDisableEarlyCull:vlv */ I915_WRITE(_3D_CHICKEN3, @@ -6996,7 +7500,7 @@ static void valleyview_init_clock_gating(struct drm_device *dev) static void cherryview_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); /* WaVSRefCountFullforceMissDisable:chv */ /* WaDSRefCountFullforceMissDisable:chv */ @@ -7017,6 +7521,13 @@ static void cherryview_init_clock_gating(struct drm_device *dev) GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* + * WaProgramL3SqcReg1Default:chv + * See gfxspecs/Related Documents/Performance Guide/ + * LSQC Setting Recommendations. + */ + gen8_set_l3sqc_credits(dev_priv, 38, 2); + + /* * GTT cache may not work with big pages, so if those * are ever enabled GTT cache may need to be disabled. 
*/ @@ -7025,7 +7536,7 @@ static void cherryview_init_clock_gating(struct drm_device *dev) static void g4x_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dspclk_gate; I915_WRITE(RENCLK_GATE_D1, 0); @@ -7052,7 +7563,7 @@ static void g4x_init_clock_gating(struct drm_device *dev) static void crestline_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); I915_WRITE(RENCLK_GATE_D2, 0); @@ -7068,7 +7579,7 @@ static void crestline_init_clock_gating(struct drm_device *dev) static void broadwater_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | I965_RCC_CLOCK_GATE_DISABLE | @@ -7085,7 +7596,7 @@ static void broadwater_init_clock_gating(struct drm_device *dev) static void gen3_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); u32 dstate = I915_READ(D_STATE); dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | @@ -7110,7 +7621,7 @@ static void gen3_init_clock_gating(struct drm_device *dev) static void i85x_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); @@ -7124,7 +7635,7 @@ static void i85x_init_clock_gating(struct drm_device *dev) static void i830_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); @@ -7135,7 +7646,7 @@ static void i830_init_clock_gating(struct drm_device *dev) void intel_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); dev_priv->display.init_clock_gating(dev); } @@ -7163,9 +7674,9 @@ static void nop_init_clock_gating(struct drm_device *dev) void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { if (IS_SKYLAKE(dev_priv)) - dev_priv->display.init_clock_gating = nop_init_clock_gating; + dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) - dev_priv->display.init_clock_gating = nop_init_clock_gating; + dev_priv->display.init_clock_gating = kabylake_init_clock_gating; else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; else if (IS_BROADWELL(dev_priv)) @@ -7203,7 +7714,7 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) /* Set up chip specific power management-related functions */ void intel_init_pm(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); intel_fbc_init(dev_priv); @@ -7217,6 +7728,7 @@ void intel_init_pm(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 9) { skl_setup_wm_latency(dev); dev_priv->display.update_wm = skl_update_wm; + dev_priv->display.compute_global_watermarks = skl_compute_wm; } else if (HAS_PCH_SPLIT(dev)) { ilk_setup_wm_latency(dev); @@ -7276,50 +7788,133 @@ void intel_init_pm(struct drm_device *dev) } } +static inline int 
gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+ uint32_t flags =
+ I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+ switch (flags) {
+ case GEN6_PCODE_SUCCESS:
+ return 0;
+ case GEN6_PCODE_UNIMPLEMENTED_CMD:
+ case GEN6_PCODE_ILLEGAL_CMD:
+ return -ENXIO;
+ case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+ case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+ return -EOVERFLOW;
+ case GEN6_PCODE_TIMEOUT:
+ return -ETIMEDOUT;
+ default:
+ MISSING_CASE(flags);
+ return 0;
+ }
+}
+
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+ uint32_t flags =
+ I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+ switch (flags) {
+ case GEN6_PCODE_SUCCESS:
+ return 0;
+ case GEN6_PCODE_ILLEGAL_CMD:
+ return -ENXIO;
+ case GEN7_PCODE_TIMEOUT:
+ return -ETIMEDOUT;
+ case GEN7_PCODE_ILLEGAL_DATA:
+ return -EINVAL;
+ case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+ return -EOVERFLOW;
+ default:
+ MISSING_CASE(flags);
+ return 0;
+ }
+}
+
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
+ int status;
+
 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+ /* GEN6_PCODE_* are outside of the forcewake domain, we can
+ * use the fw I915_READ variants to reduce the amount of work
+ * required when reading/writing.
+ */
+
+ if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
 return -EAGAIN;
 }
- I915_WRITE(GEN6_PCODE_DATA, *val);
- I915_WRITE(GEN6_PCODE_DATA1, 0);
- I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+ I915_WRITE_FW(GEN6_PCODE_DATA, *val);
+ I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
+ I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
- if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
- 500)) {
+ if (intel_wait_for_register_fw(dev_priv,
+ GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+ 500)) {
 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
 return -ETIMEDOUT;
 }
- *val = I915_READ(GEN6_PCODE_DATA);
- I915_WRITE(GEN6_PCODE_DATA, 0);
+ *val = I915_READ_FW(GEN6_PCODE_DATA);
+ I915_WRITE_FW(GEN6_PCODE_DATA, 0);
+
+ if (INTEL_GEN(dev_priv) > 6)
+ status = gen7_check_mailbox_status(dev_priv);
+ else
+ status = gen6_check_mailbox_status(dev_priv);
+
+ if (status) {
+ DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
+ status);
+ return status;
+ }
 return 0;
 }
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
+int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
+ u32 mbox, u32 val)
 {
+ int status;
+
 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+ /* GEN6_PCODE_* are outside of the forcewake domain, we can
+ * use the fw I915_READ variants to reduce the amount of work
+ * required when reading/writing.
+ */ + + if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); return -EAGAIN; } - I915_WRITE(GEN6_PCODE_DATA, val); - I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); + I915_WRITE_FW(GEN6_PCODE_DATA, val); + I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) { + if (intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500)) { DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); return -ETIMEDOUT; } - I915_WRITE(GEN6_PCODE_DATA, 0); + I915_WRITE_FW(GEN6_PCODE_DATA, 0); + + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n", + status); + return status; + } return 0; } @@ -7389,48 +7984,43 @@ static void __intel_rps_boost_work(struct work_struct *work) struct request_boost *boost = container_of(work, struct request_boost, work); struct drm_i915_gem_request *req = boost->req; - if (!i915_gem_request_completed(req, true)) - gen6_rps_boost(to_i915(req->engine->dev), NULL, - req->emitted_jiffies); + if (!i915_gem_request_completed(req)) + gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); - i915_gem_request_unreference__unlocked(req); + i915_gem_request_put(req); kfree(boost); } -void intel_queue_rps_boost_for_request(struct drm_device *dev, - struct drm_i915_gem_request *req) +void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) { struct request_boost *boost; - if (req == NULL || INTEL_INFO(dev)->gen < 6) + if (req == NULL || INTEL_GEN(req->i915) < 6) return; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(req)) return; boost = kmalloc(sizeof(*boost), GFP_ATOMIC); if (boost == NULL) return; - i915_gem_request_reference(req); - boost->req = req; + boost->req = i915_gem_request_get(req); INIT_WORK(&boost->work, __intel_rps_boost_work); - queue_work(to_i915(dev)->wq, &boost->work); + queue_work(req->i915->wq, &boost->work); } void intel_pm_setup(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); mutex_init(&dev_priv->rps.hw_lock); spin_lock_init(&dev_priv->rps.client_lock); - INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, - intel_gen6_powersave_work); + INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, + __intel_autoenable_gt_powersave); INIT_LIST_HEAD(&dev_priv->rps.clients); - INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); - INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); |
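A minimal usage sketch of the reworked pcode helpers above, assuming i915 driver context: after this change sandybridge_pcode_read()/sandybridge_pcode_write() decode GEN6_PCODE_ERROR_MASK through gen6_check_mailbox_status() or gen7_check_mailbox_status() and return a negative errno (-ENXIO, -EINVAL, -EOVERFLOW or -ETIMEDOUT) where they previously reported success, so a caller is expected to hold rps.hw_lock and check the result. The wrapper function and the EXAMPLE_PCODE_MBOX command below are hypothetical placeholders, not identifiers introduced by this diff.

/* Sketch only: example_read_pcode_value() and EXAMPLE_PCODE_MBOX are
 * hypothetical names used for illustration; everything else is the
 * driver API shown in the diff above.
 */
static int example_read_pcode_value(struct drm_i915_private *dev_priv, u32 *out)
{
	u32 val = 0; /* the read helper sends *val as the request payload */
	int ret;

	mutex_lock(&dev_priv->rps.hw_lock); /* the helpers WARN if this is not held */
	ret = sandybridge_pcode_read(dev_priv, EXAMPLE_PCODE_MBOX, &val);
	mutex_unlock(&dev_priv->rps.hw_lock);

	if (ret) /* -EAGAIN, -ENXIO, -EINVAL, -EOVERFLOW or -ETIMEDOUT */
		return ret;

	*out = val;
	return 0;
}

Keeping separate gen6/gen7 decoders preserves the gen6 error codes while adding the ones only defined for gen7 and later (GEN7_PCODE_TIMEOUT, GEN7_PCODE_ILLEGAL_DATA), and both paths use the _FW register accessors because, per the comment in the diff, the pcode mailbox registers sit outside the forcewake domain.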