diff options
73 files changed, 4525 insertions, 1406 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 0d21402945ab..3c6d57df262d 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -76,7 +76,7 @@ config DRM_I915_CAPTURE_ERROR This option enables capturing the GPU state when a hang is detected. This information is vital for triaging hangs and assists in debugging. Please report any hang to - https://bugs.freedesktop.org/enter_bug.cgi?product=DRI + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI for triaging. If in doubt, say "Y". @@ -105,11 +105,11 @@ config DRM_I915_USERPTR If in doubt, say "Y". config DRM_I915_GVT - bool "Enable Intel GVT-g graphics virtualization host support" - depends on DRM_I915 - depends on 64BIT - default n - help + bool "Enable Intel GVT-g graphics virtualization host support" + depends on DRM_I915 + depends on 64BIT + default n + help Choose this option if you want to enable Intel GVT-g graphics virtualization technology host support with integrated graphics. With GVT-g, it's possible to have one integrated graphics diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 00786a142ff0..eea79125b3ea 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -1,34 +1,34 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_I915_WERROR - bool "Force GCC to throw an error instead of a warning when compiling" - # As this may inadvertently break the build, only allow the user - # to shoot oneself in the foot iff they aim really hard - depends on EXPERT - # We use the dependency on !COMPILE_TEST to not be enabled in - # allmodconfig or allyesconfig configurations - depends on !COMPILE_TEST + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST select HEADER_TEST - default n - help - Add -Werror to the build flags for (and only for) i915.ko. - Do not enable this unless you are writing code for the i915.ko module. + default n + help + Add -Werror to the build flags for (and only for) i915.ko. + Do not enable this unless you are writing code for the i915.ko module. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG - bool "Enable additional driver debugging" - depends on DRM_I915 - select DEBUG_FS - select PREEMPT_COUNT - select REFCOUNT_FULL - select I2C_CHARDEV - select STACKDEPOT - select DRM_DP_AUX_CHARDEV - select X86_MSR # used by igt/pm_rpm - select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) - select DRM_DEBUG_MM if DRM=y + bool "Enable additional driver debugging" + depends on DRM_I915 + select DEBUG_FS + select PREEMPT_COUNT + select REFCOUNT_FULL + select I2C_CHARDEV + select STACKDEPOT + select DRM_DP_AUX_CHARDEV + select X86_MSR # used by igt/pm_rpm + select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) + select DRM_DEBUG_MM if DRM=y select DRM_DEBUG_SELFTEST select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) @@ -36,14 +36,14 @@ config DRM_I915_DEBUG select DRM_I915_SELFTEST select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_DEBUG_MMIO - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_MMIO bool "Always insert extra checks around mmio access by default" @@ -59,16 +59,16 @@ config DRM_I915_DEBUG_MMIO If in doubt, say "N". config DRM_I915_DEBUG_GEM - bool "Insert extra checks into the GEM internals" - default n - depends on DRM_I915_WERROR - help - Enable extra sanity checks (including BUGs) along the GEM driver - paths that may slow the system down and if hit hang the machine. + bool "Insert extra checks into the GEM internals" + default n + depends on DRM_I915_WERROR + help + Enable extra sanity checks (including BUGs) along the GEM driver + paths that may slow the system down and if hit hang the machine. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_ERRLOG_GEM bool "Insert extra logging (very verbose) for common GEM errors" @@ -111,41 +111,41 @@ config DRM_I915_TRACE_GTT If in doubt, say "N". config DRM_I915_SW_FENCE_DEBUG_OBJECTS - bool "Enable additional driver debugging for fence objects" - depends on DRM_I915 - select DEBUG_OBJECTS - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for fence objects" + depends on DRM_I915 + select DEBUG_OBJECTS + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SW_FENCE_CHECK_DAG - bool "Enable additional driver debugging for detecting dependency cycles" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for detecting dependency cycles" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_GUC - bool "Enable additional driver debugging for GuC" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will help resolve GuC related issues. + bool "Enable additional driver debugging for GuC" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will help resolve GuC related issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SELFTEST bool "Enable selftests upon driver load" @@ -178,15 +178,15 @@ config DRM_I915_SELFTEST_BROKEN If in doubt, say "N". config DRM_I915_LOW_LEVEL_TRACEPOINTS - bool "Enable low level request tracing events" - depends on DRM_I915 - default n - help - Choose this option to turn on low level request tracing events. - This provides the ability to precisely monitor engine utilisation - and also analyze the request dependency resolving timeline. - - If in doubt, say "N". + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE bool "Enable extra debug warnings for vblank evasion" diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a6006aa715ff..e791d9323b51 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -51,6 +51,7 @@ i915-y += i915_drv.o \ i915_utils.o \ intel_csr.o \ intel_device_info.o \ + intel_memory_region.o \ intel_pch.o \ intel_pm.o \ intel_runtime_pm.o \ @@ -121,6 +122,7 @@ gem-y += \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_pm.o \ + gem/i915_gem_region.o \ gem/i915_gem_shmem.o \ gem/i915_gem_shrinker.o \ gem/i915_gem_stolen.o \ diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 1def550c68c8..f0307b04cc13 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, " "disabling port %c DVI/HDMI support\n", port_name(port), info->alternate_ddc_pin, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedly sharing the @@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, * port. Otherwise they share the same ddc bin and * system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dvi = false; info->supports_hdmi = false; info->alternate_ddc_pin = 0; @@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, " "disabling port %c DP support\n", port_name(port), info->alternate_aux_channel, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedlt sharing the @@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, * port. Otherwise they share the same aux channel * and system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dp = false; info->alternate_aux_channel = 0; } @@ -1615,7 +1625,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv, expected_size = 37; } else if (bdb->version <= 215) { expected_size = 38; - } else if (bdb->version <= 216) { + } else if (bdb->version <= 229) { expected_size = 39; } else { expected_size = sizeof(*child); diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 9ab34902663e..fa44eb73d088 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1420,6 +1420,9 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1433,6 +1436,9 @@ static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) return 0; @@ -1457,6 +1463,9 @@ static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return 0; + switch (crtc_state->gamma_mode) { case GAMMA_MODE_MODE_8BIT: return 8; @@ -1473,9 +1482,6 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (!crtc_state->gamma_enable) - return 0; - if (HAS_GMCH(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) return chv_gamma_precision(crtc_state); @@ -1613,6 +1619,9 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) static void i9xx_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); } @@ -1659,6 +1668,9 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) static void i965_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); else @@ -1701,10 +1713,10 @@ chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) static void chv_read_luts(struct intel_crtc_state *crtc_state) { - if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); - else + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) crtc_state->base.gamma_lut = chv_read_cgm_lut(crtc_state); + else + i965_read_luts(crtc_state); } static struct drm_property_blob * @@ -1742,6 +1754,12 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) static void ilk_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return; + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); else @@ -1788,6 +1806,9 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) static void glk_read_luts(struct intel_crtc_state *crtc_state) { + if (!crtc_state->gamma_enable) + return; + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); else diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 3c1e885e0187..6c1315c7bcde 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -45,6 +45,7 @@ #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sprite.h" #include "intel_tc.h" #include "intel_vdsc.h" @@ -3330,6 +3331,86 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, POSTING_READ(intel_dp->regs.dp_tp_ctl); } +static void +tgl_clear_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + u32 val; + + if (!cstate->dc3co_exitline) + return; + + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void +tgl_set_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + u32 val, exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + + if (!cstate->dc3co_exitline) + return; + + exit_scanlines = cstate->dc3co_exitline; + exit_scanlines <<= EXITLINE_SHIFT; + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + val |= exit_scanlines; + val |= EXITLINE_ENABLE; + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void tgl_dc3co_exitline_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *cstate) +{ + u32 exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + u32 crtc_vdisplay = cstate->base.adjusted_mode.crtc_vdisplay; + + cstate->dc3co_exitline = 0; + + if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC3CO)) + return; + + /* B.Specs:49196 DC3CO only works with pipeA and DDIA.*/ + if (to_intel_crtc(cstate->base.crtc)->pipe != PIPE_A || + encoder->port != PORT_A) + return; + + if (!cstate->has_psr2 || !cstate->base.active) + return; + + /* + * DC3CO Exit time 200us B.Spec 49196 + * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1 + */ + exit_scanlines = + intel_usecs_to_scanlines(&cstate->base.adjusted_mode, 200) + 1; + + if (WARN_ON(exit_scanlines > crtc_vdisplay)) + return; + + cstate->dc3co_exitline = crtc_vdisplay - exit_scanlines; + DRM_DEBUG_KMS("DC3CO exit scanlines %d\n", cstate->dc3co_exitline); +} + +static void tgl_dc3co_exitline_get_config(struct intel_crtc_state *crtc_state) +{ + u32 val; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (INTEL_GEN(dev_priv) < 12) + return; + + val = I915_READ(EXITLINE(crtc_state->cpu_transcoder)); + + if (val & EXITLINE_ENABLE) + crtc_state->dc3co_exitline = val & EXITLINE_MASK; +} + static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) @@ -3342,6 +3423,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, int level = intel_ddi_dp_level(intel_dp); enum transcoder transcoder = crtc_state->cpu_transcoder; + tgl_set_psr2_transcoder_exitline(crtc_state); intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); @@ -3666,6 +3748,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); + tgl_clear_psr2_transcoder_exitline(old_crtc_state); } static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, @@ -4212,6 +4295,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, break; } + if (encoder->type == INTEL_OUTPUT_EDP) + tgl_dc3co_exitline_get_config(pipe_config); + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); @@ -4289,10 +4375,13 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A) pipe_config->cpu_transcoder = TRANSCODER_EDP; - if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) { ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); - else + } else { ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + tgl_dc3co_exitline_compute_config(encoder, pipe_config); + } + if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 05fb672a00b9..3cf39fc153b3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -135,8 +135,6 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); -static void intel_begin_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); -static void intel_finish_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); static void intel_crtc_init_scalers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state); @@ -4401,45 +4399,6 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) I915_WRITE(PIPE_CHICKEN(pipe), tmp); } -static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) -{ - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ - crtc->base.mode = new_crtc_state->base.mode; - - /* - * Update pipe size and adjust fitter if needed: the reason for this is - * that in compute_mode_changes we check the native mode (not the pfit - * mode) to see if we can flip rather than do a full mode set. In the - * fastboot case, we'll flip, but if we don't update the pipesrc and - * pfit state, we'll end up with a big fb scanned out into the wrong - * sized surface. - */ - - I915_WRITE(PIPESRC(crtc->pipe), - ((new_crtc_state->pipe_src_w - 1) << 16) | - (new_crtc_state->pipe_src_h - 1)); - - /* on skylake this is done by detaching scalers */ - if (INTEL_GEN(dev_priv) >= 9) { - skl_detach_scalers(new_crtc_state); - - if (new_crtc_state->pch_pfit.enabled) - skylake_pfit_enable(new_crtc_state); - } else if (HAS_PCH_SPLIT(dev_priv)) { - if (new_crtc_state->pch_pfit.enabled) - ironlake_pfit_enable(new_crtc_state); - else if (old_crtc_state->pch_pfit.enabled) - ironlake_pfit_disable(old_crtc_state); - } - - if (INTEL_GEN(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); -} - static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -10579,16 +10538,16 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) /* ILK+ do this automagically */ if (HAS_GMCH(dev_priv) && plane_state->base.rotation & DRM_MODE_ROTATE_180) - base += (plane_state->base.crtc_h * - plane_state->base.crtc_w - 1) * fb->format->cpp[0]; + base += (drm_rect_height(&plane_state->base.dst) * + drm_rect_width(&plane_state->base.dst) - 1) * fb->format->cpp[0]; return base; } static u32 intel_cursor_position(const struct intel_plane_state *plane_state) { - int x = plane_state->base.crtc_x; - int y = plane_state->base.crtc_y; + int x = plane_state->base.dst.x1; + int y = plane_state->base.dst.y1; u32 pos = 0; if (x < 0) { @@ -10610,8 +10569,8 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) { const struct drm_mode_config *config = &plane_state->base.plane->dev->mode_config; - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + int width = drm_rect_width(&plane_state->base.dst); + int height = drm_rect_height(&plane_state->base.dst); return width > 0 && width <= config->cursor_width && height > 0 && height <= config->cursor_height; @@ -10630,8 +10589,8 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) if (!plane_state->base.visible) return 0; - src_x = plane_state->base.src_x >> 16; - src_y = plane_state->base.src_y >> 16; + src_x = plane_state->base.src.x1 >> 16; + src_y = plane_state->base.src.y1 >> 16; intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); offset = intel_plane_compute_aligned_offset(&src_x, &src_y, @@ -10666,6 +10625,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* Use the unclipped src/dst rectangles, which we program to hw */ + plane_state->base.src = drm_plane_state_src(&plane_state->base); + plane_state->base.dst = drm_plane_state_dest(&plane_state->base); + ret = intel_cursor_check_surface(plane_state); if (ret) return ret; @@ -10708,7 +10671,7 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) { - int width = plane_state->base.crtc_w; + int width = drm_rect_width(&plane_state->base.dst); /* * 845g/865g are only limited by the width of their cursors, @@ -10734,8 +10697,8 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i845_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); return -EINVAL; } @@ -10768,8 +10731,8 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - unsigned int width = plane_state->base.crtc_w; - unsigned int height = plane_state->base.crtc_h; + unsigned int width = drm_rect_width(&plane_state->base.src); + unsigned int height = drm_rect_height(&plane_state->base.dst); cntl = plane_state->ctl | i845_cursor_ctl_crtc(crtc_state); @@ -10871,7 +10834,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) cntl |= MCURSOR_TRICKLE_FEED_DISABLE; - switch (plane_state->base.crtc_w) { + switch (drm_rect_width(&plane_state->base.dst)) { case 64: cntl |= MCURSOR_MODE_64_ARGB_AX; break; @@ -10882,7 +10845,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, cntl |= MCURSOR_MODE_256_ARGB_AX; break; default: - MISSING_CASE(plane_state->base.crtc_w); + MISSING_CASE(drm_rect_width(&plane_state->base.dst)); return 0; } @@ -10896,8 +10859,8 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + int width = drm_rect_width(&plane_state->base.dst); + int height = drm_rect_height(&plane_state->base.dst); if (!intel_cursor_size_ok(plane_state)) return false; @@ -10950,17 +10913,19 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i9xx_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); return -EINVAL; } WARN_ON(plane_state->base.visible && plane_state->color_plane[0].stride != fb->pitches[0]); - if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) { + if (fb->pitches[0] != + drm_rect_width(&plane_state->base.dst) * fb->format->cpp[0]) { DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n", - fb->pitches[0], plane_state->base.crtc_w); + fb->pitches[0], + drm_rect_width(&plane_state->base.dst)); return -EINVAL; } @@ -10975,7 +10940,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, * Refuse the put the cursor into that compromised position. */ if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && - plane_state->base.visible && plane_state->base.crtc_x < 0) { + plane_state->base.visible && plane_state->base.dst.x1 < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@ -10995,11 +10960,14 @@ static void i9xx_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { + unsigned width = drm_rect_width(&plane_state->base.dst); + unsigned height = drm_rect_height(&plane_state->base.dst); + cntl = plane_state->ctl | i9xx_cursor_ctl_crtc(crtc_state); - if (plane_state->base.crtc_h != plane_state->base.crtc_w) - fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); + if (width != height) + fbc_ctl = CUR_FBC_CTL_EN | (height - 1); base = intel_cursor_base(plane_state); pos = intel_cursor_position(plane_state); @@ -12808,6 +12776,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(pixel_multiplier); PIPE_CONF_CHECK_I(output_format); + PIPE_CONF_CHECK_I(dc3co_exitline); PIPE_CONF_CHECK_BOOL(has_hdmi_sink); if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -13694,13 +13663,95 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) return crtc->base.funcs->get_vblank_counter(&crtc->base); } +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!IS_GEN(dev_priv, 2)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + + if (crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } +} + +static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ + crtc->base.mode = new_crtc_state->base.mode; + + /* + * Update pipe size and adjust fitter if needed: the reason for this is + * that in compute_mode_changes we check the native mode (not the pfit + * mode) to see if we can flip rather than do a full mode set. In the + * fastboot case, we'll flip, but if we don't update the pipesrc and + * pfit state, we'll end up with a big fb scanned out into the wrong + * sized surface. + */ + intel_set_pipe_src_size(new_crtc_state); + + /* on skylake this is done by detaching scalers */ + if (INTEL_GEN(dev_priv) >= 9) { + skl_detach_scalers(new_crtc_state); + + if (new_crtc_state->pch_pfit.enabled) + skylake_pfit_enable(new_crtc_state); + } else if (HAS_PCH_SPLIT(dev_priv)) { + if (new_crtc_state->pch_pfit.enabled) + ironlake_pfit_enable(new_crtc_state); + else if (old_crtc_state->pch_pfit.enabled) + ironlake_pfit_disable(old_crtc_state); + } + + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(crtc); +} + +static void commit_pipe_config(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + bool modeset = needs_modeset(new_crtc_state); + + /* + * During modesets pipe configuration was programmed as the + * CRTC was enabled. + */ + if (!modeset) { + if (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) + intel_color_commit(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9) + skl_detach_scalers(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + bdw_set_pipemisc(new_crtc_state); + + if (new_crtc_state->update_pipe) + intel_pipe_fastset(old_crtc_state, new_crtc_state); + } + + if (dev_priv->display.atomic_update_watermarks) + dev_priv->display.atomic_update_watermarks(state, + new_crtc_state); +} + static void intel_update_crtc(struct intel_crtc *crtc, struct intel_atomic_state *state, struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state) { - struct drm_device *dev = state->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); bool modeset = needs_modeset(new_crtc_state); struct intel_plane_state *new_plane_state = intel_atomic_get_new_plane_state(state, @@ -13724,14 +13775,27 @@ static void intel_update_crtc(struct intel_crtc *crtc, else if (new_plane_state) intel_fbc_enable(crtc, new_crtc_state, new_plane_state); - intel_begin_crtc_commit(state, crtc); + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + + commit_pipe_config(state, old_crtc_state, new_crtc_state); if (INTEL_GEN(dev_priv) >= 9) skl_update_planes_on_crtc(state, crtc); else i9xx_update_planes_on_crtc(state, crtc); - intel_finish_crtc_commit(state, crtc); + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. + */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } static void intel_old_crtc_state_disables(struct intel_atomic_state *state, @@ -14346,7 +14410,7 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for - * @new_state: the plane state being prepared + * @_new_plane_state: the plane state being prepared * * Prepares a framebuffer for usage on a display plane. Generally this * involves pinning the underlying object and updating the frontbuffer tracking @@ -14357,12 +14421,14 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) */ int intel_prepare_plane_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) + struct drm_plane_state *_new_plane_state) { + struct intel_plane_state *new_plane_state = + to_intel_plane_state(_new_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_state->state); + to_intel_atomic_state(new_plane_state->base.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_framebuffer *fb = new_state->fb; + struct drm_framebuffer *fb = new_plane_state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; @@ -14393,9 +14459,9 @@ intel_prepare_plane_fb(struct drm_plane *plane, } } - if (new_state->fence) { /* explicit fencing */ + if (new_plane_state->base.fence) { /* explicit fencing */ ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready, - new_state->fence, + new_plane_state->base.fence, I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) @@ -14409,7 +14475,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); + ret = intel_plane_pin_fb(new_plane_state); i915_gem_object_unpin_pages(obj); if (ret) @@ -14418,7 +14484,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, fb_obj_bump_render_priority(obj); intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB); - if (!new_state->fence) { /* implicit fencing */ + if (!new_plane_state->base.fence) { /* implicit fencing */ struct dma_fence *fence; ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, @@ -14430,11 +14496,13 @@ intel_prepare_plane_fb(struct drm_plane *plane, fence = dma_resv_get_excl_rcu(obj->base.resv); if (fence) { - add_rps_boost_after_vblank(new_state->crtc, fence); + add_rps_boost_after_vblank(new_plane_state->base.crtc, + fence); dma_fence_put(fence); } } else { - add_rps_boost_after_vblank(new_state->crtc, new_state->fence); + add_rps_boost_after_vblank(new_plane_state->base.crtc, + new_plane_state->base.fence); } /* @@ -14456,16 +14524,18 @@ intel_prepare_plane_fb(struct drm_plane *plane, /** * intel_cleanup_plane_fb - Cleans up an fb after plane use * @plane: drm plane to clean up for - * @old_state: the state from the previous modeset + * @_old_plane_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. */ void intel_cleanup_plane_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) + struct drm_plane_state *_old_plane_state) { + struct intel_plane_state *old_plane_state = + to_intel_plane_state(_old_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(old_state->state); + to_intel_atomic_state(old_plane_state->base.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { @@ -14474,7 +14544,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } /* Should only be called after a successful intel_prepare_plane_fb()! */ - intel_plane_unpin_fb(to_intel_plane_state(old_state)); + intel_plane_unpin_fb(old_plane_state); } int @@ -14515,72 +14585,6 @@ skl_max_scale(const struct intel_crtc_state *crtc_state, return max_scale; } -static void intel_begin_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - bool modeset = needs_modeset(new_crtc_state); - - /* Perform vblank evasion around commit operation */ - intel_pipe_update_start(new_crtc_state); - - if (modeset) - goto out; - - if (new_crtc_state->base.color_mgmt_changed || - new_crtc_state->update_pipe) - intel_color_commit(new_crtc_state); - - if (new_crtc_state->update_pipe) - intel_update_pipe_config(old_crtc_state, new_crtc_state); - else if (INTEL_GEN(dev_priv) >= 9) - skl_detach_scalers(new_crtc_state); - - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipemisc(new_crtc_state); - -out: - if (dev_priv->display.atomic_update_watermarks) - dev_priv->display.atomic_update_watermarks(state, - new_crtc_state); -} - -void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - if (!IS_GEN(dev_priv, 2)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - - if (crtc_state->has_pch_encoder) { - enum pipe pch_transcoder = - intel_crtc_pch_transcoder(crtc); - - intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); - } -} - -static void intel_finish_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - - intel_pipe_update_end(new_crtc_state); - - if (new_crtc_state->update_pipe && - !needs_modeset(new_crtc_state) && - old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) - intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); -} - /** * intel_plane_destroy - destroy a plane * @plane: plane to destroy diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 2782f23ee887..bc2cf4bec0e8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -350,7 +350,7 @@ enum phy_fia { &(dev)->mode_config.plane_list, \ base.head) \ for_each_if((plane_mask) & \ - drm_plane_mask(&intel_plane->base))) + drm_plane_mask(&intel_plane->base)) #define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ list_for_each_entry(intel_plane, \ @@ -440,6 +440,14 @@ enum phy_fia { (__i)--) \ for_each_if(crtc) +#define intel_atomic_crtc_state_for_each_plane_state( \ + plane, plane_state, \ + crtc_state) \ + for_each_intel_plane_mask(((crtc_state)->base.state->dev), (plane), \ + ((crtc_state)->base.plane_mask)) \ + for_each_if ((plane_state = \ + to_intel_plane_state(__drm_atomic_get_current_plane_state((crtc_state)->base.state, &plane->base)))) + void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0616284c6da6..6f9e7927e248 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -695,7 +695,11 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) u32 mask; mask = DC_STATE_EN_UPTO_DC5; - if (INTEL_GEN(dev_priv) >= 11) + + if (INTEL_GEN(dev_priv) >= 12) + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6 + | DC_STATE_EN_DC9; + else if (IS_GEN(dev_priv, 11)) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; else if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_EN_DC9; @@ -765,6 +769,52 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) dev_priv->csr.dc_state = val & mask; } +static u32 +sanitize_target_dc_state(struct drm_i915_private *dev_priv, + u32 target_dc_state) +{ + u32 states[] = { + DC_STATE_EN_UPTO_DC6, + DC_STATE_EN_UPTO_DC5, + DC_STATE_EN_DC3CO, + DC_STATE_DISABLE, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { + if (target_dc_state != states[i]) + continue; + + if (dev_priv->csr.allowed_dc_mask & target_dc_state) + break; + + target_dc_state = states[i + 1]; + } + + return target_dc_state; +} + +static void tgl_enable_dc3co(struct drm_i915_private *dev_priv) +{ + DRM_DEBUG_KMS("Enabling DC3CO\n"); + gen9_set_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_disable_dc3co(struct drm_i915_private *dev_priv) +{ + u32 val; + + DRM_DEBUG_KMS("Disabling DC3CO\n"); + val = I915_READ(DC_STATE_EN); + val &= ~DC_STATE_DC3CO_STATUS; + I915_WRITE(DC_STATE_EN, val); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + /* + * Delay of 200us DC3CO Exit time B.Spec 49196 + */ + usleep_range(200, 210); +} + static void bxt_enable_dc9(struct drm_i915_private *dev_priv) { assert_can_enable_dc9(dev_priv); @@ -820,6 +870,51 @@ lookup_power_well(struct drm_i915_private *dev_priv, return &dev_priv->power_domains.power_wells[0]; } +/** + * intel_display_power_set_target_dc_state - Set target dc state. + * @dev_priv: i915 device + * @state: state which needs to be set as target_dc_state. + * + * This function set the "DC off" power well target_dc_state, + * based upon this target_dc_stste, "DC off" power well will + * enable desired DC state. + */ +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state) +{ + struct i915_power_well *power_well; + bool dc_off_enabled; + struct i915_power_domains *power_domains = &dev_priv->power_domains; + + mutex_lock(&power_domains->lock); + power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF); + + if (WARN_ON(!power_well)) + goto unlock; + + state = sanitize_target_dc_state(dev_priv, state); + + if (state == dev_priv->csr.target_dc_state) + goto unlock; + + dc_off_enabled = power_well->desc->ops->is_enabled(dev_priv, + power_well); + /* + * If DC off power well is disabled, need to enable and disable the + * DC off power well to effect target DC state. + */ + if (!dc_off_enabled) + power_well->desc->ops->enable(dev_priv, power_well); + + dev_priv->csr.target_dc_state = state; + + if (!dc_off_enabled) + power_well->desc->ops->disable(dev_priv, power_well); + +unlock: + mutex_unlock(&power_domains->lock); +} + static void assert_can_enable_dc5(struct drm_i915_private *dev_priv) { bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv, @@ -932,7 +1027,8 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0; + return ((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 && + (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0); } static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) @@ -948,6 +1044,11 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = {}; + if (dev_priv->csr.target_dc_state == DC_STATE_EN_DC3CO) { + tgl_disable_dc3co(dev_priv); + return; + } + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); dev_priv->display.get_cdclk(dev_priv, &cdclk_state); @@ -980,10 +1081,17 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, if (!dev_priv->csr.dmc_payload) return; - if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6) + switch (dev_priv->csr.target_dc_state) { + case DC_STATE_EN_DC3CO: + tgl_enable_dc3co(dev_priv); + break; + case DC_STATE_EN_UPTO_DC6: skl_enable_dc6(dev_priv); - else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5) + break; + case DC_STATE_EN_UPTO_DC5: gen9_enable_dc5(dev_priv); + break; + } } static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, @@ -2931,7 +3039,7 @@ static const struct i915_power_well_desc skl_power_wells[] = { .name = "DC off", .domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3013,7 +3121,7 @@ static const struct i915_power_well_desc bxt_power_wells[] = { .name = "DC off", .domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3073,7 +3181,7 @@ static const struct i915_power_well_desc glk_power_wells[] = { .name = "DC off", .domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3242,7 +3350,7 @@ static const struct i915_power_well_desc cnl_power_wells[] = { .name = "DC off", .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3370,7 +3478,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { .name = "DC off", .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3603,7 +3711,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .name = "DC off", .domains = TGL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3924,14 +4032,17 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (INTEL_GEN(dev_priv) >= 11) { - max_dc = 2; + if (INTEL_GEN(dev_priv) >= 12) { + max_dc = 4; /* * DC9 has a separate HW flow from the rest of the DC states, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. */ mask = DC_STATE_EN_DC9; + } else if (IS_GEN(dev_priv, 11)) { + max_dc = 2; + mask = DC_STATE_EN_DC9; } else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; @@ -3950,7 +4061,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = enable_dc; } else if (enable_dc == -1) { requested_dc = max_dc; - } else if (enable_dc > max_dc && enable_dc <= 2) { + } else if (enable_dc > max_dc && enable_dc <= 4) { DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n", enable_dc, max_dc); requested_dc = max_dc; @@ -3959,10 +4070,20 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = max_dc; } - if (requested_dc > 1) + switch (requested_dc) { + case 4: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6; + break; + case 3: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5; + break; + case 2: mask |= DC_STATE_EN_UPTO_DC6; - if (requested_dc > 0) + break; + case 1: mask |= DC_STATE_EN_UPTO_DC5; + break; + } DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask); @@ -4023,6 +4144,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc); + dev_priv->csr.target_dc_state = + sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 737b5def7fc6..1da04f3e0fb3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -100,6 +100,7 @@ enum i915_power_well_id { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_1, SKL_DISP_PW_2, + SKL_DISP_DC_OFF, }; #define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) @@ -256,6 +257,8 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915); void intel_display_power_resume_early(struct drm_i915_private *i915); void intel_display_power_suspend(struct drm_i915_private *i915); void intel_display_power_resume(struct drm_i915_private *i915); +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state); const char * intel_display_power_domain_str(enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 1602aac7ca0f..40390d855815 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -870,6 +870,7 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; + u32 dc3co_exitline; /* * Frequence the dpll for the port should run at. Differs from the diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 0e45c61d7331..6594f2af1257 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5282,6 +5282,9 @@ static bool icl_combo_port_connected(struct drm_i915_private *dev_priv, { enum port port = intel_dig_port->base.port; + if (HAS_PCH_MCC(dev_priv) && port == PORT_C) + return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); + return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 5e9e84c94a15..ec10fa7d3c69 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -247,8 +247,7 @@ static struct intel_shared_dpll * intel_find_shared_dpll(struct intel_atomic_state *state, const struct intel_crtc *crtc, const struct intel_dpll_hw_state *pll_state, - enum intel_dpll_id range_min, - enum intel_dpll_id range_max) + unsigned long dpll_mask) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll, *unused_pll = NULL; @@ -257,7 +256,9 @@ intel_find_shared_dpll(struct intel_atomic_state *state, shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); - for (i = range_min; i <= range_max; i++) { + WARN_ON(dpll_mask & ~(BIT(I915_NUM_PLLS) - 1)); + + for_each_set_bit(i, &dpll_mask, I915_NUM_PLLS) { pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ @@ -464,8 +465,8 @@ static bool ibx_get_dpll(struct intel_atomic_state *state, } else { pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_PCH_PLL_A, - DPLL_ID_PCH_PLL_B); + BIT(DPLL_ID_PCH_PLL_B) | + BIT(DPLL_ID_PCH_PLL_A)); } if (!pll) @@ -814,7 +815,8 @@ hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_WRPLL1, DPLL_ID_WRPLL2); + BIT(DPLL_ID_WRPLL2) | + BIT(DPLL_ID_WRPLL1)); if (!pll) return NULL; @@ -877,7 +879,7 @@ static bool hsw_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SPLL, DPLL_ID_SPLL); + BIT(DPLL_ID_SPLL)); } else { return false; } @@ -1447,13 +1449,13 @@ static bool skl_get_dpll(struct intel_atomic_state *state, if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL0); + BIT(DPLL_ID_SKL_DPLL0)); else pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL1, - DPLL_ID_SKL_DPLL3); + BIT(DPLL_ID_SKL_DPLL3) | + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1)); if (!pll) return false; @@ -2401,8 +2403,9 @@ static bool cnl_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL2); + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1) | + BIT(DPLL_ID_SKL_DPLL0)); if (!pll) { DRM_DEBUG_KMS("No PLL selected\n"); return false; @@ -2975,7 +2978,7 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum port port = encoder->port; - bool has_dpll4 = false; + unsigned long dpll_mask; if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { DRM_DEBUG_KMS("Could not calculate combo PHY PLL state.\n"); @@ -2984,13 +2987,16 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, } if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) - has_dpll4 = true; + dpll_mask = + BIT(DPLL_ID_EHL_DPLL4) | + BIT(DPLL_ID_ICL_DPLL1) | + BIT(DPLL_ID_ICL_DPLL0); + else + dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - DPLL_ID_ICL_DPLL0, - has_dpll4 ? DPLL_ID_EHL_DPLL4 - : DPLL_ID_ICL_DPLL1); + dpll_mask); if (!port_dpll->pll) { DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n", encoder->base.base.id, encoder->base.name); @@ -3023,8 +3029,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - DPLL_ID_ICL_TBTPLL, - DPLL_ID_ICL_TBTPLL); + BIT(DPLL_ID_ICL_TBTPLL)); if (!port_dpll->pll) { DRM_DEBUG_KMS("No TBT-ALT PLL found\n"); return false; @@ -3043,8 +3048,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, encoder->port)); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - dpll_id, - dpll_id); + BIT(dpll_id)); if (!port_dpll->pll) { DRM_DEBUG_KMS("No MG PHY PLL found\n"); goto err_unreference_tbt_pll; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index b3c7eef53bf3..50f22abcd30e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -534,6 +534,73 @@ transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans) return trans == TRANSCODER_EDP; } +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) +{ + if (!cstate || !cstate->base.active) + return 0; + + return DIV_ROUND_UP(1000 * 1000, + drm_mode_vrefresh(&cstate->base.adjusted_mode)); +} + +static void psr2_program_idle_frames(struct drm_i915_private *dev_priv, + u32 idle_frames) +{ + u32 val; + + idle_frames <<= EDP_PSR2_IDLE_FRAME_SHIFT; + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + val &= ~EDP_PSR2_IDLE_FRAME_MASK; + val |= idle_frames; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); +} + +static void tgl_psr2_enable_dc3co(struct drm_i915_private *dev_priv) +{ + psr2_program_idle_frames(dev_priv, 0); + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_psr2_disable_dc3co(struct drm_i915_private *dev_priv) +{ + int idle_frames; + + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + /* + * Restore PSR2 idle frame let's use 6 as the minimum to cover all known + * cases including the off-by-one issue that HW has in some cases. + */ + idle_frames = max(6, dev_priv->vbt.psr.idle_frames); + idle_frames = max(idle_frames, dev_priv->psr.sink_sync_latency + 1); + psr2_program_idle_frames(dev_priv, idle_frames); +} + +static void tgl_dc5_idle_thread(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), psr.idle_work.work); + + mutex_lock(&dev_priv->psr.lock); + /* If delayed work is pending, it is not idle */ + if (delayed_work_pending(&dev_priv->psr.idle_work)) + goto unlock; + + DRM_DEBUG_KMS("DC5/6 idle thread\n"); + tgl_psr2_disable_dc3co(dev_priv); +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + +static void tgl_disallow_dc3co_on_psr2_exit(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->psr.dc3co_enabled) + return; + + cancel_delayed_work(&dev_priv->psr.idle_work); + /* Before PSR2 exit disallow dc3co*/ + tgl_psr2_disable_dc3co(dev_priv); +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -746,6 +813,8 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state); dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + dev_priv->psr.dc3co_enabled = !!crtc_state->dc3co_exitline; + dev_priv->psr.dc3co_exit_delay = intel_get_frame_time_us(crtc_state); dev_priv->psr.transcoder = crtc_state->cpu_transcoder; /* @@ -829,6 +898,7 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) } if (dev_priv->psr.psr2_enabled) { + tgl_disallow_dc3co_on_psr2_exit(dev_priv); val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR2_ENABLE)); val &= ~EDP_PSR2_ENABLE; @@ -901,6 +971,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, mutex_unlock(&dev_priv->psr.lock); cancel_work_sync(&dev_priv->psr.work); + cancel_delayed_work_sync(&dev_priv->psr.idle_work); } static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) @@ -1208,6 +1279,44 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->psr.lock); } +/* + * When we will be completely rely on PSR2 S/W tracking in future, + * intel_psr_flush() will invalidate and flush the PSR for ORIGIN_FLIP + * event also therefore tgl_dc3co_flush() require to be changed + * accrodingly in future. + */ +static void +tgl_dc3co_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits, enum fb_op_origin origin) +{ + u32 delay; + + mutex_lock(&dev_priv->psr.lock); + + if (!dev_priv->psr.dc3co_enabled) + goto unlock; + + if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active) + goto unlock; + + /* + * At every frontbuffer flush flip event modified delay of delayed work, + * when delayed work schedules that means display has been idle. + */ + if (!(frontbuffer_bits & + INTEL_FRONTBUFFER_ALL_MASK(dev_priv->psr.pipe))) + goto unlock; + + tgl_psr2_enable_dc3co(dev_priv); + /* DC5/DC6 required idle frames = 6 */ + delay = 6 * dev_priv->psr.dc3co_exit_delay; + mod_delayed_work(system_wq, &dev_priv->psr.idle_work, + usecs_to_jiffies(delay)); + +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + /** * intel_psr_flush - Flush PSR * @dev_priv: i915 device @@ -1227,8 +1336,10 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; - if (origin == ORIGIN_FLIP) + if (origin == ORIGIN_FLIP) { + tgl_dc3co_flush(dev_priv, frontbuffer_bits, origin); return; + } mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { @@ -1284,6 +1395,7 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link; INIT_WORK(&dev_priv->psr.work, intel_psr_work); + INIT_DELAYED_WORK(&dev_priv->psr.idle_work, tgl_dc5_idle_thread); mutex_init(&dev_priv->psr.lock); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 9234586830d1..cfe80590f0ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -114,6 +114,24 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } +static inline bool +i915_gem_context_nopreempt(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + +static inline void +i915_gem_context_set_nopreempt(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + +static inline void +i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} + static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index ab8e1367dfc8..fe97b8ba4fda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -146,6 +146,7 @@ struct i915_gem_context { #define CONTEXT_CLOSED 1 #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 +#define CONTEXT_NOPREEMPT 4 struct mutex mutex; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 98816c35ffc3..e96901888323 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2077,6 +2077,9 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (i915_gem_context_nopreempt(eb->gem_context)) + eb->request->flags |= I915_REQUEST_NOPREEMPT; + return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 0c41e04ab8fa..5ae694c24df4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -117,13 +117,6 @@ create_st: goto err; } - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, internal_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { @@ -188,6 +180,15 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_object_internal_ops); + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj); + obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 086a9bf5adcc..c5e14c9c805c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -140,6 +140,24 @@ i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_CONTIGUOUS; +} + +static inline bool +i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_VOLATILE; +} + +static inline void +i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) +{ + obj->flags |= I915_BO_ALLOC_VOLATILE; +} + +static inline bool i915_gem_object_type_has(const struct drm_i915_gem_object *obj, unsigned long flags) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index c00b4f077f9e..a387e3ee728b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -119,6 +119,11 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(struct list_head st_link); + unsigned long flags; +#define I915_BO_ALLOC_CONTIGUOUS BIT(0) +#define I915_BO_ALLOC_VOLATILE BIT(1) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) + /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit @@ -160,6 +165,21 @@ struct drm_i915_gem_object { atomic_t pages_pin_count; atomic_t shrink_pin; + /** + * Memory region for this object. + */ + struct intel_memory_region *region; + /** + * List of memory region blocks allocated for this object. + */ + struct list_head blocks; + /** + * Element within memory_region->objects or region->purgeable + * if the object is marked as DONTNEED. Access is protected by + * region->obj_lock. + */ + struct list_head region_link; + struct sg_table *pages; void *mapping; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 2e941f093a20..b0ec0959c13f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -18,6 +18,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->mm.lock); + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_DONTNEED; + /* Make the pages coherent with the GPU (flushing any swapin). */ if (obj->cache_dirty) { obj->write_domain = 0; @@ -160,6 +163,9 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) if (IS_ERR_OR_NULL(pages)) return pages; + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_WILLNEED; + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c new file mode 100644 index 000000000000..d3f7733bc7ed --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_gem_region.h" +#include "i915_drv.h" + +void +i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks); + + obj->mm.dirty = false; + sg_free_table(pages); + kfree(pages); +} + +int +i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + struct list_head *blocks = &obj->mm.blocks; + resource_size_t size = obj->base.size; + resource_size_t prev_end; + struct i915_buddy_block *block; + unsigned int flags; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + flags = I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); + if (ret) + goto err_free_sg; + + GEM_BUG_ON(list_empty(blocks)); + + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, + i915_buddy_block_size(&mem->mm, block)); + offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(overflows_type(block_size, sg->length)); + + if (offset != prev_end || + add_overflows_t(typeof(sg->length), sg->length, block_size)) { + if (st->nents) { + sg_page_sizes |= sg->length; + sg = __sg_next(sg); + } + + sg_dma_address(sg) = mem->region.start + offset; + sg_dma_len(sg) = block_size; + + sg->length = block_size; + + st->nents++; + } else { + sg->length += block_size; + sg_dma_len(sg) += block_size; + } + + prev_end = offset + block_size; + }; + + sg_page_sizes |= sg->length; + sg_mark_end(sg); + i915_sg_trim(st); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err_free_sg: + sg_free_table(st); + kfree(st); + return ret; +} + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags) +{ + INIT_LIST_HEAD(&obj->mm.blocks); + obj->mm.region = intel_memory_region_get(mem); + obj->flags |= flags; + + mutex_lock(&mem->objects.lock); + + if (obj->flags & I915_BO_ALLOC_VOLATILE) + list_add(&obj->mm.region_link, &mem->objects.purgeable); + else + list_add(&obj->mm.region_link, &mem->objects.list); + + mutex_unlock(&mem->objects.lock); +} + +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + + mutex_lock(&mem->objects.lock); + list_del(&obj->mm.region_link); + mutex_unlock(&mem->objects.lock); + + intel_memory_region_put(mem); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + + /* + * NB: Our use of resource_size_t for the size stems from using struct + * resource for the mem->region. We might need to revisit this in the + * future. + */ + + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + + if (!mem) + return ERR_PTR(-ENODEV); + + size = round_up(size, mem->min_page_size); + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); + + /* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! + */ + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + return mem->ops->create_object(mem, size, flags); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h new file mode 100644 index 000000000000..f2ff6f8bff74 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_REGION_H__ +#define __I915_GEM_REGION_H__ + +#include <linux/types.h> + +struct intel_memory_region; +struct drm_i915_gem_object; +struct sg_table; + +int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj); +void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages); + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags); +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index e42abddd4a36..f27772f6779a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -8,6 +8,7 @@ #include "i915_selftest.h" +#include "gem/i915_gem_region.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" @@ -17,6 +18,7 @@ #include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" +#include "selftests/mock_region.h" #include "selftests/i915_random.h" static const unsigned int page_sizes[] = { @@ -113,8 +115,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -135,7 +135,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -168,6 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; @@ -227,8 +228,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -261,8 +260,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -281,7 +278,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, { fake_free_huge_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -321,6 +317,8 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) else i915_gem_object_init(obj, &fake_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; @@ -452,6 +450,88 @@ out_device: return err; } +static int igt_mock_memory_region_huge_pages(void *arg) +{ + const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; + struct i915_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct intel_memory_region *mem; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int bit; + int err = 0; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("%s failed to create memory region\n", __func__); + return PTR_ERR(mem); + } + + for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + unsigned int page_size = BIT(bit); + resource_size_t phys; + int i; + + for (i = 0; i < ARRAY_SIZE(flags); ++i) { + obj = i915_gem_object_create_region(mem, page_size, + flags[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, + page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + } + + goto out_region; + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_region: + intel_memory_region_put(mem); + return err; +} + static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_ppgtt *ppgtt = arg; @@ -1623,6 +1703,7 @@ int i915_gem_huge_page_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), SUBTEST(igt_mock_ppgtt_huge_fill), SUBTEST(igt_mock_ppgtt_64K), diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index fb58c0919ea1..e5c235051ae5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -263,7 +263,7 @@ static int live_parallel_switch(void *arg) if (!data) { i915_gem_context_unlock_engines(ctx); err = -ENOMEM; - goto out; + goto out_file; } m = 0; /* Use the first context as our template for the engines */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index be34d97ac18f..59c3083c1ec1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -62,7 +62,7 @@ int __intel_context_do_pin(struct intel_context *ce) } err = 0; - with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref) + with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref) err = ce->ops->pin(ce); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c9e8c8ccbd47..93ea367fe624 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists) return READ_ONCE(*execlists->active); } +static inline void +execlists_active_lock_bh(struct intel_engine_execlists *execlists) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&execlists->tasklet); +} + +static inline void +execlists_active_unlock_bh(struct intel_engine_execlists *execlists) +{ + tasklet_unlock(&execlists->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! */ +} + struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); @@ -407,8 +421,9 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, engine->serial++; /* contexts lost */ } -bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct intel_gt *gt); +bool intel_engine_is_idle(struct intel_engine_cs *engine); +void intel_engine_flush_submission(struct intel_engine_cs *engine); void intel_engines_reset_default_submission(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 80fd072ac719..c9d639c6becb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -736,6 +736,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) static struct intel_context * create_kernel_context(struct intel_engine_cs *engine) { + static struct lock_class_key kernel; struct intel_context *ce; int err; @@ -751,6 +752,14 @@ create_kernel_context(struct intel_engine_cs *engine) return ERR_PTR(err); } + /* + * Give our perma-pinned kernel timelines a separate lockdep class, + * so that we can use them from within the normal user timelines + * should we need to inject GPU operations during their request + * construction. + */ + lockdep_set_class(&ce->timeline->mutex, &kernel); + return ce; } @@ -1040,6 +1049,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } +void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (__tasklet_is_scheduled(t)) { + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); + } + + /* Otherwise flush the tasklet if it was running on another cpu */ + tasklet_unlock_wait(t); +} + /** * intel_engine_is_idle() - Report if the engine has finished process all work * @engine: the intel_engine_cs @@ -1058,21 +1086,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Waiting to drain ELSP? */ if (execlists_active(&engine->execlists)) { - struct tasklet_struct *t = &engine->execlists.tasklet; - synchronize_hardirq(engine->i915->drm.pdev->irq); - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - - /* Otherwise flush the tasklet if it was on another cpu */ - tasklet_unlock_wait(t); + intel_engine_flush_submission(engine); if (execlists_active(&engine->execlists)) return false; @@ -1229,9 +1245,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { struct drm_i915_private *dev_priv = engine->i915; - const struct intel_engine_execlists * const execlists = - &engine->execlists; - unsigned long flags; + struct intel_engine_execlists * const execlists = &engine->execlists; u64 addr; if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) @@ -1313,7 +1327,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - spin_lock_irqsave(&engine->active.lock, flags); + execlists_active_lock_bh(execlists); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1351,7 +1365,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } - spin_unlock_irqrestore(&engine->active.lock, flags); + execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1458,10 +1472,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock_irqrestore(&engine->active.lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); - wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref); + intel_runtime_pm_put(engine->uncore->rpm, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } @@ -1493,8 +1507,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->active.lock, flags); - write_seqlock(&engine->stats.lock); + execlists_active_lock_bh(execlists); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1522,8 +1536,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->active.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); + execlists_active_unlock_bh(execlists); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 8e5e513eddc9..67eb6183648a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -185,7 +185,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - struct intel_runtime_pm *rpm = &engine->i915->runtime_pm; + struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index b0227ab2fe1b..8e63cffcabe0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -138,6 +138,7 @@ /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ #define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) @@ -162,7 +163,8 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) +#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) +#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ /* * 3D instructions used by the kernel @@ -223,6 +225,7 @@ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8f44cf8c79b2..b3619a2a5d0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -273,7 +273,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) void intel_gt_flush_ggtt_writes(struct intel_gt *gt) { - struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; intel_wakeref_t wakeref; /* @@ -297,13 +297,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) wmb(); - if (INTEL_INFO(i915)->has_coherent_ggtt) + if (INTEL_INFO(gt->i915)->has_coherent_ggtt) return; intel_gt_chipset_flush(gt); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - struct intel_uncore *uncore = gt->uncore; + with_intel_runtime_pm(uncore->rpm, wakeref) { unsigned long flags; spin_lock_irqsave(&uncore->lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index b52e2ba3d092..bd1bd3e00a94 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -94,7 +94,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_init(>->wakeref, >->i915->runtime_pm, &wf_ops); + intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } @@ -136,11 +136,18 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) intel_uc_sanitize(>->uc); - if (!reset_engines(gt) && !force) - return; + for_each_engine(engine, gt->i915, id) + if (engine->reset.prepare) + engine->reset.prepare(engine); + + if (reset_engines(gt) || force) { + for_each_engine(engine, gt->i915, id) + __intel_engine_reset(engine, false); + } for_each_engine(engine, gt->i915, id) - __intel_engine_reset(engine, false); + if (engine->reset.finish) + engine->reset.finish(engine); } void intel_gt_pm_disable(struct intel_gt *gt) @@ -222,7 +229,7 @@ void intel_gt_suspend(struct intel_gt *gt) /* We expect to be idle already; but also want to be independent */ wait_for_idle(gt); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) intel_rc6_disable(>->rc6); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 8aed89fd2cdc..cbb4069b11e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,7 @@ * Copyright © 2019 Intel Corporation */ +#include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -19,6 +20,15 @@ static void retire_requests(struct intel_timeline *tl) break; } +static void flush_submission(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt->i915, id) + intel_engine_flush_submission(engine); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = >->timelines; @@ -32,10 +42,14 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) if (unlikely(timeout < 0)) timeout = -timeout, interruptible = false; + flush_submission(gt); /* kick the ksoftirqd tasklets */ + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { - if (!mutex_trylock(&tl->mutex)) + if (!mutex_trylock(&tl->mutex)) { + active_count++; /* report busy to caller, try again? */ continue; + } intel_timeline_get(tl); GEM_BUG_ON(!tl->active_count); @@ -48,7 +62,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) fence = i915_active_fence_get(&tl->last_request); if (fence) { timeout = dma_fence_wait_timeout(fence, - true, + interruptible, timeout); dma_fence_put(fence); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 802f516a3430..be4b263621c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -109,6 +109,11 @@ enum intel_gt_scratch_field { /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, + /* 6 * 8 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048, + + /* 4 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096, }; #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 9814b18b32ad..c14dbeb3ccc3 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -271,7 +271,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (intel_gt_is_wedged(gt)) return; - wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); if (!wakeref) return; @@ -322,7 +322,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(gt, hung, stuck); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); /* Reset timer in case GPU hangs without another request being added */ intel_gt_queue_hangcheck(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 468438fb47af..26b6b699193b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -247,8 +247,12 @@ static void __context_pin_release(struct intel_context *ce) static void mark_eio(struct i915_request *rq) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); + if (i915_request_completed(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); } @@ -669,64 +673,6 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - NOP(13), - LRI(2, POSTED), - REG16(0x200), - REG16(0x204), - - NOP(11), - LRI(50, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - END(), }; @@ -857,6 +803,15 @@ static const u8 gen12_rcs_offsets[] = { static const u8 *reg_offsets(const struct intel_engine_cs *engine) { + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatic fixup the register state between the + * physical engines for virtual engine. + */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + if (engine->class == RENDER_CLASS) { if (INTEL_GEN(engine->i915) >= 12) return gen12_rcs_offsets; @@ -892,7 +847,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - struct intel_engine_cs *owner; if (i915_request_completed(rq)) continue; /* XXX */ @@ -907,8 +861,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) * engine so that it can be moved across onto another physical * engine as load dictates. */ - owner = rq->hw_context->engine; - if (likely(owner == engine)) { + if (likely(rq->execution_mask == engine->mask)) { GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); @@ -919,6 +872,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + struct intel_engine_cs *owner = rq->hw_context->engine; + /* * Decouple the virtual breadcrumb before moving it * back to the virtual engine -- we don't want the @@ -928,7 +883,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { - spin_lock(&rq->lock); + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } @@ -1137,25 +1093,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); - if (!execlists->pending[0]) + if (!execlists->pending[0]) { + GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; + } - if (execlists->pending[execlists_num_ports(execlists)]) + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", + execlists_num_ports(execlists)); return false; + } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) + if (ce == rq->hw_context) { + GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + port - execlists->pending); return false; + } ce = rq->hw_context; if (i915_request_completed(rq)) continue; - if (i915_active_is_idle(&ce->active)) + if (i915_active_is_idle(&ce->active)) { + GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + port - execlists->pending); return false; + } + + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + port - execlists->pending); + return false; + } - if (!i915_vma_is_pinned(ce->state)) + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + port - execlists->pending); return false; + } } return ce; @@ -1232,6 +1208,10 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; + if (unlikely((prev->flags ^ next->flags) & + (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + return false; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -1288,7 +1268,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, static struct i915_request * last_active(const struct intel_engine_execlists *execlists) { - struct i915_request * const *last = execlists->active; + struct i915_request * const *last = READ_ONCE(execlists->active); while (*last && i915_request_completed(*last)) last++; @@ -1680,6 +1660,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->hw_context == rq->hw_context) goto done; + if (i915_request_has_sentinel(last)) + goto done; + /* * If GVT overrides us we only ever submit * port[0], leaving port[1] empty. Note that we @@ -1859,6 +1842,13 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* @@ -1980,8 +1970,11 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) + if (!engine->execlists.pending[0]) { + rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); + rcu_read_unlock(); + } } /* @@ -2773,8 +2766,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + ce = rq->hw_context; - GEM_BUG_ON(i915_active_is_idle(&ce->active)); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); /* Proclaim we have exclusive access to the context image! */ @@ -2782,10 +2777,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) rq = active_request(rq); if (!rq) { + /* Idle context; tidy up the ring so we can restart afresh */ ce->ring->head = ce->ring->tail; goto out_replay; } + /* Context has requests still in-flight; it should not be idle! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* @@ -3447,7 +3445,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } - if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } @@ -4169,6 +4167,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.i915 = ctx->i915; ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 7b3d9d4517a0..77445d100ca8 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -282,14 +282,14 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_engine_cs *engine; - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, [BCS0] = GEN6_GRDOM_BLT, [VCS0] = GEN6_GRDOM_MEDIA, [VCS1] = GEN8_GRDOM_MEDIA2, [VECS0] = GEN6_GRDOM_VECS, }; + struct intel_engine_cs *engine; u32 hw_mask; if (engine_mask == ALL_ENGINES) { @@ -413,7 +413,7 @@ static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, [VCS0] = GEN11_GRDOM_MEDIA, @@ -811,7 +811,7 @@ void intel_gt_set_wedged(struct intel_gt *gt) intel_wakeref_t wakeref; mutex_lock(>->reset.mutex); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) __intel_gt_set_wedged(gt); mutex_unlock(>->reset.mutex); } @@ -872,8 +872,14 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; - if (!ok) + if (!ok) { + /* + * Warn CI about the unrecoverable wedged condition. + * Time for a reboot. + */ + add_taint_for_CI(TAINT_WARN); return false; + } /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -1186,7 +1192,7 @@ void intel_gt_handle_error(struct intel_gt *gt, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine_mask &= INTEL_INFO(gt->i915)->engine_mask; @@ -1246,7 +1252,7 @@ void intel_gt_handle_error(struct intel_gt *gt, wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 5d43cbc3f345..747f7c7790eb 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -240,6 +240,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); + GEM_BUG_ON(!i915->gt.uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) @@ -248,6 +249,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; engine->base.gt = &i915->gt; + engine->base.uncore = i915->gt.uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index e8a40df79bd0..569a4105d49e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1695,14 +1695,14 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); drain_delayed_work(>->hangcheck.work); /* flush param */ err = intel_gt_live_subtests(tests, gt); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 393ae5321e1d..1276da059dc6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -19,6 +19,36 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + static int live_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; @@ -143,11 +173,11 @@ static int live_unlite_restore(struct drm_i915_private *i915, int prio) GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - local_bh_disable(); /* appease lockdep */ + local_irq_disable(); /* appease lockdep */ __context_pin_acquire(ce[1]); __execlists_update_reg_state(ce[1], engine); __context_pin_release(ce[1]); - local_bh_enable(); + local_irq_enable(); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { @@ -2076,6 +2106,158 @@ static int live_virtual_mask(void *arg) return 0; } +static int preserved_virtual_engine(struct drm_i915_private *i915, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *last = NULL; + struct i915_gem_context *ctx; + struct intel_context *ve; + struct i915_vma *scratch; + struct igt_live_test t; + unsigned int n; + int err = 0; + u32 *cs; + + ctx = kernel_context(i915); + if (!ctx) + return -ENOMEM; + + scratch = create_scratch(siblings[0]->gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_scratch; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < NUM_GPR_DW; n++) { + struct intel_engine_cs *engine = siblings[n % nsibling]; + struct i915_request *rq; + + rq = i915_request_create(ve); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_end; + } + + i915_request_put(last); + last = i915_request_get(rq); + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out_end; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); + *cs++ = n + 1; + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Restrict this request to run on a particular engine */ + rq->execution_mask = engine->mask; + i915_request_add(rq); + } + + if (i915_request_wait(last, 0, HZ / 5) < 0) { + err = -ETIME; + goto out_end; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_end; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + cs[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +out_end: + if (igt_live_test_end(&t)) + err = -EIO; + i915_request_put(last); +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(ctx); + return err; +} + +static int live_virtual_preserved(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_gt *gt = &i915->gt; + unsigned int class, inst; + + /* + * Check that the context image retains non-privileged (user) registers + * from one engine to the next. For this we check that the CS_GPR + * are preserved. + */ + + if (USES_GUC_SUBMISSION(i915)) + return 0; + + /* As we use CS_GPR we cannot run before they existed on all engines. */ + if (INTEL_GEN(i915) < 9) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = preserved_virtual_engine(i915, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + static int bond_virtual_engine(struct drm_i915_private *i915, unsigned int class, struct intel_engine_cs **siblings, @@ -2277,6 +2459,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_bond), }; @@ -2419,10 +2602,280 @@ static int live_lrc_layout(void *arg) return err; } +static int __live_lrc_state(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + enum { + RING_START_IDX = 0, + RING_TAIL_IDX, + MAX_IDX + }; + u32 expected[MAX_IDX]; + u32 *cs; + int err; + int n; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + cs = intel_ring_begin(rq, 4 * MAX_IDX); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_unpin; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); + *cs++ = 0; + + expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); + *cs++ = 0; + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + expected[RING_TAIL_IDX] = ce->ring->tail; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < MAX_IDX; n++) { + if (cs[n] != expected[n]) { + pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", + engine->name, n, cs[n], expected[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_unpin: + intel_context_unpin(ce); +err_put: + intel_context_put(ce); + return err; +} + +static int live_lrc_state(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check the live register state matches what we expect for this + * intel_context. + */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt->i915, id) { + err = __live_lrc_state(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + +static int gpr_make_dirty(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + u32 *cs; + int n; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = CS_GPR(engine, n); + *cs++ = STACK_MAGIC; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + return 0; +} + +static int __live_gpr_clear(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + u32 *cs; + int err; + int n; + + if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + return 0; /* GPR only on rcs0 for gen8 */ + + err = gpr_make_dirty(engine); + if (err) + return err; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_put; + } + + cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_put; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n]) { + pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", + engine->name, + n / 2, n & 1 ? "udw" : "ldw", + cs[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_put: + intel_context_put(ce); + return err; +} + +static int live_gpr_clear(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check that GPR registers are cleared in new contexts as we need + * to avoid leaking any information from previous contexts. + */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt->i915, id) { + err = __live_gpr_clear(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_state), + SUBTEST(live_gpr_clear), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index d79482db7fe8..419b38fa7828 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -17,7 +17,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); reset_count = i915_reset_count(>->i915->gpu_error); @@ -28,7 +28,7 @@ static int igt_global_reset(void *arg) err = -EINVAL; } - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); intel_gt_set_wedged(gt); GEM_BUG_ON(!intel_gt_is_wedged(gt)); intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); return intel_gt_is_wedged(gt) ? -EIO : 0; diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 95627e80f246..dc11f7ad50a2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -33,6 +33,30 @@ struct wa_lists { } engine[I915_NUM_ENGINES]; }; +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + i915_request_put(rq); + + return err; +} + static void reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) { @@ -243,7 +267,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine, struct i915_gem_context *ctx; struct intel_context *ce; struct i915_request *rq; - intel_wakeref_t wakeref; int err = 0; ctx = kernel_context(engine->i915); @@ -255,9 +278,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref) - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); @@ -267,13 +288,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, goto err; } - i915_request_add(rq); - - if (spin && !igt_wait_for_spinner(spin, rq)) { - pr_err("Spinner failed to start\n"); - err = -ETIMEDOUT; - } - + err = request_add_spin(rq, spin); err: if (err && spin) igt_spinner_end(spin); @@ -313,7 +328,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out_spin; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); igt_spinner_end(&spin); @@ -586,15 +601,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; err_request: - i915_request_add(rq); - if (err) - goto out_batch; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = request_add_sync(rq, err); + if (err) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); intel_gt_set_wedged(&ctx->i915->gt); - err = -EIO; goto out_batch; } @@ -697,7 +708,6 @@ static int live_dirty_whitelist(void *arg) struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct drm_file *file; int err = 0; @@ -706,13 +716,9 @@ static int live_dirty_whitelist(void *arg) if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - file = mock_file(i915); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto out_rpm; - } + if (IS_ERR(file)) + return PTR_ERR(file); ctx = live_context(i915, file); if (IS_ERR(ctx)) { @@ -731,8 +737,6 @@ static int live_dirty_whitelist(void *arg) out_file: mock_file_free(i915, file); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; } @@ -782,6 +786,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, if (IS_ERR(rq)) return PTR_ERR(rq); + i915_vma_lock(results); + err = i915_request_await_object(rq, results->obj, true); + if (err == 0) + err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(results); + if (err) + goto err_req; + srm = MI_STORE_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) srm++; @@ -807,12 +819,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, intel_ring_advance(rq, cs); err_req: - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; - - return err; + return request_add_sync(rq, err); } static int scrub_whitelisted_registers(struct i915_gem_context *ctx, @@ -872,9 +879,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, err = engine->emit_bb_start(rq, batch->node.start, 0, 0); err_request: - i915_request_add(rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; + err = request_add_sync(rq, err); err_unpin: i915_gem_object_unpin_map(batch->obj); @@ -1229,12 +1234,10 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { + ret = request_add_spin(rq, &spin); + if (ret) { pr_err("Spinner failed to start\n"); igt_spinner_fini(&spin); - ret = -ETIMEDOUT; goto err; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 36332064de9c..2cf2d3314f62 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -607,7 +607,6 @@ out_unlock: void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; /* @@ -616,7 +615,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) */ flush_work(&log->relay.flush_work); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d4625c97b4f9..33608a114d4e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -185,7 +185,7 @@ int intel_huc_check_status(struct intel_huc *huc) if (!intel_huc_is_supported(huc)) return -ENODEV; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) status = intel_uncore_read(gt->uncore, huc->status.reg); return (status & huc->status.mask) == huc->status.value; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 29a9eec60d2e..3fdbc935d155 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -587,7 +587,7 @@ void intel_uc_suspend(struct intel_uc *uc) if (!intel_guc_is_running(guc)) return; - with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) + with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) intel_uc_runtime_suspend(uc); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 298a3e879e65..a541b6ae534f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2405,6 +2405,13 @@ static int i915_dmc_info(struct seq_file *m, void *unused) if (INTEL_GEN(dev_priv) >= 12) { dc5_reg = TGL_DMC_DEBUG_DC5_COUNT; dc6_reg = TGL_DMC_DEBUG_DC6_COUNT; + /* + * NOTE: DMC_DEBUG3 is a general purpose reg. + * According to B.Specs:49196 DMC f/w reuses DC5/6 counter + * reg for DC3CO debugging and validation, + * but TGL DMC f/w is using DMC_DEBUG3 reg for DC3CO counter. + */ + seq_printf(m, "DC3CO count: %d\n", I915_READ(DMC_DEBUG3)); } else { dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : SKL_CSR_DC3_DC5_COUNT; @@ -3583,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); +static int +i915_perf_noa_delay_set(void *data, u64 val) +{ + struct drm_i915_private *i915 = data; + const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + + /* + * This would lead to infinite waits as we're doing timestamp + * difference on the CS with only 32bits. + */ + if (val > mul_u32_u32(U32_MAX, clk)) + return -EINVAL; + + atomic64_set(&i915->perf.noa_programming_delay, val); + return 0; +} + +static int +i915_perf_noa_delay_get(void *data, u64 *val) +{ + struct drm_i915_private *i915 = data; + + *val = atomic64_read(&i915->perf.noa_programming_delay); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops, + i915_perf_noa_delay_get, + i915_perf_noa_delay_set, + "%llu\n"); + #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -3592,6 +3630,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, #define DROP_IDLE BIT(6) #define DROP_RESET_ACTIVE BIT(7) #define DROP_RESET_SEQNO BIT(8) +#define DROP_RCU BIT(9) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ @@ -3600,7 +3639,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, DROP_SHRINK_ALL |\ DROP_IDLE | \ DROP_RESET_ACTIVE | \ - DROP_RESET_SEQNO) + DROP_RESET_SEQNO | \ + DROP_RCU) static int i915_drop_caches_get(void *data, u64 *val) { @@ -3652,6 +3692,9 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(i915); fs_reclaim_release(GFP_KERNEL); + if (val & DROP_RCU) + rcu_barrier(); + if (val & DROP_FREED) i915_gem_drain_freed_objects(i915); @@ -4333,6 +4376,7 @@ static const struct i915_debugfs_files { const char *name; const struct file_operations *fops; } i915_debugfs_files[] = { + {"i915_perf_noa_delay", &i915_perf_noa_delay_fops}, {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1da67b242113..c46b339064c0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -85,6 +85,7 @@ #include "intel_device_info.h" #include "intel_pch.h" #include "intel_runtime_pm.h" +#include "intel_memory_region.h" #include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_wopcm.h" @@ -93,6 +94,7 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" +#include "i915_perf_types.h" #include "i915_request.h" #include "i915_scheduler.h" #include "gt/intel_timeline.h" @@ -338,6 +340,7 @@ struct intel_csr { i915_reg_t mmioaddr[20]; u32 mmiodata[20]; u32 dc_state; + u32 target_dc_state; u32 allowed_dc_mask; intel_wakeref_t wakeref; }; @@ -500,6 +503,9 @@ struct i915_psr { bool sink_not_reliable; bool irq_aux_error; u16 su_x_granularity; + bool dc3co_enabled; + u32 dc3co_exit_delay; + struct delayed_work idle_work; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) @@ -974,305 +980,6 @@ struct intel_wm_config { bool sprites_scaled; }; -struct i915_oa_format { - u32 format; - int size; -}; - -struct i915_oa_reg { - i915_reg_t addr; - u32 value; -}; - -struct i915_oa_config { - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct i915_oa_reg *mux_regs; - u32 mux_regs_len; - const struct i915_oa_reg *b_counter_regs; - u32 b_counter_regs_len; - const struct i915_oa_reg *flex_regs; - u32 flex_regs_len; - - struct attribute_group sysfs_metric; - struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; - - atomic_t ref_count; -}; - -struct i915_perf_stream; - -/** - * struct i915_perf_stream_ops - the OPs to support a specific stream type - */ -struct i915_perf_stream_ops { - /** - * @enable: Enables the collection of HW samples, either in response to - * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened - * without `I915_PERF_FLAG_DISABLED`. - */ - void (*enable)(struct i915_perf_stream *stream); - - /** - * @disable: Disables the collection of HW samples, either in response - * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying - * the stream. - */ - void (*disable)(struct i915_perf_stream *stream); - - /** - * @poll_wait: Call poll_wait, passing a wait queue that will be woken - * once there is something ready to read() for the stream - */ - void (*poll_wait)(struct i915_perf_stream *stream, - struct file *file, - poll_table *wait); - - /** - * @wait_unlocked: For handling a blocking read, wait until there is - * something to ready to read() for the stream. E.g. wait on the same - * wait queue that would be passed to poll_wait(). - */ - int (*wait_unlocked)(struct i915_perf_stream *stream); - - /** - * @read: Copy buffered metrics as records to userspace - * **buf**: the userspace, destination buffer - * **count**: the number of bytes to copy, requested by userspace - * **offset**: zero at the start of the read, updated as the read - * proceeds, it represents how many bytes have been copied so far and - * the buffer offset for copying the next record. - * - * Copy as many buffered i915 perf samples and records for this stream - * to userspace as will fit in the given buffer. - * - * Only write complete records; returning -%ENOSPC if there isn't room - * for a complete record. - * - * Return any error condition that results in a short read such as - * -%ENOSPC or -%EFAULT, even though these may be squashed before - * returning to userspace. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @destroy: Cleanup any stream specific resources. - * - * The stream will always be disabled before this is called. - */ - void (*destroy)(struct i915_perf_stream *stream); -}; - -/** - * struct i915_perf_stream - state for a single open stream FD - */ -struct i915_perf_stream { - /** - * @dev_priv: i915 drm device - */ - struct drm_i915_private *dev_priv; - - /** - * @link: Links the stream into ``&drm_i915_private->streams`` - */ - struct list_head link; - - /** - * @wakeref: As we keep the device awake while the perf stream is - * active, we track our runtime pm reference for later release. - */ - intel_wakeref_t wakeref; - - /** - * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` - * properties given when opening a stream, representing the contents - * of a single sample as read() by userspace. - */ - u32 sample_flags; - - /** - * @sample_size: Considering the configured contents of a sample - * combined with the required header size, this is the total size - * of a single sample record. - */ - int sample_size; - - /** - * @ctx: %NULL if measuring system-wide across all contexts or a - * specific context that is being monitored. - */ - struct i915_gem_context *ctx; - - /** - * @enabled: Whether the stream is currently enabled, considering - * whether the stream was opened in a disabled state and based - * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. - */ - bool enabled; - - /** - * @ops: The callbacks providing the implementation of this specific - * type of configured stream. - */ - const struct i915_perf_stream_ops *ops; - - /** - * @oa_config: The OA configuration used by the stream. - */ - struct i915_oa_config *oa_config; - - /** - * @pinned_ctx: The OA context specific information. - */ - struct intel_context *pinned_ctx; - u32 specific_ctx_id; - u32 specific_ctx_id_mask; - - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - bool pollin; - - bool periodic; - int period_exponent; - - /** - * @oa_buffer: State of the OA buffer. - */ - struct { - struct i915_vma *vma; - u8 *vaddr; - u32 last_ctx_id; - int format; - int format_size; - int size_exponent; - - /** - * @ptr_lock: Locks reads and writes to all head/tail state - * - * Consider: the head and tail pointer state needs to be read - * consistently from a hrtimer callback (atomic context) and - * read() fop (user context) with tail pointer updates happening - * in atomic context and head updates in user context and the - * (unlikely) possibility of read() errors needing to reset all - * head/tail state. - * - * Note: Contention/performance aren't currently a significant - * concern here considering the relatively low frequency of - * hrtimer callbacks (5ms period) and that reads typically only - * happen in response to a hrtimer event and likely complete - * before the next callback. - * - * Note: This lock is not held *while* reading and copying data - * to userspace so the value of head observed in htrimer - * callbacks won't represent any partial consumption of data. - */ - spinlock_t ptr_lock; - - /** - * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to - * used for reading. - * - * Initial values of 0xffffffff are invalid and imply that an - * update is required (and should be ignored by an attempted - * read) - */ - struct { - u32 offset; - } tails[2]; - - /** - * @aged_tail_idx: Index for the aged tail ready to read() data up to. - */ - unsigned int aged_tail_idx; - - /** - * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer - * was read; used to determine when it is old enough to trust. - */ - u64 aging_timestamp; - - /** - * @head: Although we can always read back the head pointer register, - * we prefer to avoid trusting the HW state, just to avoid any - * risk that some hardware condition could * somehow bump the - * head pointer unpredictably and cause us to forward the wrong - * OA buffer data to userspace. - */ - u32 head; - } oa_buffer; -}; - -/** - * struct i915_oa_ops - Gen specific implementation of an OA unit stream - */ -struct i915_oa_ops { - /** - * @is_valid_b_counter_reg: Validates register's address for - * programming boolean counters for a particular platform. - */ - bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, - u32 addr); - - /** - * @is_valid_mux_reg: Validates register's address for programming mux - * for a particular platform. - */ - bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @is_valid_flex_reg: Validates register's address for programming - * flex EU filtering for a particular platform. - */ - bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @enable_metric_set: Selects and applies any MUX configuration to set - * up the Boolean and Custom (B/C) counters that are part of the - * counter reports being sampled. May apply system constraints such as - * disabling EU clock gating as required. - */ - int (*enable_metric_set)(struct i915_perf_stream *stream); - - /** - * @disable_metric_set: Remove system constraints associated with using - * the OA unit. - */ - void (*disable_metric_set)(struct i915_perf_stream *stream); - - /** - * @oa_enable: Enable periodic sampling - */ - void (*oa_enable)(struct i915_perf_stream *stream); - - /** - * @oa_disable: Disable periodic sampling - */ - void (*oa_disable)(struct i915_perf_stream *stream); - - /** - * @read: Copy data from the circular OA buffer into a given userspace - * buffer. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @oa_hw_tail_read: read the OA tail pointer register - * - * In particular this enables us to share all the fiddly code for - * handling the OA unit tail pointer race that affects multiple - * generations. - */ - u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); -}; - struct intel_cdclk_state { unsigned int cdclk, vco, ref, bypass; u8 voltage_level; @@ -1560,6 +1267,8 @@ struct drm_i915_private { I915_SAGV_NOT_CONTROLLED } sagv_status; + u32 sagv_block_time_us; + struct { /* * Raw watermark latency values: @@ -1630,61 +1339,7 @@ struct drm_i915_private { struct intel_runtime_pm runtime_pm; - struct { - bool initialized; - - struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; - - /* - * Lock associated with adding/modifying/removing OA configs - * in dev_priv->perf.metrics_idr. - */ - struct mutex metrics_lock; - - /* - * List of dynamic configurations, you need to hold - * dev_priv->perf.metrics_lock to access it. - */ - struct idr metrics_idr; - - /* - * Lock associated with anything below within this structure - * except exclusive_stream. - */ - struct mutex lock; - struct list_head streams; - - /* - * The stream currently using the OA unit. If accessed - * outside a syscall associated to its file - * descriptor, you need to hold - * dev_priv->drm.struct_mutex. - */ - struct i915_perf_stream *exclusive_stream; - - /** - * For rate limiting any notifications of spurious - * invalid OA reports - */ - struct ratelimit_state spurious_report_rs; - - struct i915_oa_config test_config; - - u32 gen7_latched_oastatus1; - u32 ctx_oactxctrl_offset; - u32 ctx_flexeu0_offset; - - /** - * The RPT_ID/reason field for Gen8+ includes a bit - * to determine if the CTX ID in the report is valid - * but the specific bit differs between Gen 8 and 9 - */ - u32 gen8_valid_ctx_bit; - - struct i915_oa_ops ops; - const struct i915_oa_format *oa_formats; - } perf; + struct i915_perf perf; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 167a7b56ed5b..f6f9675848b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -37,10 +37,8 @@ struct drm_i915_private; #define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ - pr_err("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ - GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ BUG(); \ } \ } while(0) @@ -66,17 +64,33 @@ struct drm_i915_private; #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#define GEM_TRACE_ERR(...) do { \ + pr_err(__VA_ARGS__); \ + trace_printk(__VA_ARGS__); \ +} while (0) #define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) #define GEM_TRACE_DUMP_ON(expr) \ do { if (expr) ftrace_dump(DUMP_ALL); } while (0) #else #define GEM_TRACE(...) do { } while (0) +#define GEM_TRACE_ERR(...) do { } while (0) #define GEM_TRACE_DUMP() do { } while (0) #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +static inline void tasklet_lock(struct tasklet_struct *t) +{ + while (!tasklet_trylock(t)) + cpu_relax(); +} + +static inline bool tasklet_is_locked(const struct tasklet_struct *t) +{ + return test_bit(TASKLET_STATE_RUN, &t->state); +} + static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index f4b3cbb1adce..ad33fbe90a28 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -5,6 +5,7 @@ #include "gt/intel_engine_user.h" #include "i915_drv.h" +#include "i915_perf.h" int i915_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; break; + case I915_PARAM_PERF_REVISION: + value = i915_perf_ioctl_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f2371b6083c6..a7c968b01af3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -412,7 +412,7 @@ void gen9_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(>->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(>->irq_lock); gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); @@ -423,7 +423,7 @@ void gen9_enable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(>->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(>->irq_lock); if (!guc->interrupts.enabled) { @@ -440,7 +440,7 @@ void gen9_disable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(>->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(>->irq_lock); guc->interrupts.enabled = false; @@ -2249,8 +2249,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; pins = hpd_tgp; } else if (HAS_PCH_MCC(dev_priv)) { - ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; - tc_hotplug_trigger = 0; + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); pins = hpd_icp; } else { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; @@ -3377,8 +3377,8 @@ static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_TGP, 0, - TGP_DDI_HPD_ENABLE_MASK, 0, + SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), hpd_icp); } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 296452f9efe4..4f1806f65040 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -46,7 +46,8 @@ i915_param_named(modeset, int, 0400, i915_param_named_unsafe(enable_dc, int, 0400, "Enable power-saving display C-states. " - "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)"); + "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " + "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); i915_param_named_unsafe(enable_fbc, int, 0600, "Enable frame buffer compression for power savings " diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 231388d06c82..54ec1c4190ac 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -196,7 +196,9 @@ #include <linux/uuid.h> #include "gem/i915_gem_context.h" -#include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" #include "i915_drv.h" @@ -342,11 +344,14 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * struct perf_open_properties - for validated properties given to open a stream * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags * @single_context: Whether a single or all gpu contexts should be monitored + * @hold_preemption: Whether the preemption is disabled for the filtered + * context * @ctx_handle: A gem ctx handle for use with @single_context * @metrics_set: An ID for an OA unit metric set advertised via sysfs * @oa_format: An OA unit HW report format * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this + * @engine: The engine (typically rcs0) being monitored by the OA unit * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure @@ -356,6 +361,7 @@ struct perf_open_properties { u32 sample_flags; u64 single_context:1; + u64 hold_preemption:1; u64 ctx_handle; /* OA sampling state */ @@ -363,69 +369,66 @@ struct perf_open_properties { int oa_format; bool oa_periodic; int oa_period_exponent; + + struct intel_engine_cs *engine; +}; + +struct i915_oa_config_bo { + struct llist_node node; + + struct i915_oa_config *oa_config; + struct i915_vma *vma; }; static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { - if (!PTR_ERR(oa_config->flex_regs)) - kfree(oa_config->flex_regs); - if (!PTR_ERR(oa_config->b_counter_regs)) - kfree(oa_config->b_counter_regs); - if (!PTR_ERR(oa_config->mux_regs)) - kfree(oa_config->mux_regs); - kfree(oa_config); -} + struct i915_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) -{ - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + kfree(oa_config->flex_regs); + kfree(oa_config->b_counter_regs); + kfree(oa_config->mux_regs); - free_oa_config(dev_priv, oa_config); + kfree_rcu(oa_config, rcu); } -static int get_oa_config(struct drm_i915_private *dev_priv, - int metrics_set, - struct i915_oa_config **out_config) +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) { - int ret; - - if (metrics_set == 1) { - *out_config = &dev_priv->perf.test_config; - atomic_inc(&dev_priv->perf.test_config.ref_count); - return 0; - } - - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); - if (ret) - return ret; + struct i915_oa_config *oa_config; - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); - if (!*out_config) - ret = -EINVAL; + rcu_read_lock(); + if (metrics_set == 1) + oa_config = &perf->test_config; else - atomic_inc(&(*out_config)->ref_count); + oa_config = idr_find(&perf->metrics_idr, metrics_set); + if (oa_config) + oa_config = i915_oa_config_get(oa_config); + rcu_read_unlock(); - mutex_unlock(&dev_priv->perf.metrics_lock); + return oa_config; +} - return ret; +static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) +{ + i915_oa_config_put(oa_bo->oa_config); + i915_vma_put(oa_bo->vma); + kfree(oa_bo); } static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; - return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; + return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; } static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - u32 oastatus1 = I915_READ(GEN7_OASTATUS1); + struct intel_uncore *uncore = stream->uncore; + u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } @@ -456,7 +459,6 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) */ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; int report_size = stream->oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; @@ -479,7 +481,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aged_tail = stream->oa_buffer.tails[aged_idx].offset; aging_tail = stream->oa_buffer.tails[!aged_idx].offset; - hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); + hw_tail = stream->perf->ops.oa_hw_tail_read(stream); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -655,7 +657,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -740,7 +742,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK); if (reason == 0) { - if (__ratelimit(&dev_priv->perf.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -755,7 +757,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -789,7 +791,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!dev_priv->perf.exclusive_stream->ctx || + if (!stream->perf->exclusive_stream->ctx || stream->specific_ctx_id == ctx_id || stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || reason & OAREPORT_REASON_CTX_SWITCH) { @@ -798,7 +800,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * While filtering for a single context we avoid * leaking the IDs of other contexts. */ - if (dev_priv->perf.exclusive_stream->ctx && + if (stream->perf->exclusive_stream->ctx && stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -830,7 +832,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, */ head += gtt_offset; - I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); + intel_uncore_write(uncore, GEN8_OAHEADPTR, + head & GEN8_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -864,14 +867,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 oastatus; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -896,14 +899,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", stream->period_exponent); - dev_priv->perf.ops.oa_disable(stream); - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); /* * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -911,8 +914,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - I915_WRITE(GEN8_OASTATUS, - oastatus & ~GEN8_OASTATUS_REPORT_LOST); + intel_uncore_write(uncore, GEN8_OASTATUS, + oastatus & ~GEN8_OASTATUS_REPORT_LOST); } return gen8_append_oa_reports(stream, buf, count, offset); @@ -943,7 +946,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -1017,7 +1020,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... */ if (report32[0] == 0) { - if (__ratelimit(&dev_priv->perf.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -1043,9 +1046,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, */ head += gtt_offset; - I915_WRITE(GEN7_OASTATUS2, - ((head & GEN7_OASTATUS2_HEAD_MASK) | - GEN7_OASTATUS2_MEM_SELECT_GGTT)); + intel_uncore_write(uncore, GEN7_OASTATUS2, + (head & GEN7_OASTATUS2_HEAD_MASK) | + GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1075,21 +1078,21 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 oastatus1; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); /* XXX: On Haswell we don't have a safe way to clear oastatus1 * bits while the OA unit is enabled (while the tail pointer * may be updated asynchronously) so we ignore status bits * that have already been reported to userspace. */ - oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1; + oastatus1 &= ~stream->perf->gen7_latched_oastatus1; /* We treat OABUFFER_OVERFLOW as a significant error: * @@ -1120,10 +1123,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", stream->period_exponent); - dev_priv->perf.ops.oa_disable(stream); - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); } if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { @@ -1131,7 +1134,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - dev_priv->perf.gen7_latched_oastatus1 |= + stream->perf->gen7_latched_oastatus1 |= GEN7_OASTATUS1_REPORT_LOST; } @@ -1196,9 +1199,7 @@ static int i915_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - return dev_priv->perf.ops.read(stream, buf, count, offset); + return stream->perf->ops.read(stream, buf, count, offset); } static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) @@ -1209,7 +1210,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) int err; for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - if (ce->engine->class != RENDER_CLASS) + if (ce->engine != stream->engine) /* first match! */ continue; /* @@ -1239,14 +1240,13 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; struct intel_context *ce; ce = oa_pin_context(stream); if (IS_ERR(ce)) return PTR_ERR(ce); - switch (INTEL_GEN(i915)) { + switch (INTEL_GEN(ce->engine->i915)) { case 7: { /* * On Haswell we don't do any post processing of the reports @@ -1260,7 +1260,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: case 10: - if (USES_GUC_SUBMISSION(i915)) { + if (USES_GUC_SUBMISSION(ce->engine->i915)) { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1296,7 +1296,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } default: - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(INTEL_GEN(ce->engine->i915)); } ce->tag = stream->specific_ctx_id_mask; @@ -1338,40 +1338,55 @@ free_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vaddr = NULL; } +static void +free_oa_configs(struct i915_perf_stream *stream) +{ + struct i915_oa_config_bo *oa_bo, *tmp; + + i915_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) + free_oa_config_bo(oa_bo); +} + +static void +free_noa_wait(struct i915_perf_stream *stream) +{ + i915_vma_unpin_and_release(&stream->noa_wait, 0); +} + static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - BUG_ON(stream != dev_priv->perf.exclusive_stream); + BUG_ON(stream != perf->exclusive_stream); /* * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. */ - mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.exclusive_stream = NULL; - dev_priv->perf.ops.disable_metric_set(stream); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); free_oa_buffer(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(dev_priv, stream->oa_config); + free_oa_configs(stream); + free_noa_wait(stream); - if (dev_priv->perf.spurious_report_rs.missed) { + if (perf->spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", - dev_priv->perf.spurious_report_rs.missed); + perf->spurious_report_rs.missed); } } static void gen7_init_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; @@ -1380,13 +1395,14 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ - I915_WRITE(GEN7_OASTATUS2, - gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ + intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ + gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN7_OABUFFER, gtt_offset); + intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); - I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */ + gtt_offset | OABUFFER_SIZE_16M); /* Mark that we need updated tail pointers to read from... */ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1398,7 +1414,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) * already seen since they can't be cleared while periodic * sampling is enabled. */ - dev_priv->perf.gen7_latched_oastatus1 = 0; + stream->perf->gen7_latched_oastatus1 = 0; /* NB: although the OA buffer will initially be allocated * zeroed via shmfs (and so this memset is redundant when @@ -1413,25 +1429,22 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) */ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ stream->pollin = false; } static void gen8_init_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - I915_WRITE(GEN8_OASTATUS, 0); - I915_WRITE(GEN8_OAHEADPTR, gtt_offset); + intel_uncore_write(uncore, GEN8_OASTATUS, 0); + intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN8_OABUFFER_UDW, 0); + intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); /* * PRM says: @@ -1441,9 +1454,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) * to enable proper functionality of the overflow * bit." */ - I915_WRITE(GEN8_OABUFFER, gtt_offset | + intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset | OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); - I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); + intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1472,17 +1485,12 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) */ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* - * Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ stream->pollin = false; } static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; - struct drm_i915_private *dev_priv = stream->dev_priv; struct i915_vma *vma; int ret; @@ -1492,7 +1500,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); - bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); + bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); return PTR_ERR(bo); @@ -1515,10 +1523,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) goto err_unpin; } - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", - i915_ggtt_offset(stream->oa_buffer.vma), - stream->oa_buffer.vaddr); - return 0; err_unpin: @@ -1533,50 +1537,380 @@ err_unref: return ret; } -static void config_oa_regs(struct drm_i915_private *dev_priv, - const struct i915_oa_reg *regs, - u32 n_regs) +static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, + bool save, i915_reg_t reg, u32 offset, + u32 dword_count) +{ + u32 cmd; + u32 d; + + cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM; + if (INTEL_GEN(stream->perf->i915) >= 8) + cmd++; + + for (d = 0; d < dword_count; d++) { + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(reg) + 4 * d; + *cs++ = intel_gt_scratch_offset(stream->engine->gt, + offset) + 4 * d; + *cs++ = 0; + } + + return cs; +} + +static int alloc_noa_wait(struct i915_perf_stream *stream) +{ + struct drm_i915_private *i915 = stream->perf->i915; + struct drm_i915_gem_object *bo; + struct i915_vma *vma; + const u64 delay_ticks = 0xffffffffffffffff - + DIV64_U64_ROUND_UP( + atomic64_read(&stream->perf->noa_programming_delay) * + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, + 1000000ull); + const u32 base = stream->engine->mmio_base; +#define CS_GPR(x) GEN8_RING_CS_GPR(base, x) + u32 *batch, *ts0, *cs, *jump; + int ret, i; + enum { + START_TS, + NOW_TS, + DELTA_TS, + JUMP_PREDICATE, + DELTA_TARGET, + N_CS_GPR + }; + + bo = i915_gem_object_create_internal(i915, 4096); + if (IS_ERR(bo)) { + DRM_ERROR("Failed to allocate NOA wait batchbuffer\n"); + return PTR_ERR(bo); + } + + /* + * We pin in GGTT because we jump into this buffer now because + * multiple OA config BOs will have a jump to this address and it + * needs to be fixed during the lifetime of the i915/perf stream. + */ + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); + if (IS_ERR(batch)) { + ret = PTR_ERR(batch); + goto err_unpin; + } + + /* Save registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, true /* save */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, true /* save */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* First timestamp snapshot location. */ + ts0 = cs; + + /* + * Initial snapshot of the timestamp register to implement the wait. + * We work with 32b values, so clear out the top 32b bits of the + * register because the ALU works 64bits. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)); + + /* + * This is the location we're going to jump back into until the + * required amount of time has passed. + */ + jump = cs; + + /* + * Take another snapshot of the timestamp register. Take care to clear + * up the top 32bits of CS_GPR(1) as we're using it for other + * operations below. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)); + + /* + * Do a diff between the 2 timestamps and store the result back into + * CS_GPR(1). + */ + *cs++ = MI_MATH(5); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); + *cs++ = MI_MATH_SUB; + *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU); + *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the + * timestamp have rolled over the 32bits) into the predicate register + * to be used for the predicated jump. + */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Restart from the beginning if we had timestamps roll over. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; + *cs++ = 0; + + /* + * Now add the diff between to previous timestamps and add it to : + * (((1 * << 64) - 1) - delay_ns) + * + * When the Carry Flag contains 1 this means the elapsed time is + * longer than the expected delay, and we can exit the wait loop. + */ + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)); + *cs++ = lower_32_bits(delay_ticks); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4; + *cs++ = upper_32_bits(delay_ticks); + + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the result into the predicate register to be used for the + * predicated jump. + */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Predicate the jump. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; + *cs++ = 0; + + /* Restore registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, false /* restore */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, false /* restore */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* And return to the ring. */ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); + + i915_gem_object_flush_map(bo); + i915_gem_object_unpin_map(bo); + + stream->noa_wait = vma; + return 0; + +err_unpin: + i915_vma_unpin_and_release(&vma, 0); +err_unref: + i915_gem_object_put(bo); + return ret; +} + +static u32 *write_cs_mi_lri(u32 *cs, + const struct i915_oa_reg *reg_data, + u32 n_regs) { u32 i; for (i = 0; i < n_regs; i++) { - const struct i915_oa_reg *reg = regs + i; + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min_t(u32, + n_regs - i, + MI_LOAD_REGISTER_IMM_MAX_REGS); + + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; + } + + return cs; +} + +static int num_lri_dwords(int num_regs) +{ + int count = 0; + + if (num_regs > 0) { + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); + count += num_regs * 2; + } + + return count; +} + +static struct i915_oa_config_bo * +alloc_oa_config_buffer(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config) +{ + struct drm_i915_gem_object *obj; + struct i915_oa_config_bo *oa_bo; + size_t config_length = 0; + u32 *cs; + int err; - I915_WRITE(reg->addr, reg->value); + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); + if (!oa_bo) + return ERR_PTR(-ENOMEM); + + config_length += num_lri_dwords(oa_config->mux_regs_len); + config_length += num_lri_dwords(oa_config->b_counter_regs_len); + config_length += num_lri_dwords(oa_config->flex_regs_len); + config_length++; /* MI_BATCH_BUFFER_END */ + config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_free; + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_oa_bo; + } + + cs = write_cs_mi_lri(cs, + oa_config->mux_regs, + oa_config->mux_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->b_counter_regs, + oa_config->b_counter_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->flex_regs, + oa_config->flex_regs_len); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + oa_bo->vma = i915_vma_instance(obj, + &stream->engine->gt->ggtt->vm, + NULL); + if (IS_ERR(oa_bo->vma)) { + err = PTR_ERR(oa_bo->vma); + goto err_oa_bo; } + + oa_bo->oa_config = i915_oa_config_get(oa_config); + llist_add(&oa_bo->node, &stream->oa_config_bos); + + return oa_bo; + +err_oa_bo: + i915_gem_object_put(obj); +err_free: + kfree(oa_bo); + return ERR_PTR(err); } -static void delay_after_mux(void) +static struct i915_vma * +get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { + struct i915_oa_config_bo *oa_bo; + /* - * It apparently takes a fairly long time for a new MUX - * configuration to be be applied after these register writes. - * This delay duration was derived empirically based on the - * render_basic config but hopefully it covers the maximum - * configuration latency. - * - * As a fallback, the checks in _append_oa_reports() to skip - * invalid OA reports do also seem to work to discard reports - * generated before this config has completed - albeit not - * silently. - * - * Unfortunately this is essentially a magic number, since we - * don't currently know of a reliable mechanism for predicting - * how long the MUX config will take to apply and besides - * seeing invalid reports we don't know of a reliable way to - * explicitly check that the MUX config has landed. - * - * It's even possible we've miss characterized the underlying - * problem - it just seems like the simplest explanation why - * a delay at this location would mitigate any invalid reports. + * Look for the buffer in the already allocated BOs attached + * to the stream. */ - usleep_range(15000, 20000); + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { + if (oa_bo->oa_config == oa_config && + memcmp(oa_bo->oa_config->uuid, + oa_config->uuid, + sizeof(oa_config->uuid)) == 0) + goto out; + } + + oa_bo = alloc_oa_config_buffer(stream, oa_config); + if (IS_ERR(oa_bo)) + return ERR_CAST(oa_bo); + +out: + return i915_vma_get(oa_bo->vma); +} + +static int emit_oa_config(struct i915_perf_stream *stream, + struct intel_context *ce) +{ + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = get_oa_vma(stream, stream->oa_config); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, 0); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (err) + goto err_add_request; + + err = rq->engine->emit_bb_start(rq, + vma->node.start, 0, + I915_DISPATCH_SECURE); +err_add_request: + i915_request_add(rq); +err_vma_unpin: + i915_vma_unpin(vma); +err_vma_put: + i915_vma_put(vma); + return err; +} + +static struct intel_context *oa_context(struct i915_perf_stream *stream) +{ + return stream->pinned_ctx ?: stream->engine->kernel_context; } static int hsw_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; + struct intel_uncore *uncore = stream->uncore; /* * PRM: @@ -1588,31 +1922,24 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * count the events from non-render domain. Unit level clock * gating for RCS should also be disabled. */ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + GEN7_DOP_CLOCK_GATE_ENABLE, 0); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(stream, oa_context(stream)); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & - ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | - GEN7_DOP_CLOCK_GATE_ENABLE)); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + 0, GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, @@ -1647,9 +1974,8 @@ static void gen8_update_reg_state_unlocked(const struct intel_context *ce, const struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = ce->engine->i915; - u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; + u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ i915_reg_t flex_regs[] = { EU_PERF_CNTL0, @@ -1672,7 +1998,8 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); - reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu); + reg_state[CTX_R_PWR_CLK_STATE] = + intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu); } struct flex { @@ -1827,9 +2154,9 @@ static int gen8_configure_context(struct i915_gem_context *ctx, static int gen8_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config) { - struct drm_i915_private *i915 = stream->dev_priv; + struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ - const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) struct flex regs[] = { { @@ -1838,7 +2165,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }, { GEN8_OACTXCONTROL, - i915->perf.ctx_oactxctrl_offset + 1, + stream->perf->ctx_oactxctrl_offset + 1, ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME) @@ -1859,7 +2186,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); - lockdep_assert_held(&i915->drm.struct_mutex); + lockdep_assert_held(&stream->perf->lock); /* * The OA register config is setup through the context image. This image @@ -1922,7 +2249,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, static int gen8_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; const struct i915_oa_config *oa_config = stream->oa_config; int ret; @@ -1949,10 +2276,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN_RANGE(dev_priv, 9, 11)) { - I915_WRITE(GEN8_OA_DEBUG, - _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); + if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) { + intel_uncore_write(uncore, GEN8_OA_DEBUG, + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); } /* @@ -1964,41 +2291,33 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(stream, oa_context(stream)); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } static void gen10_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ - I915_WRITE(RPM_CONFIG1, - I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; struct i915_gem_context *ctx = stream->ctx; u32 ctx_id = stream->specific_ctx_id; bool periodic = stream->periodic; @@ -2016,19 +2335,19 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) */ gen7_init_oa_buffer(stream); - I915_WRITE(GEN7_OACONTROL, - (ctx_id & GEN7_OACONTROL_CTX_MASK) | - (period_exponent << - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | - GEN7_OACONTROL_ENABLE); + intel_uncore_write(uncore, GEN7_OACONTROL, + (ctx_id & GEN7_OACONTROL_CTX_MASK) | + (period_exponent << + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | + (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | + GEN7_OACONTROL_ENABLE); } static void gen8_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 report_format = stream->oa_buffer.format; /* @@ -2047,9 +2366,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) * filtering and instead filter on the cpu based on the context-id * field of reports */ - I915_WRITE(GEN8_OACONTROL, (report_format << - GEN8_OA_REPORT_FORMAT_SHIFT) | - GEN8_OA_COUNTER_ENABLE); + intel_uncore_write(uncore, GEN8_OACONTROL, + (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) | + GEN8_OA_COUNTER_ENABLE); } /** @@ -2063,9 +2382,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) */ static void i915_oa_stream_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_enable(stream); if (stream->periodic) hrtimer_start(&stream->poll_check_timer, @@ -2075,7 +2392,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN7_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2086,7 +2403,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream) static void gen8_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN8_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2105,9 +2422,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) */ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - dev_priv->perf.ops.oa_disable(stream); + stream->perf->ops.oa_disable(stream); if (stream->periodic) hrtimer_cancel(&stream->poll_check_timer); @@ -2144,15 +2459,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, struct drm_i915_perf_open_param *param, struct perf_open_properties *props) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; int format_size; int ret; - /* If the sysfs metrics/ directory wasn't registered for some + if (!props->engine) { + DRM_DEBUG("OA engine not specified\n"); + return -EINVAL; + } + + /* + * If the sysfs metrics/ directory wasn't registered for some * reason then don't let userspace try their luck with config * IDs */ - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -2162,16 +2483,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!dev_priv->perf.ops.enable_metric_set) { + if (!perf->ops.enable_metric_set) { DRM_DEBUG("OA unit not supported\n"); return -ENODEV; } - /* To avoid the complexity of having to accurately filter + /* + * To avoid the complexity of having to accurately filter * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (dev_priv->perf.exclusive_stream) { + if (perf->exclusive_stream) { DRM_DEBUG("OA unit already in use\n"); return -EBUSY; } @@ -2181,9 +2503,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } + stream->engine = props->engine; + stream->uncore = stream->engine->gt->uncore; + stream->sample_size = sizeof(struct drm_i915_perf_record_header); - format_size = dev_priv->perf.oa_formats[props->oa_format].size; + format_size = perf->oa_formats[props->oa_format].size; stream->sample_flags |= SAMPLE_OA_REPORT; stream->sample_size += format_size; @@ -2192,8 +2517,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (WARN_ON(stream->oa_buffer.format_size == 0)) return -EINVAL; + stream->hold_preemption = props->hold_preemption; + stream->oa_buffer.format = - dev_priv->perf.oa_formats[props->oa_format].format; + perf->oa_formats[props->oa_format].format; stream->periodic = props->oa_periodic; if (stream->periodic) @@ -2207,9 +2534,16 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } - ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + ret = alloc_noa_wait(stream); if (ret) { + DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); + goto err_noa_wait_alloc; + } + + stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); + if (!stream->oa_config) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); + ret = -EINVAL; goto err_config; } @@ -2225,27 +2559,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + intel_engine_pm_get(stream->engine); + intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - goto err_lock; - stream->ops = &i915_oa_stream_ops; - dev_priv->perf.exclusive_stream = stream; + perf->exclusive_stream = stream; - ret = dev_priv->perf.ops.enable_metric_set(stream); + ret = perf->ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; } - mutex_unlock(&dev_priv->drm.struct_mutex); + DRM_DEBUG("opening stream oa config uuid=%s\n", + stream->oa_config->uuid); hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -2256,20 +2587,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return 0; err_enable: - dev_priv->perf.exclusive_stream = NULL; - dev_priv->perf.ops.disable_metric_set(stream); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); -err_lock: free_oa_buffer(stream); err_oa_buf_alloc: - put_oa_config(dev_priv, stream->oa_config); + free_oa_configs(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); err_config: + free_noa_wait(stream); + +err_noa_wait_alloc: if (stream->ctx) oa_put_render_ctx_id(stream); @@ -2359,7 +2691,7 @@ static ssize_t i915_perf_read(struct file *file, loff_t *ppos) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; ssize_t ret; /* To ensure it's handled consistently we simply treat all reads of a @@ -2382,15 +2714,15 @@ static ssize_t i915_perf_read(struct file *file, if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } while (ret == -EAGAIN); } else { - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /* We allow the poll checking to sometimes report false positive EPOLLIN @@ -2428,7 +2760,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) /** * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream - * @dev_priv: i915 device instance * @stream: An i915 perf stream * @file: An i915 perf stream file * @wait: poll() state table @@ -2437,15 +2768,14 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that * will be woken for new stream data. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: any poll events that are ready without sleeping */ -static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, - struct i915_perf_stream *stream, - struct file *file, - poll_table *wait) +static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait) { __poll_t events = 0; @@ -2479,12 +2809,12 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, static __poll_t i915_perf_poll(struct file *file, poll_table *wait) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; __poll_t ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_poll_locked(dev_priv, stream, file, wait); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_poll_locked(stream, file, wait); + mutex_unlock(&perf->lock); return ret; } @@ -2509,6 +2839,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) if (stream->ops->enable) stream->ops->enable(stream); + + if (stream->hold_preemption) + i915_gem_context_set_nopreempt(stream->ctx); } /** @@ -2533,17 +2866,54 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) /* Allow stream->ops->disable() to refer to this */ stream->enabled = false; + if (stream->hold_preemption) + i915_gem_context_clear_nopreempt(stream->ctx); + if (stream->ops->disable) stream->ops->disable(stream); } +static long i915_perf_config_locked(struct i915_perf_stream *stream, + unsigned long metrics_set) +{ + struct i915_oa_config *config; + long ret = stream->oa_config->id; + + config = i915_perf_get_oa_config(stream->perf, metrics_set); + if (!config) + return -EINVAL; + + if (config != stream->oa_config) { + int err; + + /* + * If OA is bound to a specific context, emit the + * reconfiguration inline from that context. The update + * will then be ordered with respect to submission on that + * context. + * + * When set globally, we use a low priority kernel context, + * so it will effectively take effect when idle. + */ + err = emit_oa_config(stream, oa_context(stream)); + if (err == 0) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + i915_oa_config_put(config); + + return ret; +} + /** * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs * @stream: An i915 perf stream * @cmd: the ioctl request * @arg: the ioctl data * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: zero on success or a negative error code. Returns -EINVAL for @@ -2560,6 +2930,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, case I915_PERF_IOCTL_DISABLE: i915_perf_disable_locked(stream); return 0; + case I915_PERF_IOCTL_CONFIG: + return i915_perf_config_locked(stream, arg); } return -EINVAL; @@ -2581,12 +2953,12 @@ static long i915_perf_ioctl(struct file *file, unsigned long arg) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; long ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_ioctl_locked(stream, cmd, arg); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); return ret; } @@ -2598,7 +2970,7 @@ static long i915_perf_ioctl(struct file *file, * Frees all resources associated with the given i915 perf @stream, disabling * any associated data capture in the process. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. */ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) @@ -2609,8 +2981,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) if (stream->ops->destroy) stream->ops->destroy(stream); - list_del(&stream->link); - if (stream->ctx) i915_gem_context_put(stream->ctx); @@ -2631,14 +3001,14 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) static int i915_perf_release(struct inode *inode, struct file *file) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); i915_perf_destroy_locked(stream); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); /* Release the reference the perf stream kept on the driver. */ - drm_dev_put(&dev_priv->drm); + drm_dev_put(&perf->i915->drm); return 0; } @@ -2660,7 +3030,7 @@ static const struct file_operations fops = { /** * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` * @props: individually validated u64 property value pairs * @file: drm file @@ -2668,7 +3038,7 @@ static const struct file_operations fops = { * See i915_perf_ioctl_open() for interface details. * * Implements further stream config validation and stream initialization on - * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex + * behalf of i915_perf_open_ioctl() with the &perf->lock mutex * taken to serialize with any non-file-operation driver hooks. * * Note: at this point the @props have only been validated in isolation and @@ -2683,7 +3053,7 @@ static const struct file_operations fops = { * Returns: zero on success or a negative error code. */ static int -i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, +i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_perf_open_param *param, struct perf_open_properties *props, struct drm_file *file) @@ -2708,6 +3078,15 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, } } + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -2722,7 +3101,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. */ - if (IS_HASWELL(dev_priv) && specific_ctx) + if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option @@ -2732,7 +3111,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -2743,7 +3122,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_ctx; } - stream->dev_priv = dev_priv; + stream->perf = perf; stream->ctx = specific_ctx; ret = i915_oa_stream_init(stream, param, props); @@ -2759,8 +3138,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_flags; } - list_add(&stream->link, &dev_priv->perf.streams); - if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) f_flags |= O_CLOEXEC; if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) @@ -2769,7 +3146,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); if (stream_fd < 0) { ret = stream_fd; - goto err_open; + goto err_flags; } if (!(param->flags & I915_PERF_FLAG_DISABLED)) @@ -2778,12 +3155,10 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, /* Take a reference on the driver that will be kept with stream_fd * until its release. */ - drm_dev_get(&dev_priv->drm); + drm_dev_get(&perf->i915->drm); return stream_fd; -err_open: - list_del(&stream->link); err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); @@ -2796,15 +3171,15 @@ err: return ret; } -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) +static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { return div64_u64(1000000000ULL * (2ULL << exponent), - 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); + 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); } /** * read_properties_unlocked - validate + copy userspace stream open properties - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @uprops: The array of u64 key value pairs given by userspace * @n_props: The number of key value pairs expected in @uprops * @props: The stream configuration built up while validating properties @@ -2817,7 +3192,7 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) * we shouldn't validate or assume anything about ordering here. This doesn't * rule out defining new properties with ordering requirements in the future. */ -static int read_properties_unlocked(struct drm_i915_private *dev_priv, +static int read_properties_unlocked(struct i915_perf *perf, u64 __user *uprops, u32 n_props, struct perf_open_properties *props) @@ -2832,6 +3207,15 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, return -EINVAL; } + /* At the moment we only support using i915-perf on the RCS. */ + props->engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0); + if (!props->engine) { + DRM_DEBUG("No RENDER-capable engines\n"); + return -EINVAL; + } + /* Considering that ID = 0 is reserved and assuming that we don't * (currently) expect any configurations to ever specify duplicate * values for a particular property ID then the last _PROP_MAX value is @@ -2883,7 +3267,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, value); return -EINVAL; } - if (!dev_priv->perf.oa_formats[value].size) { + if (!perf->oa_formats[value].size) { DRM_DEBUG("Unsupported OA report format %llu\n", value); return -EINVAL; @@ -2904,7 +3288,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, */ BUILD_BUG_ON(sizeof(oa_period) != 8); - oa_period = oa_exponent_to_ns(dev_priv, value); + oa_period = oa_exponent_to_ns(perf, value); /* This check is primarily to ensure that oa_period <= * UINT32_MAX (before passing to do_div which only @@ -2929,6 +3313,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: + props->hold_preemption = !!value; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -2958,7 +3345,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, * mutex to avoid an awkward lockdep with mmap_sem. * * Most of the implementation details are handled by - * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock + * i915_perf_open_ioctl_locked() after taking the &perf->lock * mutex for serializing with any non-file-operation driver hooks. * * Return: A newly opened i915 Perf stream file descriptor or negative @@ -2967,13 +3354,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_open_param *param = data; struct perf_open_properties props; u32 known_open_flags; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -2986,124 +3373,128 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = read_properties_unlocked(dev_priv, + ret = read_properties_unlocked(perf, u64_to_user_ptr(param->properties_ptr), param->num_properties, &props); if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_open_ioctl_locked(perf, param, &props, file); + mutex_unlock(&perf->lock); return ret; } /** * i915_perf_register - exposes i915-perf to userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * In particular OA metric sets are advertised under a sysfs metrics/ * directory allowing userspace to enumerate valid IDs that can be * used to open an i915-perf stream. */ -void i915_perf_register(struct drm_i915_private *dev_priv) +void i915_perf_register(struct drm_i915_private *i915) { + struct i915_perf *perf = &i915->perf; int ret; - if (!dev_priv->perf.initialized) + if (!perf->i915) return; /* To be sure we're synchronized with an attempted * i915_perf_open_ioctl(); considering that we register after * being exposed to userspace. */ - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); - dev_priv->perf.metrics_kobj = + perf->metrics_kobj = kobject_create_and_add("metrics", - &dev_priv->drm.primary->kdev->kobj); - if (!dev_priv->perf.metrics_kobj) + &i915->drm.primary->kdev->kobj); + if (!perf->metrics_kobj) goto exit; - sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr); - - if (INTEL_GEN(dev_priv) >= 11) { - i915_perf_load_test_config_icl(dev_priv); - } else if (IS_CANNONLAKE(dev_priv)) { - i915_perf_load_test_config_cnl(dev_priv); - } else if (IS_COFFEELAKE(dev_priv)) { - if (IS_CFL_GT2(dev_priv)) - i915_perf_load_test_config_cflgt2(dev_priv); - if (IS_CFL_GT3(dev_priv)) - i915_perf_load_test_config_cflgt3(dev_priv); - } else if (IS_GEMINILAKE(dev_priv)) { - i915_perf_load_test_config_glk(dev_priv); - } else if (IS_KABYLAKE(dev_priv)) { - if (IS_KBL_GT2(dev_priv)) - i915_perf_load_test_config_kblgt2(dev_priv); - else if (IS_KBL_GT3(dev_priv)) - i915_perf_load_test_config_kblgt3(dev_priv); - } else if (IS_BROXTON(dev_priv)) { - i915_perf_load_test_config_bxt(dev_priv); - } else if (IS_SKYLAKE(dev_priv)) { - if (IS_SKL_GT2(dev_priv)) - i915_perf_load_test_config_sklgt2(dev_priv); - else if (IS_SKL_GT3(dev_priv)) - i915_perf_load_test_config_sklgt3(dev_priv); - else if (IS_SKL_GT4(dev_priv)) - i915_perf_load_test_config_sklgt4(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - i915_perf_load_test_config_chv(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - i915_perf_load_test_config_bdw(dev_priv); - } else if (IS_HASWELL(dev_priv)) { - i915_perf_load_test_config_hsw(dev_priv); -} - - if (dev_priv->perf.test_config.id == 0) + sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); + + if (INTEL_GEN(i915) >= 11) { + i915_perf_load_test_config_icl(i915); + } else if (IS_CANNONLAKE(i915)) { + i915_perf_load_test_config_cnl(i915); + } else if (IS_COFFEELAKE(i915)) { + if (IS_CFL_GT2(i915)) + i915_perf_load_test_config_cflgt2(i915); + if (IS_CFL_GT3(i915)) + i915_perf_load_test_config_cflgt3(i915); + } else if (IS_GEMINILAKE(i915)) { + i915_perf_load_test_config_glk(i915); + } else if (IS_KABYLAKE(i915)) { + if (IS_KBL_GT2(i915)) + i915_perf_load_test_config_kblgt2(i915); + else if (IS_KBL_GT3(i915)) + i915_perf_load_test_config_kblgt3(i915); + } else if (IS_BROXTON(i915)) { + i915_perf_load_test_config_bxt(i915); + } else if (IS_SKYLAKE(i915)) { + if (IS_SKL_GT2(i915)) + i915_perf_load_test_config_sklgt2(i915); + else if (IS_SKL_GT3(i915)) + i915_perf_load_test_config_sklgt3(i915); + else if (IS_SKL_GT4(i915)) + i915_perf_load_test_config_sklgt4(i915); + } else if (IS_CHERRYVIEW(i915)) { + i915_perf_load_test_config_chv(i915); + } else if (IS_BROADWELL(i915)) { + i915_perf_load_test_config_bdw(i915); + } else if (IS_HASWELL(i915)) { + i915_perf_load_test_config_hsw(i915); + } + + if (perf->test_config.id == 0) goto sysfs_error; - ret = sysfs_create_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.test_config.sysfs_metric); + ret = sysfs_create_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); if (ret) goto sysfs_error; - atomic_set(&dev_priv->perf.test_config.ref_count, 1); + perf->test_config.perf = perf; + kref_init(&perf->test_config.ref); goto exit; sysfs_error: - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; exit: - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /** * i915_perf_unregister - hide i915-perf from userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * i915-perf state cleanup is split up into an 'unregister' and * 'deinit' phase where the interface is first hidden from * userspace by i915_perf_unregister() before cleaning up * remaining state in i915_perf_fini(). */ -void i915_perf_unregister(struct drm_i915_private *dev_priv) +void i915_perf_unregister(struct drm_i915_private *i915) { - if (!dev_priv->perf.metrics_kobj) + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj) return; - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.test_config.sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; } -static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) { static const i915_reg_t flex_eu_regs[] = { EU_PERF_CNTL0, @@ -3123,7 +3514,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) return false; } -static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || @@ -3133,7 +3524,7 @@ static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr <= i915_mmio_reg_offset(OACEC7_1)); } -static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && @@ -3144,34 +3535,34 @@ static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } -static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); } -static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen8_is_valid_mux_addr(dev_priv, addr) || + return gen8_is_valid_mux_addr(perf, addr) || addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } -static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } -static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || (addr >= 0x182300 && addr <= 0x1823A4); } @@ -3194,8 +3585,8 @@ static u32 mask_reg_value(u32 reg, u32 val) return val; } -static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, - bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), +static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, + bool (*is_valid)(struct i915_perf *perf, u32 addr), u32 __user *regs, u32 n_regs) { @@ -3225,7 +3616,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, if (err) goto addr_err; - if (!is_valid(dev_priv, addr)) { + if (!is_valid(perf, addr)) { DRM_DEBUG("Invalid oa_reg address: %X\n", addr); err = -EINVAL; goto addr_err; @@ -3258,7 +3649,7 @@ static ssize_t show_dynamic_id(struct device *dev, return sprintf(buf, "%d\n", oa_config->id); } -static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, +static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf, struct i915_oa_config *oa_config) { sysfs_attr_init(&oa_config->sysfs_metric_id.attr); @@ -3273,7 +3664,7 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, oa_config->sysfs_metric.name = oa_config->uuid; oa_config->sysfs_metric.attrs = oa_config->attrs; - return sysfs_create_group(dev_priv->perf.metrics_kobj, + return sysfs_create_group(perf->metrics_kobj, &oa_config->sysfs_metric); } @@ -3293,17 +3684,18 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_oa_config *args = data; struct i915_oa_config *oa_config, *tmp; + static struct i915_oa_reg *regs; int err, id; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -3326,7 +3718,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, return -ENOMEM; } - atomic_set(&oa_config->ref_count, 1); + oa_config->perf = perf; + kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { DRM_DEBUG("Invalid uuid format for OA config\n"); @@ -3340,59 +3733,59 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); oa_config->mux_regs_len = args->n_mux_regs; - oa_config->mux_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_mux_reg, - u64_to_user_ptr(args->mux_regs_ptr), - args->n_mux_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_mux_reg, + u64_to_user_ptr(args->mux_regs_ptr), + args->n_mux_regs); - if (IS_ERR(oa_config->mux_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for mux_regs\n"); - err = PTR_ERR(oa_config->mux_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->mux_regs = regs; oa_config->b_counter_regs_len = args->n_boolean_regs; - oa_config->b_counter_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_b_counter_reg, - u64_to_user_ptr(args->boolean_regs_ptr), - args->n_boolean_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_b_counter_reg, + u64_to_user_ptr(args->boolean_regs_ptr), + args->n_boolean_regs); - if (IS_ERR(oa_config->b_counter_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); - err = PTR_ERR(oa_config->b_counter_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->b_counter_regs = regs; - if (INTEL_GEN(dev_priv) < 8) { + if (INTEL_GEN(perf->i915) < 8) { if (args->n_flex_regs != 0) { err = -EINVAL; goto reg_err; } } else { oa_config->flex_regs_len = args->n_flex_regs; - oa_config->flex_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_flex_reg, - u64_to_user_ptr(args->flex_regs_ptr), - args->n_flex_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_flex_reg, + u64_to_user_ptr(args->flex_regs_ptr), + args->n_flex_regs); - if (IS_ERR(oa_config->flex_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for flex_regs\n"); - err = PTR_ERR(oa_config->flex_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->flex_regs = regs; } - err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + err = mutex_lock_interruptible(&perf->metrics_lock); if (err) goto reg_err; /* We shouldn't have too many configs, so this iteration shouldn't be * too costly. */ - idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { + idr_for_each_entry(&perf->metrics_idr, tmp, id) { if (!strcmp(tmp->uuid, oa_config->uuid)) { DRM_DEBUG("OA config already exists with this uuid\n"); err = -EADDRINUSE; @@ -3400,14 +3793,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } } - err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); + err = create_dynamic_oa_sysfs_entry(perf, oa_config); if (err) { DRM_DEBUG("Failed to create sysfs entry for OA config\n"); goto sysfs_err; } /* Config id 0 is invalid, id 1 for kernel stored test config. */ - oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, + oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL); if (oa_config->id < 0) { @@ -3416,16 +3809,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, goto sysfs_err; } - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); return oa_config->id; sysfs_err: - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); reg_err: - put_oa_config(dev_priv, oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3444,12 +3837,12 @@ reg_err: int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; u64 *arg = data; struct i915_oa_config *oa_config; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -3459,31 +3852,33 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, return -EACCES; } - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + ret = mutex_lock_interruptible(&perf->metrics_lock); if (ret) - goto lock_err; + return ret; - oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); + oa_config = idr_find(&perf->metrics_idr, *arg); if (!oa_config) { DRM_DEBUG("Failed to remove unknown OA config\n"); ret = -ENOENT; - goto config_err; + goto err_unlock; } GEM_BUG_ON(*arg != oa_config->id); - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &oa_config->sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); - idr_remove(&dev_priv->perf.metrics_idr, *arg); + idr_remove(&perf->metrics_idr, *arg); + + mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(dev_priv, oa_config); + i915_oa_config_put(oa_config); -config_err: - mutex_unlock(&dev_priv->perf.metrics_lock); -lock_err: + return 0; + +err_unlock: + mutex_unlock(&perf->metrics_lock); return ret; } @@ -3531,103 +3926,103 @@ static struct ctl_table dev_root[] = { /** * i915_perf_init - initialize i915-perf state on module load - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. * * Note: i915-perf initialization is split into an 'init' and 'register' * phase with the i915_perf_register() exposing state to userspace. */ -void i915_perf_init(struct drm_i915_private *dev_priv) -{ - if (IS_HASWELL(dev_priv)) { - dev_priv->perf.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = - hsw_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = NULL; - dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.ops.read = gen7_oa_read; - dev_priv->perf.ops.oa_hw_tail_read = - gen7_oa_hw_tail_read; - - dev_priv->perf.oa_formats = hsw_oa_formats; - } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { +void i915_perf_init(struct drm_i915_private *i915) +{ + struct i915_perf *perf = &i915->perf; + + /* XXX const struct i915_perf_ops! */ + + if (IS_HASWELL(i915)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = NULL; + perf->ops.enable_metric_set = hsw_enable_metric_set; + perf->ops.disable_metric_set = hsw_disable_metric_set; + perf->ops.oa_enable = gen7_oa_enable; + perf->ops.oa_disable = gen7_oa_disable; + perf->ops.read = gen7_oa_read; + perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; + + perf->oa_formats = hsw_oa_formats; + } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - dev_priv->perf.oa_formats = gen8_plus_oa_formats; + perf->oa_formats = gen8_plus_oa_formats; - dev_priv->perf.ops.oa_enable = gen8_oa_enable; - dev_priv->perf.ops.oa_disable = gen8_oa_disable; - dev_priv->perf.ops.read = gen8_oa_read; - dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; + perf->ops.read = gen8_oa_read; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (IS_GEN_RANGE(dev_priv, 8, 9)) { - dev_priv->perf.ops.is_valid_b_counter_reg = + if (IS_GEN_RANGE(i915, 8, 9)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen8_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->perf.ops.is_valid_mux_reg = + if (IS_CHERRYVIEW(i915)) { + perf->ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen8_disable_metric_set; - if (IS_GEN(dev_priv, 8)) { - dev_priv->perf.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.ctx_flexeu0_offset = 0x2ce; + if (IS_GEN(i915, 8)) { + perf->ctx_oactxctrl_offset = 0x120; + perf->ctx_flexeu0_offset = 0x2ce; - dev_priv->perf.gen8_valid_ctx_bit = BIT(25); + perf->gen8_valid_ctx_bit = BIT(25); } else { - dev_priv->perf.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.ctx_flexeu0_offset = 0x3de; + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; - dev_priv->perf.gen8_valid_ctx_bit = BIT(16); + perf->gen8_valid_ctx_bit = BIT(16); } - } else if (IS_GEN_RANGE(dev_priv, 10, 11)) { - dev_priv->perf.ops.is_valid_b_counter_reg = + } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen10_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen10_disable_metric_set; - if (IS_GEN(dev_priv, 10)) { - dev_priv->perf.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.ctx_flexeu0_offset = 0x3de; + if (IS_GEN(i915, 10)) { + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; } else { - dev_priv->perf.ctx_oactxctrl_offset = 0x124; - dev_priv->perf.ctx_flexeu0_offset = 0x78e; + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; } - dev_priv->perf.gen8_valid_ctx_bit = BIT(16); + perf->gen8_valid_ctx_bit = BIT(16); } } - if (dev_priv->perf.ops.enable_metric_set) { - INIT_LIST_HEAD(&dev_priv->perf.streams); - mutex_init(&dev_priv->perf.lock); + if (perf->ops.enable_metric_set) { + mutex_init(&perf->lock); oa_sample_rate_hard_limit = 1000 * - (RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); + perf->sysctl_header = register_sysctl_table(dev_root); - mutex_init(&dev_priv->perf.metrics_lock); - idr_init(&dev_priv->perf.metrics_idr); + mutex_init(&perf->metrics_lock); + idr_init(&perf->metrics_idr); /* We set up some ratelimit state to potentially throttle any * _NOTES about spurious, invalid OA reports which we don't @@ -3639,44 +4034,70 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * * Using the same limiting factors as printk_ratelimit() */ - ratelimit_state_init(&dev_priv->perf.spurious_report_rs, - 5 * HZ, 10); + ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10); /* Since we use a DRM_NOTE for spurious reports it would be * inconsistent to let __ratelimit() automatically print a * warning for throttling. */ - ratelimit_set_flags(&dev_priv->perf.spurious_report_rs, + ratelimit_set_flags(&perf->spurious_report_rs, RATELIMIT_MSG_ON_RELEASE); - dev_priv->perf.initialized = true; + atomic64_set(&perf->noa_programming_delay, + 500 * 1000 /* 500us */); + + perf->i915 = i915; } } static int destroy_config(int id, void *p, void *data) { - struct drm_i915_private *dev_priv = data; - struct i915_oa_config *oa_config = p; - - put_oa_config(dev_priv, oa_config); - + i915_oa_config_put(p); return 0; } /** * i915_perf_fini - Counter part to i915_perf_init() - * @dev_priv: i915 device instance + * @i915: i915 device instance */ -void i915_perf_fini(struct drm_i915_private *dev_priv) +void i915_perf_fini(struct drm_i915_private *i915) { - if (!dev_priv->perf.initialized) + struct i915_perf *perf = &i915->perf; + + if (!perf->i915) return; - idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); - idr_destroy(&dev_priv->perf.metrics_idr); + idr_for_each(&perf->metrics_idr, destroy_config, perf); + idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(dev_priv->perf.sysctl_header); + unregister_sysctl_table(perf->sysctl_header); - memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops)); + memset(&perf->ops, 0, sizeof(perf->ops)); + perf->i915 = NULL; +} - dev_priv->perf.initialized = false; +/** + * i915_perf_ioctl_version - Version of the i915-perf subsystem + * + * This version number is used by userspace to detect available features. + */ +int i915_perf_ioctl_version(void) +{ + /* + * 1: Initial version + * I915_PERF_IOCTL_ENABLE + * I915_PERF_IOCTL_DISABLE + * + * 2: Added runtime modification of OA config. + * I915_PERF_IOCTL_CONFIG + * + * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold + * preemption on a particular context so that performance data is + * accessible from a delta of MI_RPC reports without looking at the + * OA buffer. + */ + return 3; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_perf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index f4fb311184b1..4ceebce72060 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -6,11 +6,15 @@ #ifndef __I915_PERF_H__ #define __I915_PERF_H__ +#include <linux/kref.h> #include <linux/types.h> +#include "i915_perf_types.h" + struct drm_device; struct drm_file; struct drm_i915_private; +struct i915_oa_config; struct intel_context; struct intel_engine_cs; @@ -18,6 +22,7 @@ void i915_perf_init(struct drm_i915_private *i915); void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); +int i915_perf_ioctl_version(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -25,7 +30,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + void i915_oa_init_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine); +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set); + +static inline struct i915_oa_config * +i915_oa_config_get(struct i915_oa_config *oa_config) +{ + if (kref_get_unless_zero(&oa_config->ref)) + return oa_config; + else + return NULL; +} + +void i915_oa_config_release(struct kref *ref); +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, i915_oa_config_release); +} + #endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h new file mode 100644 index 000000000000..a1f733fc905a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -0,0 +1,406 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_PERF_TYPES_H_ +#define _I915_PERF_TYPES_H_ + +#include <linux/atomic.h> +#include <linux/device.h> +#include <linux/hrtimer.h> +#include <linux/llist.h> +#include <linux/poll.h> +#include <linux/sysfs.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/wait.h> + +#include "i915_reg.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct file; +struct i915_gem_context; +struct i915_perf; +struct i915_vma; +struct intel_context; +struct intel_engine_cs; + +struct i915_oa_format { + u32 format; + int size; +}; + +struct i915_oa_reg { + i915_reg_t addr; + u32 value; +}; + +struct i915_oa_config { + struct i915_perf *perf; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct i915_oa_reg *mux_regs; + u32 mux_regs_len; + const struct i915_oa_reg *b_counter_regs; + u32 b_counter_regs_len; + const struct i915_oa_reg *flex_regs; + u32 flex_regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct device_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + +struct i915_perf_stream; + +/** + * struct i915_perf_stream_ops - the OPs to support a specific stream type + */ +struct i915_perf_stream_ops { + /** + * @enable: Enables the collection of HW samples, either in response to + * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened + * without `I915_PERF_FLAG_DISABLED`. + */ + void (*enable)(struct i915_perf_stream *stream); + + /** + * @disable: Disables the collection of HW samples, either in response + * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying + * the stream. + */ + void (*disable)(struct i915_perf_stream *stream); + + /** + * @poll_wait: Call poll_wait, passing a wait queue that will be woken + * once there is something ready to read() for the stream + */ + void (*poll_wait)(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait); + + /** + * @wait_unlocked: For handling a blocking read, wait until there is + * something to ready to read() for the stream. E.g. wait on the same + * wait queue that would be passed to poll_wait(). + */ + int (*wait_unlocked)(struct i915_perf_stream *stream); + + /** + * @read: Copy buffered metrics as records to userspace + * **buf**: the userspace, destination buffer + * **count**: the number of bytes to copy, requested by userspace + * **offset**: zero at the start of the read, updated as the read + * proceeds, it represents how many bytes have been copied so far and + * the buffer offset for copying the next record. + * + * Copy as many buffered i915 perf samples and records for this stream + * to userspace as will fit in the given buffer. + * + * Only write complete records; returning -%ENOSPC if there isn't room + * for a complete record. + * + * Return any error condition that results in a short read such as + * -%ENOSPC or -%EFAULT, even though these may be squashed before + * returning to userspace. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @destroy: Cleanup any stream specific resources. + * + * The stream will always be disabled before this is called. + */ + void (*destroy)(struct i915_perf_stream *stream); +}; + +/** + * struct i915_perf_stream - state for a single open stream FD + */ +struct i915_perf_stream { + /** + * @perf: i915_perf backpointer + */ + struct i915_perf *perf; + + /** + * @uncore: mmio access path + */ + struct intel_uncore *uncore; + + /** + * @engine: Engine associated with this performance stream. + */ + struct intel_engine_cs *engine; + + /** + * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` + * properties given when opening a stream, representing the contents + * of a single sample as read() by userspace. + */ + u32 sample_flags; + + /** + * @sample_size: Considering the configured contents of a sample + * combined with the required header size, this is the total size + * of a single sample record. + */ + int sample_size; + + /** + * @ctx: %NULL if measuring system-wide across all contexts or a + * specific context that is being monitored. + */ + struct i915_gem_context *ctx; + + /** + * @enabled: Whether the stream is currently enabled, considering + * whether the stream was opened in a disabled state and based + * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. + */ + bool enabled; + + /** + * @hold_preemption: Whether preemption is put on hold for command + * submissions done on the @ctx. This is useful for some drivers that + * cannot easily post process the OA buffer context to subtract delta + * of performance counters not associated with @ctx. + */ + bool hold_preemption; + + /** + * @ops: The callbacks providing the implementation of this specific + * type of configured stream. + */ + const struct i915_perf_stream_ops *ops; + + /** + * @oa_config: The OA configuration used by the stream. + */ + struct i915_oa_config *oa_config; + + /** + * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily + * each time @oa_config changes. + */ + struct llist_head oa_config_bos; + + /** + * @pinned_ctx: The OA context specific information. + */ + struct intel_context *pinned_ctx; + u32 specific_ctx_id; + u32 specific_ctx_id_mask; + + struct hrtimer poll_check_timer; + wait_queue_head_t poll_wq; + bool pollin; + + bool periodic; + int period_exponent; + + /** + * @oa_buffer: State of the OA buffer. + */ + struct { + struct i915_vma *vma; + u8 *vaddr; + u32 last_ctx_id; + int format; + int format_size; + int size_exponent; + + /** + * @ptr_lock: Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state needs to be read + * consistently from a hrtimer callback (atomic context) and + * read() fop (user context) with tail pointer updates happening + * in atomic context and head updates in user context and the + * (unlikely) possibility of read() errors needing to reset all + * head/tail state. + * + * Note: Contention/performance aren't currently a significant + * concern here considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that reads typically only + * happen in response to a hrtimer event and likely complete + * before the next callback. + * + * Note: This lock is not held *while* reading and copying data + * to userspace so the value of head observed in htrimer + * callbacks won't represent any partial consumption of data. + */ + spinlock_t ptr_lock; + + /** + * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to + * used for reading. + * + * Initial values of 0xffffffff are invalid and imply that an + * update is required (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * @aged_tail_idx: Index for the aged tail ready to read() data up to. + */ + unsigned int aged_tail_idx; + + /** + * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer + * was read; used to determine when it is old enough to trust. + */ + u64 aging_timestamp; + + /** + * @head: Although we can always read back the head pointer register, + * we prefer to avoid trusting the HW state, just to avoid any + * risk that some hardware condition could * somehow bump the + * head pointer unpredictably and cause us to forward the wrong + * OA buffer data to userspace. + */ + u32 head; + } oa_buffer; + + /** + * A batch buffer doing a wait on the GPU for the NOA logic to be + * reprogrammed. + */ + struct i915_vma *noa_wait; +}; + +/** + * struct i915_oa_ops - Gen specific implementation of an OA unit stream + */ +struct i915_oa_ops { + /** + * @is_valid_b_counter_reg: Validates register's address for + * programming boolean counters for a particular platform. + */ + bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); + + /** + * @is_valid_mux_reg: Validates register's address for programming mux + * for a particular platform. + */ + bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); + + /** + * @is_valid_flex_reg: Validates register's address for programming + * flex EU filtering for a particular platform. + */ + bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as + * disabling EU clock gating as required. + */ + int (*enable_metric_set)(struct i915_perf_stream *stream); + + /** + * @disable_metric_set: Remove system constraints associated with using + * the OA unit. + */ + void (*disable_metric_set)(struct i915_perf_stream *stream); + + /** + * @oa_enable: Enable periodic sampling + */ + void (*oa_enable)(struct i915_perf_stream *stream); + + /** + * @oa_disable: Disable periodic sampling + */ + void (*oa_disable)(struct i915_perf_stream *stream); + + /** + * @read: Copy data from the circular OA buffer into a given userspace + * buffer. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @oa_hw_tail_read: read the OA tail pointer register + * + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. + */ + u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); +}; + +struct i915_perf { + struct drm_i915_private *i915; + + struct kobject *metrics_kobj; + struct ctl_table_header *sysctl_header; + + /* + * Lock associated with adding/modifying/removing OA configs + * in perf->metrics_idr. + */ + struct mutex metrics_lock; + + /* + * List of dynamic configurations (struct i915_oa_config), you + * need to hold perf->metrics_lock to access it. + */ + struct idr metrics_idr; + + /* + * Lock associated with anything below within this structure + * except exclusive_stream. + */ + struct mutex lock; + + /* + * The stream currently using the OA unit. If accessed + * outside a syscall associated to its file + * descriptor. + */ + struct i915_perf_stream *exclusive_stream; + + /** + * For rate limiting any notifications of spurious + * invalid OA reports + */ + struct ratelimit_state spurious_report_rs; + + struct i915_oa_config test_config; + + u32 gen7_latched_oastatus1; + u32 ctx_oactxctrl_offset; + u32 ctx_flexeu0_offset; + + /** + * The RPT_ID/reason field for Gen8+ includes a bit + * to determine if the CTX ID in the report is valid + * but the specific bit differs between Gen 8 and 9 + */ + u32 gen8_valid_ctx_bit; + + struct i915_oa_ops ops; + const struct i915_oa_format *oa_formats; + + atomic64_t noa_programming_delay; +}; + +#endif /* _I915_PERF_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index abac5042da2b..c27cfef9281c 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -7,6 +7,7 @@ #include <linux/nospec.h> #include "i915_drv.h" +#include "i915_perf.h" #include "i915_query.h" #include <uapi/drm/i915_drm.h> @@ -140,10 +141,305 @@ query_engine_info(struct drm_i915_private *i915, return len; } +static int can_copy_perf_config_registers_or_number(u32 user_n_regs, + u64 user_regs_ptr, + u32 kernel_n_regs) +{ + /* + * We'll just put the number of registers, and won't copy the + * register. + */ + if (user_n_regs == 0) + return 0; + + if (user_n_regs < kernel_n_regs) + return -EINVAL; + + if (!access_ok(u64_to_user_ptr(user_regs_ptr), + 2 * sizeof(u32) * kernel_n_regs)) + return -EFAULT; + + return 0; +} + +static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs, + u32 kernel_n_regs, + u64 user_regs_ptr, + u32 *user_n_regs) +{ + u32 r; + + if (*user_n_regs == 0) { + *user_n_regs = kernel_n_regs; + return 0; + } + + *user_n_regs = kernel_n_regs; + + for (r = 0; r < kernel_n_regs; r++) { + u32 __user *user_reg_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2); + u32 __user *user_val_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 + + sizeof(u32)); + int ret; + + ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr), + user_reg_ptr); + if (ret) + return -EFAULT; + + ret = __put_user(kernel_regs[r].value, user_val_ptr); + if (ret) + return -EFAULT; + } + + return 0; +} + +static int query_perf_config_data(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item, + bool use_uuid) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_perf_oa_config __user *user_config_ptr = + u64_to_user_ptr(query_item->data_ptr + + sizeof(struct drm_i915_query_perf_config)); + struct drm_i915_perf_oa_config user_config; + struct i915_perf *perf = &i915->perf; + struct i915_oa_config *oa_config; + char uuid[UUID_STRING_LEN + 1]; + u64 config_id; + u32 flags, total_size; + int ret; + + if (!perf->i915) + return -ENODEV; + + total_size = + sizeof(struct drm_i915_query_perf_config) + + sizeof(struct drm_i915_perf_oa_config); + + if (query_item->length == 0) + return total_size; + + if (query_item->length < total_size) { + DRM_DEBUG("Invalid query config data item size=%u expected=%u\n", + query_item->length, total_size); + return -EINVAL; + } + + if (!access_ok(user_query_config_ptr, total_size)) + return -EFAULT; + + if (__get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + if (use_uuid) { + struct i915_oa_config *tmp; + int id; + + BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid)); + + memset(&uuid, 0, sizeof(uuid)); + if (__copy_from_user(uuid, user_query_config_ptr->uuid, + sizeof(user_query_config_ptr->uuid))) + return -EFAULT; + + oa_config = NULL; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, uuid)) { + oa_config = i915_oa_config_get(tmp); + break; + } + } + rcu_read_unlock(); + } else { + if (__get_user(config_id, &user_query_config_ptr->config)) + return -EFAULT; + + oa_config = i915_perf_get_oa_config(perf, config_id); + } + if (!oa_config) + return -ENOENT; + + if (__copy_from_user(&user_config, user_config_ptr, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs, + user_config.boolean_regs_ptr, + oa_config->b_counter_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs, + user_config.flex_regs_ptr, + oa_config->flex_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs, + user_config.mux_regs_ptr, + oa_config->mux_regs_len); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs, + oa_config->b_counter_regs_len, + user_config.boolean_regs_ptr, + &user_config.n_boolean_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->flex_regs, + oa_config->flex_regs_len, + user_config.flex_regs_ptr, + &user_config.n_flex_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->mux_regs, + oa_config->mux_regs_len, + user_config.mux_regs_ptr, + &user_config.n_mux_regs); + if (ret) + goto out; + + memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid)); + + if (__copy_to_user(user_config_ptr, &user_config, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = total_size; + +out: + i915_oa_config_put(oa_config); + return ret; +} + +static size_t sizeof_perf_config_list(size_t count) +{ + return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count; +} + +static size_t sizeof_perf_metrics(struct i915_perf *perf) +{ + struct i915_oa_config *tmp; + size_t i; + int id; + + i = 1; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) + i++; + rcu_read_unlock(); + + return sizeof_perf_config_list(i); +} + +static int query_perf_config_list(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct i915_perf *perf = &i915->perf; + u64 *oa_config_ids = NULL; + int alloc, n_configs; + u32 flags; + int ret; + + if (!perf->i915) + return -ENODEV; + + if (query_item->length == 0) + return sizeof_perf_metrics(perf); + + if (get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + n_configs = 1; + do { + struct i915_oa_config *tmp; + u64 *ids; + int id; + + ids = krealloc(oa_config_ids, + n_configs * sizeof(*oa_config_ids), + GFP_KERNEL); + if (!ids) + return -ENOMEM; + + alloc = fetch_and_zero(&n_configs); + + ids[n_configs++] = 1ull; /* reserved for test_config */ + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (n_configs < alloc) + ids[n_configs] = id; + n_configs++; + } + rcu_read_unlock(); + + oa_config_ids = ids; + } while (n_configs > alloc); + + if (query_item->length < sizeof_perf_config_list(n_configs)) { + DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n", + query_item->length, + sizeof_perf_config_list(n_configs)); + kfree(oa_config_ids); + return -EINVAL; + } + + if (put_user(n_configs, &user_query_config_ptr->config)) { + kfree(oa_config_ids); + return -EFAULT; + } + + ret = copy_to_user(user_query_config_ptr + 1, + oa_config_ids, + n_configs * sizeof(*oa_config_ids)); + kfree(oa_config_ids); + if (ret) + return -EFAULT; + + return sizeof_perf_config_list(n_configs); +} + +static int query_perf_config(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + switch (query_item->flags) { + case DRM_I915_QUERY_PERF_CONFIG_LIST: + return query_perf_config_list(i915, query_item); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID: + return query_perf_config_data(i915, query_item, true); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID: + return query_perf_config_data(i915, query_item, false); + default: + return -EINVAL; + } +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, + query_perf_config, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6d67bd238cfe..e24991e54897 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4) #define MI_PREDICATE_SRC1 _MMIO(0x2408) #define MI_PREDICATE_SRC1_UDW _MMIO(0x2408 + 4) - +#define MI_PREDICATE_DATA _MMIO(0x2410) +#define MI_PREDICATE_RESULT _MMIO(0x2418) +#define MI_PREDICATE_RESULT_1 _MMIO(0x241c) #define MI_PREDICATE_RESULT_2 _MMIO(0x2214) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) @@ -4145,6 +4147,7 @@ enum { #define _VTOTAL_A 0x6000c #define _VBLANK_A 0x60010 #define _VSYNC_A 0x60014 +#define _EXITLINE_A 0x60018 #define _PIPEASRC 0x6001c #define _BCLRPAT_A 0x60020 #define _VSYNCSHIFT_A 0x60028 @@ -4196,6 +4199,11 @@ enum { #define PIPESRC(trans) _MMIO_TRANS2(trans, _PIPEASRC) #define PIPE_MULT(trans) _MMIO_TRANS2(trans, _PIPE_MULT_A) +#define EXITLINE(trans) _MMIO_TRANS2(trans, _EXITLINE_A) +#define EXITLINE_ENABLE REG_BIT(31) +#define EXITLINE_MASK REG_GENMASK(12, 0) +#define EXITLINE_SHIFT 0 + /* * HSW+ eDP PSR registers * @@ -7262,6 +7270,8 @@ enum { #define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084) #define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088) +#define DMC_DEBUG3 _MMIO(0x101090) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) @@ -8870,6 +8880,7 @@ enum { #define GEN9_SAGV_DISABLE 0x0 #define GEN9_SAGV_IS_DISABLED 0x1 #define GEN9_SAGV_ENABLE 0x3 +#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 @@ -10291,6 +10302,8 @@ enum skl_power_gate { /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) #define DC_STATE_DISABLE 0 +#define DC_STATE_EN_DC3CO REG_BIT(30) +#define DC_STATE_DC3CO_STATUS REG_BIT(29) #define DC_STATE_EN_UPTO_DC5 (1 << 0) #define DC_STATE_EN_DC9 (1 << 3) #define DC_STATE_EN_UPTO_DC6 (2 << 0) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 437f9fc6282e..b8a54572a4f8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -649,6 +649,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->gem_context = ce->gem_context; rq->engine = ce->engine; rq->ring = ce->ring; + rq->execution_mask = ce->engine->mask; rcu_assign_pointer(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; @@ -671,7 +672,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->batch = NULL; rq->capture_list = NULL; rq->flags = 0; - rq->execution_mask = ALL_ENGINES; INIT_LIST_HEAD(&rq->execute_cb); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 6a95242b280d..96991d64759c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -216,8 +216,9 @@ struct i915_request { unsigned long emitted_jiffies; unsigned long flags; -#define I915_REQUEST_WAITBOOST BIT(0) -#define I915_REQUEST_NOPREEMPT BIT(1) +#define I915_REQUEST_WAITBOOST BIT(0) +#define I915_REQUEST_NOPREEMPT BIT(1) +#define I915_REQUEST_SENTINEL BIT(2) /** timeline->request entry for this request */ struct list_head link; @@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq) return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); } +static inline bool i915_request_has_sentinel(const struct i915_request *rq) +{ + return unlikely(rq->flags & I915_REQUEST_SENTINEL); +} + static inline struct intel_timeline * i915_request_timeline(struct i915_request *rq) { diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index aad81acba9dc..d18e70550054 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -49,6 +49,15 @@ struct i915_sched_attr { * DAG of each request, we are able to insert it into a sorted queue when it * is ready, and are able to reorder its portion of the graph to accommodate * dynamic priority changes. + * + * Ok, there is now one active element to the "scheduler" in the backends. + * We let a new context run for a small amount of time before re-evaluating + * the run order. As we re-evaluate, we maintain the strict ordering of + * dependencies, but attempt to rotate the active contexts (the current context + * is put to the back of its priority queue, then reshuffling its dependents). + * This provides minimal timeslicing and prevents a userspace hog (e.g. + * something waiting on a user semaphore [VkEvent]) from denying service to + * others. */ struct i915_sched_node { struct list_head signalers_list; /* those before us, we depend upon */ diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c new file mode 100644 index 000000000000..c0299435d75e --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_drv.h" + +static u64 +intel_memory_region_free_pages(struct intel_memory_region *mem, + struct list_head *blocks) +{ + struct i915_buddy_block *block, *on; + u64 size = 0; + + list_for_each_entry_safe(block, on, blocks, link) { + size += i915_buddy_block_size(&mem->mm, block); + i915_buddy_free(&mem->mm, block); + } + INIT_LIST_HEAD(blocks); + + return size; +} + +void +__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks) +{ + mutex_lock(&mem->mm_lock); + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); +} + +void +__intel_memory_region_put_block_buddy(struct i915_buddy_block *block) +{ + struct list_head blocks; + + INIT_LIST_HEAD(&blocks); + list_add(&block->link, &blocks); + __intel_memory_region_put_pages_buddy(block->private, &blocks); +} + +int +__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks) +{ + unsigned int min_order = 0; + unsigned long n_pages; + + GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size)); + GEM_BUG_ON(!list_empty(blocks)); + + if (flags & I915_ALLOC_MIN_PAGE_SIZE) { + min_order = ilog2(mem->min_page_size) - + ilog2(mem->mm.chunk_size); + } + + if (flags & I915_ALLOC_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mem->mm.chunk_size); + } + + n_pages = size >> ilog2(mem->mm.chunk_size); + + mutex_lock(&mem->mm_lock); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mem->mm.max_order); + GEM_BUG_ON(order < min_order); + + do { + block = i915_buddy_alloc(&mem->mm, order); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) + goto err_free_blocks; + } while (1); + + n_pages -= BIT(order); + + block->private = mem; + list_add(&block->link, blocks); + + if (!n_pages) + break; + } while (1); + + mutex_unlock(&mem->mm_lock); + return 0; + +err_free_blocks: + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); + return -ENXIO; +} + +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct i915_buddy_block *block; + LIST_HEAD(blocks); + int ret; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks); + if (ret) + return ERR_PTR(ret); + + block = list_first_entry(&blocks, typeof(*block), link); + list_del_init(&block->link); + return block; +} + +int intel_memory_region_init_buddy(struct intel_memory_region *mem) +{ + return i915_buddy_init(&mem->mm, resource_size(&mem->region), + PAGE_SIZE); +} + +void intel_memory_region_release_buddy(struct intel_memory_region *mem) +{ + i915_buddy_fini(&mem->mm); +} + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops) +{ + struct intel_memory_region *mem; + int err; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return ERR_PTR(-ENOMEM); + + mem->i915 = i915; + mem->region = (struct resource)DEFINE_RES_MEM(start, size); + mem->io_start = io_start; + mem->min_page_size = min_page_size; + mem->ops = ops; + + mutex_init(&mem->objects.lock); + INIT_LIST_HEAD(&mem->objects.list); + INIT_LIST_HEAD(&mem->objects.purgeable); + + mutex_init(&mem->mm_lock); + + if (ops->init) { + err = ops->init(mem); + if (err) + goto err_free; + } + + kref_init(&mem->kref); + return mem; + +err_free: + kfree(mem); + return ERR_PTR(err); +} + +static void __intel_memory_region_destroy(struct kref *kref) +{ + struct intel_memory_region *mem = + container_of(kref, typeof(*mem), kref); + + if (mem->ops->release) + mem->ops->release(mem); + + mutex_destroy(&mem->mm_lock); + mutex_destroy(&mem->objects.lock); + kfree(mem); +} + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem) +{ + kref_get(&mem->kref); + return mem; +} + +void intel_memory_region_put(struct intel_memory_region *mem) +{ + kref_put(&mem->kref, __intel_memory_region_destroy); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_memory_region.c" +#include "selftests/mock_region.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h new file mode 100644 index 000000000000..52c141b6108c --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_MEMORY_REGION_H__ +#define __INTEL_MEMORY_REGION_H__ + +#include <linux/kref.h> +#include <linux/ioport.h> +#include <linux/mutex.h> +#include <linux/io-mapping.h> + +#include "i915_buddy.h" + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; +struct sg_table; + +#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) +#define I915_ALLOC_CONTIGUOUS BIT(1) + +struct intel_memory_region_ops { + unsigned int flags; + + int (*init)(struct intel_memory_region *mem); + void (*release)(struct intel_memory_region *mem); + + struct drm_i915_gem_object * + (*create_object)(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +}; + +struct intel_memory_region { + struct drm_i915_private *i915; + + const struct intel_memory_region_ops *ops; + + struct io_mapping iomap; + struct resource region; + + struct i915_buddy_mm mm; + struct mutex mm_lock; + + struct kref kref; + + resource_size_t io_start; + resource_size_t min_page_size; + + unsigned int type; + unsigned int instance; + unsigned int id; + + struct { + struct mutex lock; /* Protects access to objects */ + struct list_head list; + struct list_head purgeable; + } objects; +}; + +int intel_memory_region_init_buddy(struct intel_memory_region *mem); +void intel_memory_region_release_buddy(struct intel_memory_region *mem); + +int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks); +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks); +void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block); + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops); + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem); +void intel_memory_region_put(struct intel_memory_region *mem); + +#endif diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bfcf03ab5245..b306e2338f5a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1117,10 +1117,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - if (plane->id == PLANE_CURSOR) - width = plane_state->base.crtc_w; - else - width = drm_rect_width(&plane_state->base.dst); + width = drm_rect_width(&plane_state->base.dst); if (plane->id == PLANE_CURSOR) { wm = intel_wm_method2(clock, htotal, width, cpp, latency); @@ -2549,7 +2546,8 @@ static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state, return ilk_wm_method2(crtc_state->pixel_rate, crtc_state->base.adjusted_mode.crtc_htotal, - plane_state->base.crtc_w, cpp, mem_value); + drm_rect_width(&plane_state->base.dst), + cpp, mem_value); } /* Only for WM_LP. */ @@ -3089,8 +3087,8 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct intel_pipe_wm *pipe_wm; struct drm_device *dev = state->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_plane *plane; - const struct drm_plane_state *plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; const struct intel_plane_state *pristate = NULL; const struct intel_plane_state *sprstate = NULL; const struct intel_plane_state *curstate = NULL; @@ -3099,15 +3097,13 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) pipe_wm = &crtc_state->wm.ilk.optimal; - drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &crtc_state->base) { - const struct intel_plane_state *ps = to_intel_plane_state(plane_state); - - if (plane->type == DRM_PLANE_TYPE_PRIMARY) - pristate = ps; - else if (plane->type == DRM_PLANE_TYPE_OVERLAY) - sprstate = ps; - else if (plane->type == DRM_PLANE_TYPE_CURSOR) - curstate = ps; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) + pristate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY) + sprstate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + curstate = plane_state; } pipe_wm->pipe_enabled = crtc_state->base.active; @@ -3642,6 +3638,39 @@ intel_has_sagv(struct drm_i915_private *dev_priv) dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } +static void +skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 12) { + u32 val = 0; + int ret; + + ret = sandybridge_pcode_read(dev_priv, + GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, + &val, NULL); + if (!ret) { + dev_priv->sagv_block_time_us = val; + return; + } + + DRM_DEBUG_DRIVER("Couldn't read SAGV block time!\n"); + } else if (IS_GEN(dev_priv, 11)) { + dev_priv->sagv_block_time_us = 10; + return; + } else if (IS_GEN(dev_priv, 10)) { + dev_priv->sagv_block_time_us = 20; + return; + } else if (IS_GEN(dev_priv, 9)) { + dev_priv->sagv_block_time_us = 30; + return; + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + } + + /* Default to an unusable block time */ + dev_priv->sagv_block_time_us = -1; +} + /* * SAGV dynamically adjusts the system agent voltage and clock frequencies * depending on power and performance requirements. The display engine access @@ -3730,18 +3759,10 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) struct intel_crtc_state *crtc_state; enum pipe pipe; int level, latency; - int sagv_block_time_us; if (!intel_has_sagv(dev_priv)) return false; - if (IS_GEN(dev_priv, 9)) - sagv_block_time_us = 30; - else if (IS_GEN(dev_priv, 10)) - sagv_block_time_us = 20; - else - sagv_block_time_us = 10; - /* * If there are no active CRTCs, no additional checks need be performed */ @@ -3788,7 +3809,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) * incur memory latencies higher than sagv_block_time_us we * can't enable SAGV. */ - if (latency < sagv_block_time_us) + if (latency < dev_priv->sagv_block_time_us) return false; } @@ -4048,7 +4069,6 @@ static uint_fixed_16_16_t skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4056,27 +4076,17 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) return u32_to_fixed16(0); - /* n.b., src is 16.16 fixed point, dst is whole integer */ - if (plane->id == PLANE_CURSOR) { - /* - * Cursors only support 0/180 degree rotation, - * hence no need to account for rotation here. - */ - src_w = plane_state->base.src_w >> 16; - src_h = plane_state->base.src_h >> 16; - dst_w = plane_state->base.crtc_w; - dst_h = plane_state->base.crtc_h; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - src_w = drm_rect_width(&plane_state->base.src) >> 16; - src_h = drm_rect_height(&plane_state->base.src) >> 16; - dst_w = drm_rect_width(&plane_state->base.dst); - dst_h = drm_rect_height(&plane_state->base.dst); - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + * + * n.b., src is 16.16 fixed point, dst is whole integer. + */ + src_w = drm_rect_width(&plane_state->base.src) >> 16; + src_h = drm_rect_height(&plane_state->base.src) >> 16; + dst_w = drm_rect_width(&plane_state->base.dst); + dst_h = drm_rect_height(&plane_state->base.dst); fp_w_ratio = div_fixed16(src_w, dst_w); fp_h_ratio = div_fixed16(src_h, dst_h); @@ -4124,8 +4134,8 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, { struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct drm_atomic_state *state = crtc_state->base.state; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + const struct intel_plane_state *plane_state; + struct intel_plane *plane; int crtc_clock, dotclk; u32 pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; @@ -4134,12 +4144,10 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, if (!crtc_state->base.enable) return 0; - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { uint_fixed_16_16_t plane_downscale; uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); int bpp; - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); if (!intel_wm_plane_visible(crtc_state, plane_state)) continue; @@ -4227,18 +4235,16 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *uv_plane_data_rate) { struct drm_atomic_state *state = crtc_state->base.state; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; if (WARN_ON(!state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { - enum plane_id plane_id = to_intel_plane(plane)->id; - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; /* packed/y */ @@ -4259,18 +4265,16 @@ static u64 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *plane_data_rate) { - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; if (WARN_ON(!crtc_state->base.state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); - enum plane_id plane_id = to_intel_plane(plane)->id; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; if (!plane_state->planar_linked_plane) { @@ -4282,7 +4286,7 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, /* * The slave plane might not iterate in - * drm_atomic_crtc_state_for_each_plane_state(), + * intel_atomic_crtc_state_for_each_plane_state(), * and needs the master plane state which may be * NULL if we try get_new_plane_state(), so we * always calculate from the master. @@ -4706,20 +4710,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, struct skl_wm_params *wp, int color_plane) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); const struct drm_framebuffer *fb = plane_state->base.fb; int width; - if (plane->id == PLANE_CURSOR) { - width = plane_state->base.crtc_w; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - width = drm_rect_width(&plane_state->base.src) >> 16; - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ + width = drm_rect_width(&plane_state->base.src) >> 16; return skl_compute_wm_params(crtc_state, width, fb->format, fb->modifier, @@ -5065,8 +5064,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; int ret; /* @@ -5075,10 +5074,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) */ memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, - &crtc_state->base) { - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, + crtc_state) { if (INTEL_GEN(dev_priv) >= 11) ret = icl_build_plane_wm(crtc_state, plane_state); @@ -9013,6 +9010,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) else if (IS_GEN(dev_priv, 5)) i915_ironlake_get_mem_freq(dev_priv); + if (intel_has_sagv(dev_priv)) + skl_setup_sagv_block_time(dev_priv); + /* For FIFO watermark updates */ if (INTEL_GEN(dev_priv) >= 9) { skl_setup_wm_latency(dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 165b3a7f9744..ebe735df6504 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -82,8 +82,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) } GEM_BUG_ON(rem); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; @@ -95,7 +93,6 @@ static void fake_put_pages(struct drm_i915_gem_object *obj, { fake_free_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -122,6 +119,8 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &fake_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 6713efea350b..6daf6599ec79 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -35,3 +35,4 @@ selftest(reset, intel_reset_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(guc, intel_guc_live_selftest) +selftest(perf, i915_perf_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index b88084fe3269..aa5a0e7f5d9e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -26,3 +26,4 @@ selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) selftest(contexts, i915_gem_context_mock_selftests) selftest(buddy, i915_buddy_mock_selftests) +selftest(memory_region, intel_memory_region_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c new file mode 100644 index 000000000000..dc6d689e4251 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -0,0 +1,216 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/kref.h> + +#include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + +#include "i915_selftest.h" + +#include "igt_flush_test.h" +#include "lib_sw_fence.h" + +static struct i915_perf_stream * +test_stream(struct i915_perf *perf) +{ + struct drm_i915_perf_open_param param = {}; + struct perf_open_properties props = { + .engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0), + .sample_flags = SAMPLE_OA_REPORT, + .oa_format = I915_OA_FORMAT_C4_B8, + .metrics_set = 1, + }; + struct i915_perf_stream *stream; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + return NULL; + + stream->perf = perf; + + mutex_lock(&perf->lock); + if (i915_oa_stream_init(stream, ¶m, &props)) { + kfree(stream); + stream = NULL; + } + mutex_unlock(&perf->lock); + + return stream; +} + +static void stream_destroy(struct i915_perf_stream *stream) +{ + struct i915_perf *perf = stream->perf; + + mutex_lock(&perf->lock); + i915_perf_destroy_locked(stream); + mutex_unlock(&perf->lock); +} + +static int live_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + + /* Quick check we can create a perf stream */ + + stream = test_stream(&i915->perf); + if (!stream) + return -EINVAL; + + stream_destroy(stream); + return 0; +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 *cs; + int len; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + len = 5; + if (INTEL_GEN(rq->i915) >= 8) + len++; + + *cs++ = GFX_OP_PIPE_CONTROL(len); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_WRITE_TIMESTAMP; + *cs++ = slot * sizeof(u32); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static ktime_t poll_status(struct i915_request *rq, int slot) +{ + while (!intel_read_status_page(rq->engine, slot) && + !i915_request_completed(rq)) + cpu_relax(); + + return ktime_get(); +} + +static int live_noa_delay(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + struct i915_request *rq; + ktime_t t0, t1; + u64 expected; + u32 delay; + int err; + int i; + + /* Check that the GPU delays matches expectations */ + + stream = test_stream(&i915->perf); + if (!stream) + return -ENOMEM; + + expected = atomic64_read(&stream->perf->noa_programming_delay); + + if (stream->engine->class != RENDER_CLASS) { + err = -ENODEV; + goto out; + } + + for (i = 0; i < 4; i++) + intel_write_status_page(stream->engine, 0x100 + i, 0); + + rq = i915_request_create(stream->engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (rq->engine->emit_init_breadcrumb && + i915_request_timeline(rq)->has_initial_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) { + i915_request_add(rq); + goto out; + } + } + + err = write_timestamp(rq, 0x100); + if (err) { + i915_request_add(rq); + goto out; + } + + err = rq->engine->emit_bb_start(rq, + i915_ggtt_offset(stream->noa_wait), 0, + I915_DISPATCH_SECURE); + if (err) { + i915_request_add(rq); + goto out; + } + + err = write_timestamp(rq, 0x102); + if (err) { + i915_request_add(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + preempt_disable(); + t0 = poll_status(rq, 0x100); + t1 = poll_status(rq, 0x102); + preempt_enable(); + + pr_info("CPU delay: %lluns, expected %lluns\n", + ktime_sub(t1, t0), expected); + + delay = intel_read_status_page(stream->engine, 0x102); + delay -= intel_read_status_page(stream->engine, 0x100); + delay = div_u64(mul_u32_u32(delay, 1000 * 1000), + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); + pr_info("GPU delay: %uns, expected %lluns\n", + delay, expected); + + if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { + pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", + delay / 1000, + div_u64(3 * expected, 4000), + div_u64(3 * expected, 2000)); + err = -EINVAL; + } + + i915_request_put(rq); +out: + stream_destroy(stream); + return err; +} + +int i915_perf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_noa_delay), + }; + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj || !perf->ops.enable_metric_set) + return 0; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c new file mode 100644 index 000000000000..56091e7e599e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "mock_gem_device.h" +#include "mock_region.h" + +#include "gem/i915_gem_region.h" +#include "gem/selftests/mock_context.h" +#include "selftests/i915_random.h" + +static void close_objects(struct intel_memory_region *mem, + struct list_head *objects) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + /* No polluting the memory region between tests */ + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + list_del(&obj->st_link); + i915_gem_object_put(obj); + } + + cond_resched(); + + i915_gem_drain_freed_objects(i915); +} + +static int igt_mock_fill(void *arg) +{ + struct intel_memory_region *mem = arg; + resource_size_t total = resource_size(&mem->region); + resource_size_t page_size; + resource_size_t rem; + unsigned long max_pages; + unsigned long page_num; + LIST_HEAD(objects); + int err = 0; + + page_size = mem->mm.chunk_size; + max_pages = div64_u64(total, page_size); + rem = total; + + for_each_prime_number_from(page_num, 1, max_pages) { + resource_size_t size = page_num * page_size; + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_region(mem, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + rem -= size; + } + + if (err == -ENOMEM) + err = 0; + if (err == -ENXIO) { + if (page_num * page_size <= rem) { + pr_err("%s failed, space still left in region\n", + __func__); + err = -EINVAL; + } else { + err = 0; + } + } + + close_objects(mem, &objects); + + return err; +} + +static struct drm_i915_gem_object * +igt_object_create(struct intel_memory_region *mem, + struct list_head *objects, + u64 size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mem, size, flags); + if (IS_ERR(obj)) + return obj; + + err = i915_gem_object_pin_pages(obj); + if (err) + goto put; + + list_add(&obj->st_link, objects); + return obj; + +put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static void igt_object_release(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + list_del(&obj->st_link); + i915_gem_object_put(obj); +} + +static int igt_mock_contiguous(void *arg) +{ + struct intel_memory_region *mem = arg; + struct drm_i915_gem_object *obj; + unsigned long n_objects; + LIST_HEAD(objects); + LIST_HEAD(holes); + I915_RND_STATE(prng); + resource_size_t total; + resource_size_t min; + u64 target; + int err = 0; + + total = resource_size(&mem->region); + + /* Min size */ + obj = igt_object_create(mem, &objects, mem->mm.chunk_size, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s min object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Max size */ + obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s max object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Internal fragmentation should not bleed into the object size */ + target = i915_prandom_u64_state(&prng); + div64_u64_rem(target, total, &target); + target = round_up(target, PAGE_SIZE); + target = max_t(u64, PAGE_SIZE, target); + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != target) { + pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__, + obj->base.size, target); + err = -EINVAL; + goto err_close_objects; + } + + if (obj->mm.pages->nents != 1) { + pr_err("%s object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Try to fragment the address space, such that half of it is free, but + * the max contiguous block size is SZ_64K. + */ + + target = SZ_64K; + n_objects = div64_u64(total, target); + + while (n_objects--) { + struct list_head *list; + + if (n_objects % 2) + list = &holes; + else + list = &objects; + + obj = igt_object_create(mem, list, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + } + + close_objects(mem, &holes); + + min = target; + target = total >> 1; + + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Even though we have enough free space, we don't have a big enough + * contiguous block. Make sure that holds true. + */ + + do { + bool should_fail = target > min; + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (should_fail != IS_ERR(obj)) { + pr_err("%s target allocation(%llx) mismatch\n", + __func__, target); + err = -EINVAL; + goto err_close_objects; + } + + target >>= 1; + } while (target >= mem->mm.chunk_size); + +err_close_objects: + list_splice_tail(&holes, &objects); + close_objects(mem, &objects); + return err; +} + +int intel_memory_region_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_fill), + SUBTEST(igt_mock_contiguous), + }; + struct intel_memory_region *mem; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_unref; + } + + err = i915_subtests(tests, mem); + + intel_memory_region_put(mem); +out_unref: + drm_dev_put(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 70a7026db08d..9c22d41b30cd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -33,6 +33,7 @@ #include "mock_gem_device.h" #include "mock_gtt.h" #include "mock_uncore.h" +#include "mock_region.h" #include "gem/selftests/mock_context.h" #include "gem/selftests/mock_gem_object.h" @@ -162,7 +163,7 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M; - mock_uncore_init(&i915->uncore); + mock_uncore_init(&i915->uncore, i915); i915_gem_init__mm(i915); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c new file mode 100644 index 000000000000..7b0c99ddc2d5 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_region.h" +#include "intel_memory_region.h" + +#include "mock_region.h" + +static const struct drm_i915_gem_object_ops mock_region_obj_ops = { + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +static struct drm_i915_gem_object * +mock_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &mock_region_obj_ops); + + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem, flags); + + return obj; +} + +static const struct intel_memory_region_ops mock_region_ops = { + .init = intel_memory_region_init_buddy, + .release = intel_memory_region_release_buddy, + .create_object = mock_object_create, +}; + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start) +{ + return intel_memory_region_create(i915, start, size, min_page_size, + io_start, &mock_region_ops); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h new file mode 100644 index 000000000000..24608089d833 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __MOCK_REGION_H +#define __MOCK_REGION_H + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start); + +#endif /* !__MOCK_REGION_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c index 49585f16d4a2..ca57e4008701 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.c +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -39,8 +39,11 @@ __nop_read(16) __nop_read(32) __nop_read(64) -void mock_uncore_init(struct intel_uncore *uncore) +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915) { + intel_uncore_init_early(uncore, i915); + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop); } diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h index dacb36b5ffcd..8a2cc553f466 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.h +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -25,6 +25,7 @@ #ifndef __MOCK_UNCORE_H #define __MOCK_UNCORE_H -void mock_uncore_init(struct intel_uncore *uncore); +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915); #endif /* !__MOCK_UNCORE_H */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 30c542144016..63d40cba97e0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait { * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. */ #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 + +/* + * Revision of the i915-perf uAPI. The value returned helps determine what + * i915-perf features are available. See drm_i915_perf_property_id. + */ +#define I915_PARAM_PERF_REVISION 54 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1844,23 +1851,31 @@ enum drm_i915_perf_property_id { * Open the stream for a specific context handle (as used with * execbuffer2). A stream opened for a specific context this way * won't typically require root privileges. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_CTX_HANDLE = 1, /** * A value of 1 requests the inclusion of raw OA unit reports as * part of stream samples. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_SAMPLE_OA, /** * The value specifies which set of OA unit metrics should be * be configured, defining the contents of any OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_METRICS_SET, /** * The value specifies the size and layout of OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_FORMAT, @@ -1870,9 +1885,22 @@ enum drm_i915_perf_property_id { * from this exponent as follows: * * 80ns * 2^(period_exponent + 1) + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_EXPONENT, + /** + * Specifying this property is only valid when specify a context to + * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property + * will hold preemption of the particular context we want to gather + * performance data about. The execbuf2 submissions must include a + * drm_i915_gem_execbuffer_ext_perf parameter for this to apply. + * + * This property is available in perf revision 3. + */ + DRM_I915_PERF_PROP_HOLD_PREEMPTION, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; @@ -1901,6 +1929,8 @@ struct drm_i915_perf_open_param { * to close and re-open a stream with the same configuration. * * It's undefined whether any pending data for the stream will be lost. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) @@ -1908,10 +1938,25 @@ struct drm_i915_perf_open_param { * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) /** + * Change metrics_set captured by a stream. + * + * If the stream is bound to a specific context, the configuration change + * will performed inline with that context such that it takes effect before + * the next execbuf submission. + * + * Returns the previously bound metrics set id, or a negative error code. + * + * This ioctl is available in perf revision 2. + */ +#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) + +/** * Common to all i915 perf records */ struct drm_i915_perf_record_header { @@ -1984,6 +2029,7 @@ struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 #define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 /* Must be kept compact -- no holes and well documented */ /* @@ -1995,9 +2041,18 @@ struct drm_i915_query_item { __s32 length; /* - * Unused for now. Must be cleared to zero. + * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * + * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the + * following : + * - DRM_I915_QUERY_PERF_CONFIG_LIST + * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID */ __u32 flags; +#define DRM_I915_QUERY_PERF_CONFIG_LIST 1 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 /* * Data will be written at the location pointed by data_ptr when the @@ -2125,6 +2180,56 @@ struct drm_i915_query_engine_info { struct drm_i915_engine_info engines[]; }; +/* + * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG. + */ +struct drm_i915_query_perf_config { + union { + /* + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets + * this fields to the number of configurations available. + */ + __u64 n_configs; + + /* + * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, + * i915 will use the value in this field as configuration + * identifier to decide what data to write into config_ptr. + */ + __u64 config; + + /* + * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, + * i915 will use the value in this field as configuration + * identifier to decide what data to write into config_ptr. + * + * String formatted like "%08x-%04x-%04x-%04x-%012x" + */ + char uuid[36]; + }; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will + * write an array of __u64 of configuration identifiers. + * + * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will + * write a struct drm_i915_perf_oa_config. If the following fields of + * drm_i915_perf_oa_config are set not set to 0, i915 will write into + * the associated pointers the values of submitted when the + * configuration was created : + * + * - n_mux_regs + * - n_boolean_regs + * - n_flex_regs + */ + __u8 data[]; +}; + #if defined(__cplusplus) } #endif |