diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_perf.c | 153 |
1 files changed, 90 insertions, 63 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index d453756ca128..e42d9a4de322 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -207,6 +207,8 @@ #include "i915_oa_kblgt3.h" #include "i915_oa_glk.h" #include "i915_oa_cflgt2.h" +#include "i915_oa_cflgt3.h" +#include "i915_oa_cnl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1214,9 +1216,9 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if (i915_modparams.enable_execlists) + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; - else { + } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct intel_ring *ring; int ret; @@ -1260,7 +1262,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if (i915_modparams.enable_execlists) { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; @@ -1724,10 +1726,9 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr GFP_KERNEL); } - ret = i915_switch_context(req); i915_add_request(req); - return ret; + return 0; } /* @@ -1851,7 +1852,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN9(dev_priv)) { + if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -1884,6 +1885,16 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) } +static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) +{ + /* Reset all contexts' slices/subslices configurations. */ + gen8_configure_all_contexts(dev_priv, NULL, false); + + /* Make sure we disable noa to save power. */ + I915_WRITE(RPM_CONFIG1, + I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); +} + static void gen7_oa_enable(struct drm_i915_private *dev_priv) { /* @@ -2679,8 +2690,8 @@ err: static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) { - return div_u64(1000000000ULL * (2ULL << exponent), - dev_priv->perf.oa.timestamp_frequency); + return div64_u64(1000000000ULL * (2ULL << exponent), + 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); } /** @@ -2934,6 +2945,10 @@ void i915_perf_register(struct drm_i915_private *dev_priv) } else if (IS_COFFEELAKE(dev_priv)) { if (IS_CFL_GT2(dev_priv)) i915_perf_load_test_config_cflgt2(dev_priv); + if (IS_CFL_GT3(dev_priv)) + i915_perf_load_test_config_cflgt3(dev_priv); + } else if (IS_CANNONLAKE(dev_priv)) { + i915_perf_load_test_config_cnl(dev_priv); } if (dev_priv->perf.oa.test_config.id == 0) @@ -2991,7 +3006,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) int i; for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { - if (flex_eu_regs[i].reg == addr) + if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr) return true; } return false; @@ -2999,31 +3014,47 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) { - return (addr >= OASTARTTRIG1.reg && addr <= OASTARTTRIG8.reg) || - (addr >= OAREPORTTRIG1.reg && addr <= OAREPORTTRIG8.reg) || - (addr >= OACEC0_0.reg && addr <= OACEC7_1.reg); + return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && + addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || + (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && + addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || + (addr >= i915_mmio_reg_offset(OACEC0_0) && + addr <= i915_mmio_reg_offset(OACEC7_1)); } static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { - return addr == HALF_SLICE_CHICKEN2.reg || - (addr >= MICRO_BP0_0.reg && addr <= NOA_WRITE.reg) || - (addr >= OA_PERFCNT1_LO.reg && addr <= OA_PERFCNT2_HI.reg) || - (addr >= OA_PERFMATRIX_LO.reg && addr <= OA_PERFMATRIX_HI.reg); + return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || + (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && + addr <= i915_mmio_reg_offset(NOA_WRITE)) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || + (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && + addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || - addr == WAIT_FOR_RC6_EXIT.reg || - (addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg); + addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || + (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && + addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); +} + +static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +{ + return gen8_is_valid_mux_addr(dev_priv, addr) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || - addr == 0x9ec0; + (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && + addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || + addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) @@ -3038,14 +3069,14 @@ static uint32_t mask_reg_value(u32 reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (HALF_SLICE_CHICKEN2.reg == reg) + if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (WAIT_FOR_RC6_EXIT.reg == reg) + if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; @@ -3392,8 +3423,6 @@ static struct ctl_table dev_root[] = { */ void i915_perf_init(struct drm_i915_private *dev_priv) { - dev_priv->perf.oa.timestamp_frequency = 0; - if (IS_HASWELL(dev_priv)) { dev_priv->perf.oa.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; @@ -3409,70 +3438,68 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_hw_tail_read = gen7_oa_hw_tail_read; - dev_priv->perf.oa.timestamp_frequency = 12500000; - dev_priv->perf.oa.oa_formats = hsw_oa_formats; - } else if (i915_modparams.enable_execlists) { + } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - dev_priv->perf.oa.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = - gen8_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = - gen8_is_valid_flex_addr; + dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; dev_priv->perf.oa.ops.read = gen8_oa_read; dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; - - if (IS_GEN8(dev_priv)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; - - dev_priv->perf.oa.timestamp_frequency = 12500000; + if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen8_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); if (IS_CHERRYVIEW(dev_priv)) { dev_priv->perf.oa.ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - } else if (IS_GEN9(dev_priv)) { + + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; + + if (IS_GEN8(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + } else { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + } + } else if (IS_GEN10(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen10_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); - - switch (dev_priv->info.platform) { - case INTEL_BROXTON: - case INTEL_GEMINILAKE: - dev_priv->perf.oa.timestamp_frequency = 19200000; - break; - case INTEL_SKYLAKE: - case INTEL_KABYLAKE: - case INTEL_COFFEELAKE: - dev_priv->perf.oa.timestamp_frequency = 12000000; - break; - default: - /* Leave timestamp_frequency to 0 so we can - * detect unsupported platforms. - */ - break; - } } } - if (dev_priv->perf.oa.timestamp_frequency) { + if (dev_priv->perf.oa.ops.enable_metric_set) { hrtimer_init(&dev_priv->perf.oa.poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; @@ -3482,8 +3509,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); - oa_sample_rate_hard_limit = - dev_priv->perf.oa.timestamp_frequency / 2; + oa_sample_rate_hard_limit = 1000 * + (INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); mutex_init(&dev_priv->perf.metrics_lock); |