diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
65 files changed, 3195 insertions, 1184 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index e49fa6fa6aee..e1c76e5bfa82 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -396,15 +396,17 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } -static int __gen125_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags, - u32 arb) +static int __xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags, + u32 arb) { struct intel_context *ce = rq->context; u32 wa_offset = lrc_indirect_bb(ce); u32 *cs; + GEM_BUG_ON(!ce->wa_bb_page); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -435,18 +437,18 @@ static int __gen125_emit_bb_start(struct i915_request *rq, return 0; } -int gen125_emit_bb_start_noarb(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +int xehp_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { - return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); + return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE); } -int gen125_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) +int xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) { - return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); + return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE); } int gen8_emit_bb_start_noarb(struct i915_request *rq, @@ -583,6 +585,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE, @@ -600,15 +604,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen8_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen8_emit_fini_breadcrumb_tail(rq, cs); } @@ -715,6 +725,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->engine->i915; u32 flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -731,11 +742,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) else if (rq->engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0); + + /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq), - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags); + 0, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); return gen12_emit_fini_breadcrumb_tail(rq, cs); } diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index e4d24c811dd6..655e5c00ddc2 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -32,12 +32,12 @@ int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags); -int gen125_emit_bb_start_noarb(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags); -int gen125_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags); +int xehp_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +int xehp_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 2128b7a72a25..4daaa6f55668 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); unsigned int rem = sg_dma_len(iter->sg); u64 start = vma_res->start; + u64 end = start + vma_res->vma_size; GEM_BUG_ON(!i915_vm_is_4lvl(vm)); @@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, gen8_pte_t encode = pte_encode; unsigned int page_size; gen8_pte_t *vaddr; - u16 index, max; + u16 index, max, nent, i; max = I915_PDES; + nent = 1; if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && @@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, vaddr = px_vaddr(pd); } else { - if (encode & GEN12_PPGTT_PTE_LM) { - GEM_BUG_ON(__gen8_pte_index(start, 0) % 16); - GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K); - GEM_BUG_ON(!IS_ALIGNED(iter->dma, - I915_GTT_PAGE_SIZE_64K)); - - index = __gen8_pte_index(start, 0) / 16; - page_size = I915_GTT_PAGE_SIZE_64K; - - max /= 16; - - vaddr = px_vaddr(pd); - vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; + index = __gen8_pte_index(start, 0); + page_size = I915_GTT_PAGE_SIZE; - pt->is_compact = true; - } else { - GEM_BUG_ON(pt->is_compact); - index = __gen8_pte_index(start, 0); - page_size = I915_GTT_PAGE_SIZE; + if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + /* + * Device local-memory on these platforms should + * always use 64K pages or larger (including GTT + * alignment), therefore if we know the whole + * page-table needs to be filled we can always + * safely use the compact-layout. Otherwise fall + * back to the TLB hint with PS64. If this is + * system memory we only bother with PS64. + */ + if ((encode & GEN12_PPGTT_PTE_LM) && + end - start >= SZ_2M && !index) { + index = __gen8_pte_index(start, 0) / 16; + page_size = I915_GTT_PAGE_SIZE_64K; + + max /= 16; + + vaddr = px_vaddr(pd); + vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; + + pt->is_compact = true; + } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + rem >= I915_GTT_PAGE_SIZE_64K && + !(index % 16)) { + encode |= GEN12_PTE_PS64; + page_size = I915_GTT_PAGE_SIZE_64K; + nent = 16; + } } vaddr = px_vaddr(pt); @@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, do { GEM_BUG_ON(rem < page_size); - vaddr[index++] = encode | iter->dma; + + for (i = 0; i < nent; i++) { + vaddr[index++] = + encode | (iter->dma + i * + I915_GTT_PAGE_SIZE); + } start += page_size; iter->dma += page_size; @@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K)); GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K)); + /* XXX: we don't strictly need to use this layout */ + if (!pt->is_compact) { vaddr = px_vaddr(pd); vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K; @@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, */ ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); - if (HAS_LMEM(gt->i915)) { + if (HAS_LMEM(gt->i915)) ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; - - /* - * On some platforms the hw has dropped support for 4K GTT pages - * when dealing with LMEM, and due to the design of 64K GTT - * pages in the hw, we can only mark the *entire* page-table as - * operating in 64K GTT mode, since the enable bit is still on - * the pde, and not the pte. And since we still need to allow - * 4K GTT pages for SMEM objects, we can't have a "normal" 4K - * page-table with scratch pointing to LMEM, since that's - * undefined from the hw pov. The simplest solution is to just - * move the 64K scratch page to SMEM on such platforms and call - * it a day, since that should work for all configurations. - */ - if (HAS_64K_PAGES(gt->i915)) - ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - else - ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem; - } else { + else ppgtt->vm.alloc_pt_dma = alloc_pt_dma; - ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; - } + + /* + * Using SMEM here instead of LMEM has the advantage of not reserving + * high performance memory for a "never" used filler page. It also + * removes the device access that would be required to initialise the + * scratch page, reducing pressure on an even scarcer resource. + */ + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; ppgtt->vm.pte_encode = gen8_pte_encode; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index be09fb2e883a..fb62b7b8cbcd 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } +static inline void intel_context_close(struct intel_context *ce) +{ + set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + + if (ce->ops->close) + ce->ops->close(ce); +} + static inline bool intel_context_is_closed(const struct intel_context *ce) { return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 04eacae1aca5..e36670f2e626 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -43,6 +43,8 @@ struct intel_context_ops { void (*revoke)(struct intel_context *ce, struct i915_request *rq, unsigned int preempt_timeout_ms); + void (*close)(struct intel_context *ce); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); @@ -197,8 +199,6 @@ struct intel_context { * context's submissions is complete. */ struct i915_sw_fence blocked; - /** @number_committed_requests: number of committed requests */ - int number_committed_requests; /** @requests: list of active requests on this context */ struct list_head requests; /** @prio: the context's current guc priority */ @@ -208,6 +208,11 @@ struct intel_context { * each priority bucket */ u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; + /** + * @sched_disable_delay_work: worker to disable scheduling on this + * context + */ + struct delayed_work sched_disable_delay_work; } guc_state; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 04e435bce79b..cbc8b857d5f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine) return engine->hung_ce; } +u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1f7188129cd1..3b7d750ad054 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->logical_mask = BIT(logical_instance); __sprint_engine_name(engine); + if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && + __ffs(CCS_MASK(engine->gt)) == engine->instance) || + engine->class == RENDER_CLASS) + engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; + + /* features common between engines sharing EUs */ + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { + engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; + engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; + } + engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; engine->props.max_busywait_duration_ns = @@ -497,20 +508,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->props.timeslice_duration_ms = CONFIG_DRM_I915_TIMESLICE_DURATION; - /* Override to uninterruptible for OpenCL workloads. */ - if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) - engine->props.preempt_timeout_ms = 0; - - if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && - __ffs(CCS_MASK(engine->gt)) == engine->instance) || - engine->class == RENDER_CLASS) - engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; - - /* features common between engines sharing EUs */ - if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { - engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; - engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; - } + /* + * Mid-thread pre-emption is not available in Gen12. Unfortunately, + * some compute workloads run quite long threads. That means they get + * reset due to not pre-empting in a timely manner. So, bump the + * pre-emption timeout value to be much higher for compute engines. + */ + if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) + engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE; + + /* Cap properties according to any system limits */ +#define CLAMP_PROP(field) \ + do { \ + u64 clamp = intel_clamp_##field(engine, engine->props.field); \ + if (clamp != engine->props.field) { \ + drm_notice(&engine->i915->drm, \ + "Warning, clamping %s to %lld to prevent overflow\n", \ + #field, clamp); \ + engine->props.field = clamp; \ + } \ + } while (0) + + CLAMP_PROP(heartbeat_interval_ms); + CLAMP_PROP(max_busywait_duration_ns); + CLAMP_PROP(preempt_timeout_ms); + CLAMP_PROP(stop_timeout_ms); + CLAMP_PROP(timeslice_duration_ms); + +#undef CLAMP_PROP engine->defaults = engine->props; /* never to change again */ @@ -534,6 +559,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, return 0; } +u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value) +{ + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value) +{ + value = min(value, jiffies_to_nsecs(2)); + + return value; +} + +u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value) +{ + /* + * NB: The GuC API only supports 32bit values. However, the limit is further + * reduced due to internal calculations which would otherwise overflow. + */ + if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + value = min_t(u64, value, guc_policy_max_preempt_timeout_ms()); + + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value) +{ + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value) +{ + /* + * NB: The GuC API only supports 32bit values. However, the limit is further + * reduced due to internal calculations which would otherwise overflow. + */ + if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + value = min_t(u64, value, guc_policy_max_exec_quantum_ms()); + + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + static void __setup_engine_capabilities(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1274,8 +1348,13 @@ int intel_engines_init(struct intel_gt *gt) return err; err = setup(engine); - if (err) + if (err) { + intel_engine_cleanup_common(engine); return err; + } + + /* The backend should now be responsible for cleanup */ + GEM_BUG_ON(engine->release == NULL); err = engine_init_common(engine); if (err) @@ -1554,11 +1633,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, for_each_ss_steering(iter, engine->gt, slice, subslice) { instdone->sampler[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, + GEN8_SAMPLER_INSTDONE, slice, subslice); instdone->row[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, + GEN8_ROW_INSTDONE, slice, subslice); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index a3698f611f45..9a527e1f5be6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -22,9 +22,37 @@ static bool next_heartbeat(struct intel_engine_cs *engine) { + struct i915_request *rq; long delay; delay = READ_ONCE(engine->props.heartbeat_interval_ms); + + rq = engine->heartbeat.systole; + + /* + * FIXME: The final period extension is disabled if the period has been + * modified from the default. This is to prevent issues with certain + * selftests which override the value and expect specific behaviour. + * Once the selftests have been updated to either cope with variable + * heartbeat periods (or to override the pre-emption timeout as well, + * or just to add a selftest specific override of the extension), the + * generic override can be removed. + */ + if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER && + delay == engine->defaults.heartbeat_interval_ms) { + long longer; + + /* + * The final try is at the highest priority possible. Up until now + * a pre-emption might not even have been attempted. So make sure + * this last attempt allows enough time for a pre-emption to occur. + */ + longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2; + longer = intel_clamp_heartbeat_interval_ms(engine, longer); + if (longer > delay) + delay = longer; + } + if (!delay) return false; @@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine, if (!delay && !intel_engine_has_preempt_reset(engine)) return -ENODEV; + /* FIXME: Remove together with equally marked hack in next_heartbeat. */ + if (delay != engine->defaults.heartbeat_interval_ms && + delay < 2 * engine->props.preempt_timeout_ms) { + if (intel_engine_uses_guc(engine)) + drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may downgrade individual engine resets to full GPU resets!\n", + engine->name); + else + drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may cause engine resets to target innocent contexts!\n", + engine->name); + } + intel_engine_pm_get(engine); err = mutex_lock_interruptible(&ce->timeline->mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index fe1a0d5fd4b1..ee3efd06ee54 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -201,6 +201,7 @@ #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */ #define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8) +#define MI_PREDICATE_RESULT_2_ENGINE(base) _MMIO((base) + 0x3bc) #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30) #define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index c718e6dc40b5..0187bc72310d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3471,9 +3471,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen125_emit_bb_start; + engine->emit_bb_start = xehp_emit_bb_start; else - engine->emit_bb_start = gen125_emit_bb_start_noarb; + engine->emit_bb_start = xehp_emit_bb_start_noarb; } else { if (intel_engine_has_preemption(engine)) engine->emit_bb_start = gen8_emit_bb_start; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 2049a00417af..2518cebbf931 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -871,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) u32 pte_flags; int ret; - GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); - phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915); + GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); /* * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range @@ -931,11 +931,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) unsigned int size; u16 snb_gmch_ctl; - if (!HAS_LMEM(i915)) { - if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR)) + if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) { + if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) return -ENXIO; - ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR); + ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); ggtt->mappable_end = resource_size(&ggtt->gmadr); } @@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(ggtt->vm.gt); return ggtt_probe_common(ggtt, size); } @@ -1089,10 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) unsigned int size; u16 snb_gmch_ctl; - if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR)) + if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) return -ENXIO; - ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR); + ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); ggtt->mappable_end = resource_size(&ggtt->gmadr); /* @@ -1308,7 +1308,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) wbinvd_on_all_cpus(); if (GRAPHICS_VER(ggtt->vm.i915) >= 8) - setup_private_pat(ggtt->vm.gt->uncore); + setup_private_pat(ggtt->vm.gt); intel_ggtt_restore_fences(ggtt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index d4e9702d3c8e..f50ea92910d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -187,6 +187,10 @@ #define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) #define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ +#define MI_OPCODE(x) (((x) >> 23) & 0x3f) +#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0))) +#define MI_LRI_LEN(x) (((x) & 0xff) + 1) + /* * 3D instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 7af6db3194dd..976fdf27e790 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -7,6 +7,7 @@ #include <linux/mei_aux.h> #include "i915_drv.h" #include "i915_reg.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gt/intel_gsc.h" #include "gt/intel_gt.h" @@ -142,8 +143,14 @@ static void gsc_destroy_one(struct drm_i915_private *i915, struct intel_gsc_intf *intf = &gsc->intf[intf_id]; if (intf->adev) { - auxiliary_device_delete(&intf->adev->aux_dev); - auxiliary_device_uninit(&intf->adev->aux_dev); + struct auxiliary_device *aux_dev = &intf->adev->aux_dev; + + if (intf_id == 0) + intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc, + aux_dev->dev.bus); + + auxiliary_device_delete(aux_dev); + auxiliary_device_uninit(aux_dev); intf->adev = NULL; } @@ -242,14 +249,24 @@ add_device: goto fail; } + intf->adev = adev; /* needed by the notifier */ + + if (intf_id == 0) + intel_huc_register_gsc_notifier(&gsc_to_gt(gsc)->uc.huc, + aux_dev->dev.bus); + ret = auxiliary_device_add(aux_dev); if (ret < 0) { drm_err(&i915->drm, "gsc aux add failed %d\n", ret); + if (intf_id == 0) + intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc, + aux_dev->dev.bus); + intf->adev = NULL; + /* adev will be freed with the put_device() and .release sequence */ auxiliary_device_uninit(aux_dev); goto fail; } - intf->adev = adev; return; fail: diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..8e914c4066ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -40,8 +40,6 @@ void intel_gt_common_init_early(struct intel_gt *gt) { spin_lock_init(gt->irq_lock); - INIT_LIST_HEAD(>->lmem_userfault_list); - mutex_init(>->lmem_userfault_lock); INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -231,6 +229,16 @@ static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) GEN6_RING_FAULT_REG_POSTING_READ(engine); } +i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt) +{ + /* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */ + if (GRAPHICS_VER(gt->i915) < 11) + return INVALID_MMIO_REG; + + return gt->type == GT_MEDIA ? + MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS; +} + void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask) @@ -260,7 +268,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt, I915_MASTER_ERROR_INTERRUPT); } - if (GRAPHICS_VER(i915) >= 12) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG, + RING_FAULT_VALID, 0); + intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); + } else if (GRAPHICS_VER(i915) >= 12) { rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); } else if (GRAPHICS_VER(i915) >= 8) { @@ -298,6 +310,42 @@ static void gen6_check_faults(struct intel_gt *gt) } } +static void xehp_check_faults(struct intel_gt *gt) +{ + u32 fault; + + /* + * Although the fault register now lives in an MCR register range, + * the GAM registers are special and we only truly need to read + * the "primary" GAM instance rather than handling each instance + * individually. intel_gt_mcr_read_any() will automatically steer + * toward the primary instance. + */ + fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); + if (fault & RING_FAULT_VALID) { + u32 fault_data0, fault_data1; + u64 fault_addr; + + fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0); + fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1); + + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + drm_dbg(>->i915->drm, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + } +} + static void gen8_check_faults(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; @@ -344,7 +392,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; /* From GEN8 onwards we only have one 'All Engine Fault Register' */ - if (GRAPHICS_VER(i915) >= 8) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + xehp_check_faults(gt); + else if (GRAPHICS_VER(i915) >= 8) gen8_check_faults(gt); else if (GRAPHICS_VER(i915) >= 6) gen6_check_faults(gt); @@ -807,7 +857,6 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) } intel_uncore_init_early(gt->uncore, gt); - intel_wakeref_auto_init(>->userfault_wakeref, gt->uncore->rpm); ret = intel_uncore_setup_mmio(gt->uncore, phys_addr); if (ret) @@ -828,7 +877,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915) unsigned int i; int ret; - mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; + mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915)); phys_addr = pci_resource_start(pdev, mmio_bar); /* @@ -939,7 +988,10 @@ void intel_gt_info_print(const struct intel_gt_info *info, } struct reg_and_bit { - i915_reg_t reg; + union { + i915_reg_t reg; + i915_mcr_reg_t mcr_reg; + }; u32 bit; }; @@ -965,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, return rb; } +/* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ +#define TLB_INVAL_TIMEOUT_US 100 +#define TLB_INVAL_TIMEOUT_MS 4 + +/* + * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets + * but are now considered MCR registers. Since they exist within a GAM range, + * the primary instance of the register rolls up the status from each unit. + */ +static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb) +{ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS); + else + return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0, + TLB_INVAL_TIMEOUT_US, + TLB_INVAL_TIMEOUT_MS, + NULL); +} + static void mmio_invalidate_full(struct intel_gt *gt) { static const i915_reg_t gen8_regs[] = { @@ -980,6 +1058,13 @@ static void mmio_invalidate_full(struct intel_gt *gt) [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, }; + static const i915_mcr_reg_t xehp_regs[] = { + [RENDER_CLASS] = XEHP_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR, + [COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR, + }; struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; @@ -988,7 +1073,10 @@ static void mmio_invalidate_full(struct intel_gt *gt) const i915_reg_t *regs; unsigned int num = 0; - if (GRAPHICS_VER(i915) == 12) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + regs = NULL; + num = ARRAY_SIZE(xehp_regs); + } else if (GRAPHICS_VER(i915) == 12) { regs = gen12_regs; num = ARRAY_SIZE(gen12_regs); } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { @@ -1013,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!intel_engine_pm_is_awake(engine)) continue; - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (!i915_mmio_reg_offset(rb.reg)) - continue; + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + intel_gt_mcr_multicast_write_fw(gt, + xehp_regs[engine->class], + BIT(engine->instance)); + } else { + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } awake |= engine->mask; } @@ -1037,22 +1131,17 @@ static void mmio_invalidate_full(struct intel_gt *gt) for_each_engine_masked(engine, gt, awake, tmp) { struct reg_and_bit rb; - /* - * HW architecture suggest typical invalidation time at 40us, - * with pessimistic cases up to 100us and a recommendation to - * cap at 1ms. We go a bit higher just in case. - */ - const unsigned int timeout_us = 100; - const unsigned int timeout_ms = 4; - - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (__intel_wait_for_register_fw(uncore, - rb.reg, rb.bit, 0, - timeout_us, timeout_ms, - NULL)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + rb.mcr_reg = xehp_regs[engine->class]; + rb.bit = BIT(engine->instance); + } else { + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + } + + if (wait_for_invalidate(gt, rb)) drm_err_ratelimited(>->i915->drm, "%s TLB invalidation did not complete in %ums!\n", - engine->name, timeout_ms); + engine->name, TLB_INVAL_TIMEOUT_MS); } /* diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 2ee582e287c8..e0365d556248 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -60,6 +60,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915); int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_check_and_clear_faults(struct intel_gt *gt); +i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 3f656d3dba9a..2a6a4ca7fdad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -107,7 +107,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore) return freq; } -static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) +static u32 gen6_read_clock_frequency(struct intel_uncore *uncore) { /* * PRMs say: @@ -119,7 +119,27 @@ static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) return 12500000; } -static u32 gen2_read_clock_frequency(struct intel_uncore *uncore) +static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) +{ + /* + * 63:32 increments every 1000 ns + * 31:0 mbz + */ + return 1000000000 / 1000; +} + +static u32 g4x_read_clock_frequency(struct intel_uncore *uncore) +{ + /* + * 63:20 increments every 1/4 ns + * 19:0 mbz + * + * -> 63:32 increments every 1024 ns + */ + return 1000000000 / 1024; +} + +static u32 gen4_read_clock_frequency(struct intel_uncore *uncore) { /* * PRMs say: @@ -127,8 +147,10 @@ static u32 gen2_read_clock_frequency(struct intel_uncore *uncore) * "The value in this register increments once every 16 * hclks." (through the “Clocking Configuration” * (“CLKCFG”) MCHBAR register) + * + * Testing on actual hardware has shown there is no /16. */ - return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16; + return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000; } static u32 read_clock_frequency(struct intel_uncore *uncore) @@ -137,10 +159,16 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) return gen11_read_clock_frequency(uncore); else if (GRAPHICS_VER(uncore->i915) >= 9) return gen9_read_clock_frequency(uncore); - else if (GRAPHICS_VER(uncore->i915) >= 5) + else if (GRAPHICS_VER(uncore->i915) >= 6) + return gen6_read_clock_frequency(uncore); + else if (GRAPHICS_VER(uncore->i915) == 5) return gen5_read_clock_frequency(uncore); + else if (IS_G4X(uncore->i915)) + return g4x_read_clock_frequency(uncore); + else if (GRAPHICS_VER(uncore->i915) == 4) + return gen4_read_clock_frequency(uncore); else - return gen2_read_clock_frequency(uncore); + return 0; } void intel_gt_init_clock_frequency(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index e79405a45312..830edffe88cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -40,6 +40,9 @@ static const char * const intel_steering_types[] = { "L3BANK", "MSLICE", "LNCF", + "GAM", + "DSS", + "OADDRM", "INSTANCE 0", }; @@ -48,14 +51,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = { {}, }; +/* + * Although the bspec lists more "MSLICE" ranges than shown here, some of those + * are of a "GAM" subclass that has special rules. Thus we use a separate + * GAM table farther down for those. + */ static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { - { 0x004000, 0x004AFF }, - { 0x00C800, 0x00CFFF }, { 0x00DD00, 0x00DDFF }, { 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */ {}, }; +static const struct intel_mmio_range xehpsdv_gam_steering_table[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + {}, +}; + static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { { 0x00B000, 0x00B0FF }, { 0x00D800, 0x00D8FF }, @@ -89,9 +101,47 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = { {}, }; +static const struct intel_mmio_range xelpg_instance0_steering_table[] = { + { 0x000B00, 0x000BFF }, /* SQIDI */ + { 0x001000, 0x001FFF }, /* SQIDI */ + { 0x004000, 0x0048FF }, /* GAM */ + { 0x008700, 0x0087FF }, /* SQIDI */ + { 0x00B000, 0x00B0FF }, /* NODE */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DDFF }, /* OAAL2 */ + {}, +}; + +static const struct intel_mmio_range xelpg_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +/* DSS steering is used for SLICE ranges as well */ +static const struct intel_mmio_range xelpg_dss_steering_table[] = { + { 0x005200, 0x0052FF }, /* SLICE */ + { 0x005500, 0x007FFF }, /* SLICE */ + { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */ + { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */ + { 0x009680, 0x0096FF }, /* DSS */ + { 0x00D800, 0x00D87F }, /* SLICE */ + { 0x00DC00, 0x00DCFF }, /* SLICE */ + { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + {}, +}; + +static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = { + { 0x393200, 0x39323F }, + { 0x393400, 0x3934FF }, + {}, +}; + void intel_gt_mcr_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; + unsigned long fuse; + int i; /* * An mslice is unavailable only if both the meml3 for the slice is @@ -109,14 +159,36 @@ void intel_gt_mcr_init(struct intel_gt *gt) drm_warn(&i915->drm, "mslice mask all zero!\n"); } - if (IS_PONTEVECCHIO(i915)) { + if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) { + gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + fuse = REG_FIELD_GET(GT_L3_EXC_MASK, + intel_uncore_read(gt->uncore, XEHP_FUSE4)); + + /* + * Despite the register field being named "exclude mask" the + * bits actually represent enabled banks (two banks per bit). + */ + for_each_set_bit(i, &fuse, 3) + gt->info.l3bank_mask |= 0x3 << 2 * i; + + gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table; + gt->steering_table[L3BANK] = xelpg_l3bank_steering_table; + gt->steering_table[DSS] = xelpg_dss_steering_table; + } else if (IS_PONTEVECCHIO(i915)) { gt->steering_table[INSTANCE0] = pvc_instance0_steering_table; } else if (IS_DG2(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = dg2_lncf_steering_table; + /* + * No need to hook up the GAM table since it has a dedicated + * steering control register on DG2 and can use implicit + * steering. + */ } else if (IS_XEHPSDV(i915)) { gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; + gt->steering_table[GAM] = xehpsdv_gam_steering_table; } else if (GRAPHICS_VER(i915) >= 11 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { gt->steering_table[L3BANK] = icl_l3bank_steering_table; @@ -135,6 +207,19 @@ void intel_gt_mcr_init(struct intel_gt *gt) } /* + * Although the rest of the driver should use MCR-specific functions to + * read/write MCR registers, we still use the regular intel_uncore_* functions + * internally to implement those, so we need a way for the functions in this + * file to "cast" an i915_mcr_reg_t into an i915_reg_t. + */ +static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr) +{ + i915_reg_t r = { .reg = mcr.reg }; + + return r; +} + +/* * rw_with_mcr_steering_fw - Access a register with specific MCR steering * @uncore: pointer to struct intel_uncore * @reg: register being accessed @@ -148,14 +233,26 @@ void intel_gt_mcr_init(struct intel_gt *gt) * Caller needs to make sure the relevant forcewake wells are up. */ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, + i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; lockdep_assert_held(&uncore->lock); - if (GRAPHICS_VER(uncore->i915) >= 11) { + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) { + /* + * Always leave the hardware in multicast mode when doing reads + * (see comment about Wa_22013088509 below) and only change it + * to unicast mode when doing writes of a specific instance. + * + * No need to save old steering reg value. + */ + intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, + REG_FIELD_PREP(MTL_MCR_GROUPID, group) | + REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) | + (rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0)); + } else if (GRAPHICS_VER(uncore->i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance); @@ -173,39 +270,53 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore, */ if (rw_flag == FW_REG_WRITE) mcr_mask |= GEN11_MCR_MULTICAST; + + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr; + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); } else { mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance); - } - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + old_mcr = mcr; - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + } if (rw_flag == FW_REG_READ) - val = intel_uncore_read_fw(uncore, reg); + val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg)); else - intel_uncore_write_fw(uncore, reg, value); - - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; + intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value); - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + /* + * For pre-MTL platforms, we need to restore the old value of the + * steering control register to ensure that implicit steering continues + * to behave as expected. For MTL and beyond, we need only reinstate + * the 'multicast' bit (and only if we did a write that cleared it). + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE) + intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70)) + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr); return val; } static u32 rw_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, u8 rw_flag, + i915_mcr_reg_t reg, u8 rw_flag, int group, int instance, u32 value) { enum forcewake_domains fw_domains; u32 val; - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg), rw_flag); fw_domains |= intel_uncore_forcewake_for_reg(uncore, GEN8_MCR_SELECTOR, @@ -233,7 +344,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore, * group/instance. */ u32 intel_gt_mcr_read(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, int group, int instance) { return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0); @@ -250,7 +361,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt, * Write an MCR register in unicast mode after steering toward a specific * group/instance. */ -void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value, +void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value, int group, int instance) { rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value); @@ -265,9 +376,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value, * Write an MCR register in multicast mode to update all instances. */ void intel_gt_mcr_multicast_write(struct intel_gt *gt, - i915_reg_t reg, u32 value) + i915_mcr_reg_t reg, u32 value) { - intel_uncore_write(gt->uncore, reg, value); + /* + * Ensure we have multicast behavior, just in case some non-i915 agent + * left the hardware in unicast mode. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + + intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value); } /** @@ -281,9 +399,44 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt, * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should * be obtained automatically. */ -void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value) +void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value) +{ + /* + * Ensure we have multicast behavior, just in case some non-i915 agent + * left the hardware in unicast mode. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST); + + intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value); +} + +/** + * intel_gt_mcr_multicast_rmw - Performs a multicast RMW operations + * @gt: GT structure + * @reg: the MCR register to read and write + * @clear: bits to clear during RMW + * @set: bits to set during RMW + * + * Performs a read-modify-write on an MCR register in a multicast manner. + * This operation only makes sense on MCR registers where all instances are + * expected to have the same value. The read will target any non-terminated + * instance and the write will be applied to all instances. + * + * This function assumes the caller is already holding any necessary forcewake + * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should + * be obtained automatically. + * + * Returns the old (unmodified) value read. + */ +u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg, + u32 clear, u32 set) { - intel_uncore_write_fw(gt->uncore, reg, value); + u32 val = intel_gt_mcr_read_any(gt, reg); + + intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set); + + return val; } /* @@ -301,7 +454,7 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 va * for @type steering too. */ static bool reg_needs_read_steering(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, enum intel_steering_type type) { const u32 offset = i915_mmio_reg_offset(reg); @@ -332,6 +485,8 @@ static void get_nonterminated_steering(struct intel_gt *gt, enum intel_steering_type type, u8 *group, u8 *instance) { + u32 dss; + switch (type) { case L3BANK: *group = 0; /* unused */ @@ -351,6 +506,15 @@ static void get_nonterminated_steering(struct intel_gt *gt, *group = __ffs(gt->info.mslice_mask) << 1; *instance = 0; /* unused */ break; + case GAM: + *group = IS_DG2(gt->i915) ? 1 : 0; + *instance = 0; + break; + case DSS: + dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0); + *group = dss / GEN_DSS_PER_GSLICE; + *instance = dss % GEN_DSS_PER_GSLICE; + break; case INSTANCE0: /* * There are a lot of MCR types for which instance (0, 0) @@ -359,6 +523,13 @@ static void get_nonterminated_steering(struct intel_gt *gt, *group = 0; *instance = 0; break; + case OADDRM: + if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & BIT(0)) + *group = 0; + else + *group = 1; + *instance = 0; + break; default: MISSING_CASE(type); *group = 0; @@ -380,7 +551,7 @@ static void get_nonterminated_steering(struct intel_gt *gt, * steering. */ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, u8 *group, u8 *instance) { int type; @@ -409,7 +580,7 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, * * Returns the value from a non-terminated instance of @reg. */ -u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) +u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg) { int type; u8 group, instance; @@ -423,7 +594,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) } } - return intel_uncore_read_fw(gt->uncore, reg); + return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg)); } /** @@ -436,7 +607,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg) * * Returns the value from a non-terminated instance of @reg. */ -u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg) +u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg) { int type; u8 group, instance; @@ -450,7 +621,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg) } } - return intel_uncore_read(gt->uncore, reg); + return intel_uncore_read(gt->uncore, mcr_reg_cast(reg)); } static void report_steering_type(struct drm_printer *p, @@ -483,11 +654,20 @@ static void report_steering_type(struct drm_printer *p, void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, bool dump_table) { - drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", - gt->default_steering.groupid, - gt->default_steering.instanceid); - - if (IS_PONTEVECCHIO(gt->i915)) { + /* + * Starting with MTL we no longer have default steering; + * all ranges are explicitly steered. + */ + if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)) + drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n", + gt->default_steering.groupid, + gt->default_steering.instanceid); + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) { + for (int i = 0; i < NUM_STEERING_TYPES; i++) + if (gt->steering_table[i]) + report_steering_type(p, gt, i, dump_table); + } else if (IS_PONTEVECCHIO(gt->i915)) { report_steering_type(p, gt, INSTANCE0, dump_table); } else if (HAS_MSLICE_STEERING(gt->i915)) { report_steering_type(p, gt, MSLICE, dump_table); @@ -520,3 +700,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, return; } } + +/** + * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state + * @gt: GT structure + * @reg: the register to read + * @mask: mask to apply to register value + * @value: value to wait for + * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait + * @slow_timeout_ms: slow timeout in millisecond + * + * This routine waits until the target register @reg contains the expected + * @value after applying the @mask, i.e. it waits until :: + * + * (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value + * + * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds. + * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us + * must be not larger than 20,0000 microseconds. + * + * This function is basically an MCR-friendly version of + * __intel_wait_for_register_fw(). Generally this function will only be used + * on GAM registers which are a bit special --- although they're MCR registers, + * reads (e.g., waiting for status updates) are always directed to the primary + * instance. + * + * Note that this routine assumes the caller holds forcewake asserted, it is + * not suitable for very long waits. + * + * Return: 0 if the register matches the desired condition, or -ETIMEDOUT. + */ +int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms) +{ + u32 reg_value = 0; +#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value) + int ret; + + /* Catch any overuse of this function */ + might_sleep_if(slow_timeout_ms); + GEM_BUG_ON(fast_timeout_us > 20000); + GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms); + + ret = -ETIMEDOUT; + if (fast_timeout_us && fast_timeout_us <= 20000) + ret = _wait_for_atomic(done, fast_timeout_us, 0); + if (ret && slow_timeout_ms) + ret = wait_for(done, slow_timeout_ms); + + return ret; +#undef done +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 77a8b11c287d..3fb0502bff22 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -11,21 +11,24 @@ void intel_gt_mcr_init(struct intel_gt *gt); u32 intel_gt_mcr_read(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, int group, int instance); -u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg); -u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg); +u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg); void intel_gt_mcr_unicast_write(struct intel_gt *gt, - i915_reg_t reg, u32 value, + i915_mcr_reg_t reg, u32 value, int group, int instance); void intel_gt_mcr_multicast_write(struct intel_gt *gt, - i915_reg_t reg, u32 value); + i915_mcr_reg_t reg, u32 value); void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, - i915_reg_t reg, u32 value); + i915_mcr_reg_t reg, u32 value); + +u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg, + u32 clear, u32 set); void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, - i915_reg_t reg, + i915_mcr_reg_t reg, u8 *group, u8 *instance); void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, @@ -34,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, unsigned int *group, unsigned int *instance); +int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt, + i915_mcr_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms); + /* * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice * presence is determined by using the group/instance as direct lookups in the diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 108b9e76c32e..40d0a3be42ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(rps, rps->efficient_freq)); } else if (GRAPHICS_VER(i915) >= 6) { - u32 rp_state_limits; - u32 gt_perf_status; - struct intel_rps_freq_caps caps; - u32 rpmodectl, rpinclimit, rpdeclimit; - u32 rpstat, cagf, reqf; - u32 rpcurupei, rpcurup, rpprevup; - u32 rpcurdownei, rpcurdown, rpprevdown; - u32 rpupei, rpupt, rpdownei, rpdownt; - u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; - - rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - gen6_rps_get_freq_caps(rps, &caps); - if (IS_GEN9_LP(i915)) - gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - else - gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - - /* RPSTAT1 is in the GT power well */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); - if (GRAPHICS_VER(i915) >= 9) { - reqf >>= 23; - } else { - reqf &= ~GEN6_TURBO_DISABLE; - if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - reqf >>= 24; - else - reqf >>= 25; - } - reqf = intel_gpu_freq(rps, reqf); - - rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); - rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); - rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; - rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; - rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; - rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; - rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; - rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - - rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); - rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); - - rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); - rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - - cagf = intel_rps_read_actual_frequency(rps); - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - if (GRAPHICS_VER(i915) >= 11) { - pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); - pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); - /* - * The equivalent to the PM ISR & IIR cannot be read - * without affecting the current state of the system - */ - pm_isr = 0; - pm_iir = 0; - } else if (GRAPHICS_VER(i915) >= 8) { - pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); - pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); - pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); - pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); - } else { - pm_ier = intel_uncore_read(uncore, GEN6_PMIER); - pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); - pm_isr = intel_uncore_read(uncore, GEN6_PMISR); - pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); - } - pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); - - drm_printf(p, "Video Turbo Mode: %s\n", - str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO)); - drm_printf(p, "HW control enabled: %s\n", - str_yes_no(rpmodectl & GEN6_RP_ENABLE)); - drm_printf(p, "SW control enabled: %s\n", - str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); - - drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", - pm_ier, pm_imr, pm_mask); - if (GRAPHICS_VER(i915) <= 10) - drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", - pm_isr, pm_iir); - drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", - rps->pm_intrmsk_mbz); - drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); - drm_printf(p, "Render p-state ratio: %d\n", - (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); - drm_printf(p, "Render p-state VID: %d\n", - gt_perf_status & 0xff); - drm_printf(p, "Render p-state limit: %d\n", - rp_state_limits & 0xff); - drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); - drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); - drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); - drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); - drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); - drm_printf(p, "CAGF: %dMHz\n", cagf); - drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", - rpcurupei, - intel_gt_pm_interval_to_ns(gt, rpcurupei)); - drm_printf(p, "RP CUR UP: %d (%lldns)\n", - rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); - drm_printf(p, "RP PREV UP: %d (%lldns)\n", - rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); - drm_printf(p, "Up threshold: %d%%\n", - rps->power.up_threshold); - drm_printf(p, "RP UP EI: %d (%lldns)\n", - rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); - drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", - rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); - - drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", - rpcurdownei, - intel_gt_pm_interval_to_ns(gt, rpcurdownei)); - drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", - rpcurdown, - intel_gt_pm_interval_to_ns(gt, rpcurdown)); - drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", - rpprevdown, - intel_gt_pm_interval_to_ns(gt, rpprevdown)); - drm_printf(p, "Down threshold: %d%%\n", - rps->power.down_threshold); - drm_printf(p, "RP DOWN EI: %d (%lldns)\n", - rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); - drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", - rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); - - drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(rps, caps.min_freq)); - drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(rps, caps.rp1_freq)); - drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(rps, caps.rp0_freq)); - drm_printf(p, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(rps, rps->max_freq)); - - drm_printf(p, "Current freq: %d MHz\n", - intel_gpu_freq(rps, rps->cur_freq)); - drm_printf(p, "Actual freq: %d MHz\n", cagf); - drm_printf(p, "Idle freq: %d MHz\n", - intel_gpu_freq(rps, rps->idle_freq)); - drm_printf(p, "Min freq: %d MHz\n", - intel_gpu_freq(rps, rps->min_freq)); - drm_printf(p, "Boost freq: %d MHz\n", - intel_gpu_freq(rps, rps->boost_freq)); - drm_printf(p, "Max freq: %d MHz\n", - intel_gpu_freq(rps, rps->max_freq)); - drm_printf(p, - "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(rps, rps->efficient_freq)); + gen6_rps_frequency_dump(rps, p); } else { drm_puts(p, "no P-state info available\n"); } @@ -655,6 +500,44 @@ static bool rps_eval(void *data) DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost); +static int perf_limit_reasons_get(void *data, u64 *val) +{ + struct intel_gt *gt = data; + intel_wakeref_t wakeref; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + *val = intel_uncore_read(gt->uncore, intel_gt_perf_limit_reasons_reg(gt)); + + return 0; +} + +static int perf_limit_reasons_clear(void *data, u64 val) +{ + struct intel_gt *gt = data; + intel_wakeref_t wakeref; + + /* + * Clear the upper 16 "log" bits, the lower 16 "status" bits are + * read-only. The upper 16 "log" bits are identical to the lower 16 + * "status" bits except that the "log" bits remain set until cleared. + */ + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + intel_uncore_rmw(gt->uncore, intel_gt_perf_limit_reasons_reg(gt), + GT0_PERF_LIMIT_REASONS_LOG_MASK, 0); + + return 0; +} + +static bool perf_limit_reasons_eval(void *data) +{ + struct intel_gt *gt = data; + + return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt)); +} + +DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get, + perf_limit_reasons_clear, "%llu\n"); + void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { @@ -664,6 +547,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { "forcewake_user", &forcewake_user_fops, NULL}, { "llc", &llc_fops, llc_eval }, { "rps_boost", &rps_boost_fops, rps_eval }, + { "perf_limit_reasons", &perf_limit_reasons_fops, perf_limit_reasons_eval }, }; intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 5a3a25838fff..70177d3f2e94 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -8,6 +8,19 @@ #include "i915_reg_defs.h" +#define MCR_REG(offset) ((const i915_mcr_reg_t){ .reg = (offset) }) + +/* + * The perf control registers are technically multicast registers, but the + * driver never needs to read/write them directly; we only use them to build + * lists of registers (where they're mixed in with other non-MCR registers) + * and then operate on the offset directly. For now we'll just define them + * as non-multicast so we can place them on the same list, but we may want + * to try to come up with a better way to handle heterogeneous lists of + * registers in the future. + */ +#define PERF_REG(offset) _MMIO(offset) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 @@ -39,12 +52,17 @@ #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84) #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88) +#define FORCEWAKE_ACK_GSC _MMIO(0xdf8) +#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc) + #define GMD_ID_GRAPHICS _MMIO(0xd8c) #define GMD_ID_MEDIA _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define MTL_MCR_SELECTOR _MMIO(0xfd4) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) +#define GAM_MCR_SELECTOR _MMIO(0xfe0) #define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26) #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) #define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24) @@ -54,6 +72,8 @@ #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) #define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) +#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) #define IPEIR_I965 _MMIO(0x2064) #define IPEHR_I965 _MMIO(0x2068) @@ -329,11 +349,12 @@ #define GEN7_TLB_RD_ADDR _MMIO(0x4700) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) +#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4) -#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900) #define XEHP_TILE_LMEM_RANGE_SHIFT 8 -#define XEHP_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910) #define XEHP_CCS_BASE_SHIFT 8 #define GAMTARBMODE _MMIO(0x4a08) @@ -383,17 +404,18 @@ #define CHICKEN_RASTER_2 _MMIO(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) -#define VFLSKPD _MMIO(0x62a8) +#define VFLSKPD MCR_REG(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) -#define FF_MODE2 _MMIO(0x6604) +#define GEN12_FF_MODE2 _MMIO(0x6604) +#define XEHP_FF_MODE2 MCR_REG(0x6604) #define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) #define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) -#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c) +#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c) #define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */ #define RC_OP_FLUSH_ENABLE (1 << 0) @@ -421,6 +443,7 @@ #define HIZ_CHICKEN _MMIO(0x7018) #define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) +#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) #define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE REG_BIT(3) #define GEN8_L3CNTLREG _MMIO(0x7034) @@ -442,23 +465,16 @@ #define GEN8_HDC_CHICKEN1 _MMIO(0x7304) #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) +#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304) #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) #define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) -/* GEN9 chicken */ -#define SLICE_ECO_CHICKEN0 _MMIO(0x7308) -#define PIXEL_MASK_CAMMING_DISABLE (1 << 14) - -#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308) -#define DISABLE_PIXEL_MASK_CAMMING (1 << 14) - #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) -#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) - -#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) +#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11) #define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4) #define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ @@ -485,9 +501,12 @@ #define VF_PREEMPTION _MMIO(0x83a4) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) +#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4) +#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) + #define GEN8_RC6_CTX_INFO _MMIO(0x8504) -#define GEN12_SQCM _MMIO(0x8724) +#define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) #define HSW_IDICR _MMIO(0x9008) @@ -519,6 +538,8 @@ #define GEN6_MBCTL_BOOT_FETCH_MECH (1 << 0) /* Fuse readout registers for GT */ +#define XEHP_FUSE4 _MMIO(0x9114) +#define GT_L3_EXC_MASK REG_GENMASK(6, 4) #define GEN10_MIRROR_FUSE3 _MMIO(0x9118) #define GEN10_L3BANK_PAIR_COUNT 4 #define GEN10_L3BANK_MASK 0x0F @@ -647,6 +668,9 @@ #define GEN7_MISCCPCTL _MMIO(0x9424) #define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) + +#define GEN8_MISCCPCTL MCR_REG(0x9424) +#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0) #define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) #define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2) #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4) @@ -700,7 +724,8 @@ #define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) #define LTCDD_CLKGATE_DIS REG_BIT(10) -#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) @@ -708,27 +733,27 @@ #define L3_CLKGATE_DIS REG_BIT(16) #define L3_CR2X_CLKGATE_DIS REG_BIT(17) -#define SCCGCTL94DC _MMIO(0x94dc) +#define SCCGCTL94DC MCR_REG(0x94dc) #define CG3DDISURB REG_BIT(14) #define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4) #define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19) #define PSDUNIT_CLKGATE_DIS REG_BIT(5) -#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) +#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524) #define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) #define GWUNIT_CLKGATE_DIS REG_BIT(16) -#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528) #define CPSSUNIT_CLKGATE_DIS REG_BIT(9) -#define SSMCGCTL9530 _MMIO(0x9530) +#define SSMCGCTL9530 MCR_REG(0x9530) #define RTFUNIT_CLKGATE_DIS REG_BIT(18) -#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) +#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550) #define DFR_DISABLE (1 << 9) -#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) +#define INF_UNIT_LEVEL_CLKGATE MCR_REG(0x9560) #define CGPSF_CLKGATE_DIS (1 << 3) #define MICRO_BP0_0 _MMIO(0x9800) @@ -901,6 +926,8 @@ #define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4) +#define FORCEWAKE_REQ_GSC _MMIO(0xa618) + #define CHV_POWER_SS0_SIG1 _MMIO(0xa720) #define CHV_POWER_SS0_SIG2 _MMIO(0xa724) #define CHV_POWER_SS1_SIG1 _MMIO(0xa728) @@ -938,7 +965,8 @@ /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ -#define GEN9_LNCFCMOCS_REG_COUNT 32 +#define XEHP_LNCFCMOCS(i) MCR_REG(0xb020 + (i) * 4) +#define LNCFCMOCS_REG_COUNT 32 #define GEN7_L3CNTLREG3 _MMIO(0xb024) @@ -954,15 +982,10 @@ #define GEN7_L3LOG(slice, i) _MMIO(0xb070 + (slice) * 0x200 + (i) * 4) #define GEN7_L3LOG_SIZE 0x80 -#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) -#define PMFLUSHDONE_LNICRSDROP (1 << 20) -#define PMFLUSH_GAPL3UNBLOCK (1 << 21) -#define PMFLUSHDONE_LNEBLK (1 << 22) - -#define XEHP_L3NODEARBCFG _MMIO(0xb0b4) +#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) -#define GEN8_L3SQCREG1 _MMIO(0xb100) +#define GEN8_L3SQCREG1 MCR_REG(0xb100) /* * Note that on CHV the following has an off-by-one error wrt. to BSpec. * Using the formula in BSpec leads to a hang, while the formula here works @@ -973,31 +996,28 @@ #define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14) #define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14)) -#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114) -#define GEN11_I2M_WRITE_DISABLE (1 << 28) - -#define GEN8_L3SQCREG4 _MMIO(0xb118) +#define GEN8_L3SQCREG4 MCR_REG(0xb118) #define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) #define GEN8_LQSC_RO_PERF_DIS (1 << 27) #define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) #define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22) -#define GEN9_SCRATCH1 _MMIO(0xb11c) +#define GEN9_SCRATCH1 MCR_REG(0xb11c) #define EVICTION_PERF_FIX_ENABLE REG_BIT(8) -#define BDW_SCRATCH1 _MMIO(0xb11c) +#define BDW_SCRATCH1 MCR_REG(0xb11c) #define GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE (1 << 2) -#define GEN11_SCRATCH2 _MMIO(0xb140) +#define GEN11_SCRATCH2 MCR_REG(0xb140) #define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE (1 << 19) -#define GEN11_L3SQCREG5 _MMIO(0xb158) +#define XEHP_L3SQCREG5 MCR_REG(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) -#define MLTICTXCTL _MMIO(0xb170) +#define MLTICTXCTL MCR_REG(0xb170) #define TDONRENDER REG_BIT(2) -#define XEHP_L3SCQREG7 _MMIO(0xb188) +#define XEHP_L3SCQREG7 MCR_REG(0xb188) #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) #define XEHPC_L3SCRUB _MMIO(0xb18c) @@ -1005,7 +1025,7 @@ #define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0) #define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6) -#define L3SQCREG1_CCS0 _MMIO(0xb200) +#define L3SQCREG1_CCS0 MCR_REG(0xb200) #define FLUSHALLNONCOH REG_BIT(5) #define GEN11_GLBLINVL _MMIO(0xb404) @@ -1030,11 +1050,14 @@ #define GEN9_BLT_MOCS(i) _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4) #define GEN12_FAULT_TLB_DATA0 _MMIO(0xceb8) +#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8) #define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc) +#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) #define GEN12_RING_FAULT_REG _MMIO(0xcec4) +#define XEHP_RING_FAULT_REG MCR_REG(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) #define RING_FAULT_GTTSEL_MASK (1 << 11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) @@ -1042,16 +1065,21 @@ #define RING_FAULT_VALID (1 << 0) #define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8) #define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define XEHP_VD_TLB_INV_CR MCR_REG(0xcedc) #define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define XEHP_VE_TLB_INV_CR MCR_REG(0xcee0) #define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) +#define XEHP_BLT_TLB_INV_CR MCR_REG(0xcee4) #define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04) +#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04) -#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28) -#define RENDER_MOD_CTRL _MMIO(0xcf2c) -#define COMP_MOD_CTRL _MMIO(0xcf30) -#define VDBX_MOD_CTRL _MMIO(0xcf34) -#define VEBX_MOD_CTRL _MMIO(0xcf38) +#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28) +#define RENDER_MOD_CTRL MCR_REG(0xcf2c) +#define COMP_MOD_CTRL MCR_REG(0xcf30) +#define VDBX_MOD_CTRL MCR_REG(0xcf34) +#define VEBX_MOD_CTRL MCR_REG(0xcf38) #define FORCE_MISS_FTLB REG_BIT(3) #define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c) @@ -1066,48 +1094,52 @@ #define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */ +#define GEN8_HALF_SLICE_CHICKEN1 MCR_REG(0xe100) #define GEN7_MAX_PS_THREAD_DEP (8 << 12) #define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10) #define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4) #define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) +#define GEN8_SAMPLER_INSTDONE MCR_REG(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define GEN8_ROW_INSTDONE MCR_REG(0xe164) -#define HALF_SLICE_CHICKEN2 _MMIO(0xe180) +#define HALF_SLICE_CHICKEN2 MCR_REG(0xe180) #define GEN8_ST_PO_DISABLE (1 << 13) -#define HALF_SLICE_CHICKEN3 _MMIO(0xe184) +#define HSW_HALF_SLICE_CHICKEN3 _MMIO(0xe184) +#define GEN8_HALF_SLICE_CHICKEN3 MCR_REG(0xe184) #define HSW_SAMPLE_C_PERFORMANCE (1 << 9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1) -#define GEN9_HALF_SLICE_CHICKEN5 _MMIO(0xe188) +#define GEN9_HALF_SLICE_CHICKEN5 MCR_REG(0xe188) #define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5) #define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3) -#define GEN10_SAMPLER_MODE _MMIO(0xe18c) +#define GEN10_SAMPLER_MODE MCR_REG(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) -#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) +#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) #define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8) #define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4) #define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2) -#define GEN10_CACHE_MODE_SS _MMIO(0xe420) +#define GEN10_CACHE_MODE_SS MCR_REG(0xe420) #define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10) #define DISABLE_ECC REG_BIT(5) #define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4) #define ENABLE_PREFETCH_INTO_IC REG_BIT(3) -#define EU_PERF_CNTL0 _MMIO(0xe458) -#define EU_PERF_CNTL4 _MMIO(0xe45c) +#define EU_PERF_CNTL0 PERF_REG(0xe458) +#define EU_PERF_CNTL4 PERF_REG(0xe45c) -#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c) +#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c) #define GEN12_DISABLE_GRF_CLEAR REG_BIT(13) #define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) @@ -1119,7 +1151,7 @@ #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) -#define GEN8_ROW_CHICKEN _MMIO(0xe4f0) +#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0) #define FLOW_CONTROL_ENABLE REG_BIT(15) #define UGM_BACKUP_MODE REG_BIT(13) #define MDQ_ARBITRATION_MODE REG_BIT(12) @@ -1130,42 +1162,43 @@ #define DISABLE_EARLY_EOT REG_BIT(1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) + +#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4) #define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) #define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) #define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define GEN12_DISABLE_DOP_GATING REG_BIT(0) -#define RT_CTRL _MMIO(0xe530) +#define RT_CTRL MCR_REG(0xe530) #define DIS_NULL_QUERY REG_BIT(10) #define STACKID_CTRL REG_GENMASK(6, 5) #define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) -#define EU_PERF_CNTL1 _MMIO(0xe558) -#define EU_PERF_CNTL5 _MMIO(0xe55c) +#define EU_PERF_CNTL1 PERF_REG(0xe558) +#define EU_PERF_CNTL5 PERF_REG(0xe55c) -#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0) +#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) -#define ICL_HDC_MODE _MMIO(0xe5f4) +#define ICL_HDC_MODE MCR_REG(0xe5f4) -#define EU_PERF_CNTL2 _MMIO(0xe658) -#define EU_PERF_CNTL6 _MMIO(0xe65c) -#define EU_PERF_CNTL3 _MMIO(0xe758) +#define EU_PERF_CNTL2 PERF_REG(0xe658) +#define EU_PERF_CNTL6 PERF_REG(0xe65c) +#define EU_PERF_CNTL3 PERF_REG(0xe758) -#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8) +#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) -#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4) +#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) -#define SARB_CHICKEN1 _MMIO(0xe90c) +#define SARB_CHICKEN1 MCR_REG(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) -#define GEN7_HALF_SLICE_CHICKEN1_GT2 _MMIO(0xf100) - #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) #define DOP_CLOCK_GATING_DISABLE (1 << 0) #define PUSH_CONSTANT_DEREF_DISABLE (1 << 8) @@ -1513,6 +1546,9 @@ #define VLV_RENDER_C0_COUNT _MMIO(0x138118) #define VLV_MEDIA_C0_COUNT _MMIO(0x13811c) +#define GEN12_RPSTAT1 _MMIO(0x1381b4) +#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) + #define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) #define GEN11_GUNIT (28) @@ -1583,6 +1619,11 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) +#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004) +#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008) +#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068) +#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c) + /* * Standalone Media's non-engine GT registers are located at their regular GT * offsets plus 0x380000. This extra offset is stored inside the intel_uncore diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index d651ccd0ab20..9486dd3bed99 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name) { - struct kobject *kobj = &dev->kobj; - /* * We are interested at knowing from where the interface * has been called, whether it's called from gt/ or from @@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, * "struct drm_i915_private *" type. */ if (!is_object_gt(kobj)) { + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); return to_gt(i915); @@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt) return >->i915->drm.primary->kdev->kobj; } -static ssize_t id_show(struct device *dev, - struct device_attribute *attr, +static ssize_t id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); return sysfs_emit(buf, "%u\n", gt->info.id); } -static DEVICE_ATTR_RO(id); +static struct kobj_attribute attr_id = __ATTR_RO(id); static struct attribute *id_attrs[] = { - &dev_attr_id.attr, + &attr_id.attr, NULL, }; ATTRIBUTE_GROUPS(id); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index 6232923a420d..18bab835be02 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -18,11 +18,6 @@ bool is_object_gt(struct kobject *kobj); struct drm_i915_private *kobj_to_i915(struct kobject *kobj); -struct kobject * -intel_gt_create_kobj(struct intel_gt *gt, - struct kobject *dir, - const char *name); - static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) { return container_of(kobj, struct intel_gt, sysfs_gt); @@ -30,7 +25,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) void intel_gt_sysfs_register(struct intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name); #endif /* SYSFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 180dd6f3ef57..2b5f05b31187 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -24,14 +24,15 @@ enum intel_gt_sysfs_op { }; static int -sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr, int (func)(struct intel_gt *gt, u32 val), u32 val) { struct intel_gt *gt; int ret; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, break; } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt, val); } @@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, } static u32 -sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr, u32 (func)(struct intel_gt *gt), enum intel_gt_sysfs_op op) { @@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, } } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt); } @@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, #define sysfs_gt_attribute_r_max_func(d, a, f) \ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) +#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \ + static ssize_t _name##_show_common(struct kobject *kobj, \ + struct attribute *attr, char *buff) \ + { \ + u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \ + __##_name##_show); \ + \ + return sysfs_emit(buff, "%u\n", val); \ + } \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buff) \ + { \ + return _name ##_show_common(kobj, &attr->attr, buff); \ + } \ + static ssize_t _name##_dev_show(struct device *dev, \ + struct device_attribute *attr, char *buff) \ + { \ + return _name##_show_common(&dev->kobj, &attr->attr, buff); \ + } + +#define INTEL_GT_SYSFS_STORE(_name, _func) \ + static ssize_t _name##_store_common(struct kobject *kobj, \ + struct attribute *attr, \ + const char *buff, size_t count) \ + { \ + int ret; \ + u32 val; \ + \ + ret = kstrtou32(buff, 0, &val); \ + if (ret) \ + return ret; \ + \ + ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \ + \ + return ret ?: count; \ + } \ + static ssize_t _name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, const char *buff, \ + size_t count) \ + { \ + return _name##_store_common(kobj, &attr->attr, buff, count); \ + } \ + static ssize_t _name##_dev_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buff, size_t count) \ + { \ + return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \ + } + +#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max) +#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min) + +#define INTEL_GT_ATTR_RW(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RW(_name) + +#define INTEL_GT_ATTR_RO(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RO(_name) + +#define INTEL_GT_DUAL_ATTR_RW(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \ + _name##_dev_show, \ + _name##_dev_store); \ + INTEL_GT_ATTR_RW(_name) + +#define INTEL_GT_DUAL_ATTR_RO(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \ + _name##_dev_show, \ + NULL); \ + INTEL_GT_ATTR_RO(_name) + #ifdef CONFIG_PM static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) { @@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) return DIV_ROUND_CLOSEST_ULL(res, 1000); } -static ssize_t rc6_enable_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static u8 get_rc6_mask(struct intel_gt *gt) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); u8 mask = 0; if (HAS_RC6(gt->i915)) @@ -118,37 +187,35 @@ static ssize_t rc6_enable_show(struct device *dev, if (HAS_RC6pp(gt->i915)) mask |= BIT(2); - return sysfs_emit(buff, "%x\n", mask); + return mask; } -static u32 __rc6_residency_ms_show(struct intel_gt *gt) +static ssize_t rc6_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) { - return get_residency(gt, GEN6_GT_GFX_RC6); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); } -static ssize_t rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static ssize_t rc6_enable_dev_show(struct device *dev, + struct device_attribute *attr, + char *buff) { - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6_residency_ms_show); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name); - return sysfs_emit(buff, "%u\n", rc6_residency); + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); } -static u32 __rc6p_residency_ms_show(struct intel_gt *gt) +static u32 __rc6_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, GEN6_GT_GFX_RC6p); + return get_residency(gt, GEN6_GT_GFX_RC6); } -static ssize_t rc6p_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static u32 __rc6p_residency_ms_show(struct intel_gt *gt) { - u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6p_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6p_residency); + return get_residency(gt, GEN6_GT_GFX_RC6p); } static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) @@ -156,67 +223,69 @@ static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) return get_residency(gt, GEN6_GT_GFX_RC6pp); } -static ssize_t rc6pp_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6pp_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6pp_residency); -} - static u32 __media_rc6_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, VLV_GT_MEDIA_RC6); } -static ssize_t media_rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __media_rc6_residency_ms_show); +INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms); - return sysfs_emit(buff, "%u\n", rc6_residency); -} - -static DEVICE_ATTR_RO(rc6_enable); -static DEVICE_ATTR_RO(rc6_residency_ms); -static DEVICE_ATTR_RO(rc6p_residency_ms); -static DEVICE_ATTR_RO(rc6pp_residency_ms); -static DEVICE_ATTR_RO(media_rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6_enable); +INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms); +INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms); static struct attribute *rc6_attrs[] = { + &attr_rc6_enable.attr, + &attr_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6p_attrs[] = { + &attr_rc6p_residency_ms.attr, + &attr_rc6pp_residency_ms.attr, + NULL +}; + +static struct attribute *media_rc6_attrs[] = { + &attr_media_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6_dev_attrs[] = { &dev_attr_rc6_enable.attr, &dev_attr_rc6_residency_ms.attr, NULL }; -static struct attribute *rc6p_attrs[] = { +static struct attribute *rc6p_dev_attrs[] = { &dev_attr_rc6p_residency_ms.attr, &dev_attr_rc6pp_residency_ms.attr, NULL }; -static struct attribute *media_rc6_attrs[] = { +static struct attribute *media_rc6_dev_attrs[] = { &dev_attr_media_rc6_residency_ms.attr, NULL }; static const struct attribute_group rc6_attr_group[] = { { .attrs = rc6_attrs, }, - { .name = power_group_name, .attrs = rc6_attrs, }, + { .name = power_group_name, .attrs = rc6_dev_attrs, }, }; static const struct attribute_group rc6p_attr_group[] = { { .attrs = rc6p_attrs, }, - { .name = power_group_name, .attrs = rc6p_attrs, }, + { .name = power_group_name, .attrs = rc6p_dev_attrs, }, }; static const struct attribute_group media_rc6_attr_group[] = { { .attrs = media_rc6_attrs, }, - { .name = power_group_name, .attrs = media_rc6_attrs, }, + { .name = power_group_name, .attrs = media_rc6_dev_attrs, }, }; static int __intel_gt_sysfs_create_group(struct kobject *kobj, @@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt) return intel_rps_read_actual_frequency(>->rps); } -static ssize_t act_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __act_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", actual_freq); -} - static u32 __cur_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_requested_frequency(>->rps); } -static ssize_t cur_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __cur_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", cur_freq); -} - static u32 __boost_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_boost_frequency(>->rps); } -static ssize_t boost_freq_mhz_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __boost_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", boost_freq); -} - static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val) { return intel_rps_set_boost_frequency(>->rps, val); } -static ssize_t boost_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - ssize_t ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - return sysfs_gt_attribute_w_func(dev, attr, - __boost_freq_mhz_store, val) ?: count; -} - -static u32 __rp0_freq_mhz_show(struct intel_gt *gt) +static u32 __RP0_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rp0_frequency(>->rps); } -static ssize_t RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp0_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp0_freq); -} - -static u32 __rp1_freq_mhz_show(struct intel_gt *gt) -{ - return intel_rps_get_rp1_frequency(>->rps); -} - -static ssize_t RP1_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp1_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp1_freq); -} - -static u32 __rpn_freq_mhz_show(struct intel_gt *gt) +static u32 __RPn_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rpn_frequency(>->rps); } -static ssize_t RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) +static u32 __RP1_freq_mhz_show(struct intel_gt *gt) { - u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rpn_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rpn_freq); + return intel_rps_get_rp1_frequency(>->rps); } static u32 __max_freq_mhz_show(struct intel_gt *gt) @@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt) return intel_rps_get_max_frequency(>->rps); } -static ssize_t max_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __max_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", max_freq); -} - static int __set_max_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_max_frequency(>->rps, val); } -static ssize_t max_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val); - - return ret ?: count; -} - static u32 __min_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_min_frequency(>->rps); } -static ssize_t min_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr, - __min_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", min_freq); -} - static int __set_min_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_min_frequency(>->rps, val); } -static ssize_t min_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val); - - return ret ?: count; -} - static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) { struct intel_rps *rps = >->rps; @@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) return intel_gpu_freq(rps, rps->efficient_freq); } -static ssize_t vlv_rpe_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __vlv_rpe_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rpe_freq); -} - -#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ - static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) - -#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL) -#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store) +INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz); +INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz); +INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store); +INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq); +INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq); + +#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \ + static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \ + _show_dev, _store_dev); \ + static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \ + _show, _store) + +#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \ + _name##_dev_show, NULL) +#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \ + _name##_dev_show, _name##_dev_store) /* The below macros generate static structures */ INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); @@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); - -static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); - -#define GEN6_ATTR(s) { \ - &dev_attr_##s##_act_freq_mhz.attr, \ - &dev_attr_##s##_cur_freq_mhz.attr, \ - &dev_attr_##s##_boost_freq_mhz.attr, \ - &dev_attr_##s##_max_freq_mhz.attr, \ - &dev_attr_##s##_min_freq_mhz.attr, \ - &dev_attr_##s##_RP0_freq_mhz.attr, \ - &dev_attr_##s##_RP1_freq_mhz.attr, \ - &dev_attr_##s##_RPn_freq_mhz.attr, \ +INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz); + +#define GEN6_ATTR(p, s) { \ + &p##attr_##s##_act_freq_mhz.attr, \ + &p##attr_##s##_cur_freq_mhz.attr, \ + &p##attr_##s##_boost_freq_mhz.attr, \ + &p##attr_##s##_max_freq_mhz.attr, \ + &p##attr_##s##_min_freq_mhz.attr, \ + &p##attr_##s##_RP0_freq_mhz.attr, \ + &p##attr_##s##_RP1_freq_mhz.attr, \ + &p##attr_##s##_RPn_freq_mhz.attr, \ NULL, \ } -#define GEN6_RPS_ATTR GEN6_ATTR(rps) -#define GEN6_GT_ATTR GEN6_ATTR(gt) +#define GEN6_RPS_ATTR GEN6_ATTR(, rps) +#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt) static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR; static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR; -static ssize_t punit_req_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t punit_req_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 preq = intel_rps_read_punit_req_frequency(>->rps); return sysfs_emit(buff, "%u\n", preq); @@ -508,20 +464,20 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev, struct intel_gt_bool_throttle_attr { struct attribute attr; - ssize_t (*show)(struct device *dev, struct device_attribute *attr, + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf); - i915_reg_t reg32; + i915_reg_t (*reg32)(struct intel_gt *gt); u32 mask; }; -static ssize_t throttle_reason_bool_show(struct device *dev, - struct device_attribute *attr, +static ssize_t throttle_reason_bool_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_gt_bool_throttle_attr *t_attr = (struct intel_gt_bool_throttle_attr *) attr; - bool val = rps_read_mask_mmio(>->rps, t_attr->reg32, t_attr->mask); + bool val = rps_read_mask_mmio(>->rps, t_attr->reg32(gt), t_attr->mask); return sysfs_emit(buff, "%u\n", val); } @@ -530,11 +486,11 @@ static ssize_t throttle_reason_bool_show(struct device *dev, struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \ .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \ .show = throttle_reason_bool_show, \ - .reg32 = GT0_PERF_LIMIT_REASONS, \ + .reg32 = intel_gt_perf_limit_reasons_reg, \ .mask = mask__, \ } -static DEVICE_ATTR_RO(punit_req_freq_mhz); +INTEL_GT_ATTR_RO(punit_req_freq_mhz); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK); @@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = { #define U8_8_VAL_MASK 0xffff #define U8_8_SCALE_TO_VALUE "0.00390625" -static ssize_t freq_factor_scale_show(struct device *dev, - struct device_attribute *attr, +static ssize_t freq_factor_scale_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE); @@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode) return !mode ? mode : 256 / mode; } -static ssize_t media_freq_factor_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; intel_wakeref_t wakeref; u32 mode; @@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev, return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode)); } -static ssize_t media_freq_factor_store(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; u32 factor, mode; int err; @@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev, return err ?: count; } -static ssize_t media_RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static ssize_t media_RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static DEVICE_ATTR_RW(media_freq_factor); -static struct device_attribute dev_attr_media_freq_factor_scale = +INTEL_GT_ATTR_RW(media_freq_factor); +static struct kobj_attribute attr_media_freq_factor_scale = __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL); -static DEVICE_ATTR_RO(media_RP0_freq_mhz); -static DEVICE_ATTR_RO(media_RPn_freq_mhz); +INTEL_GT_ATTR_RO(media_RP0_freq_mhz); +INTEL_GT_ATTR_RO(media_RPn_freq_mhz); static const struct attribute *media_perf_power_attrs[] = { - &dev_attr_media_freq_factor.attr, - &dev_attr_media_freq_factor_scale.attr, - &dev_attr_media_RP0_freq_mhz.attr, - &dev_attr_media_RPn_freq_mhz.attr, + &attr_media_freq_factor.attr, + &attr_media_freq_factor_scale.attr, + &attr_media_RP0_freq_mhz.attr, + &attr_media_RPn_freq_mhz.attr, NULL }; @@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = { NULL }; -static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, - const struct attribute * const *attrs) +static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj) { + const struct attribute * const *attrs; + struct attribute *vlv_attr; int ret; if (GRAPHICS_VER(gt->i915) < 6) return 0; + if (is_object_gt(kobj)) { + attrs = gen6_rps_attrs; + vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr; + } else { + attrs = gen6_gt_attrs; + vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr; + } + ret = sysfs_create_files(kobj, attrs); if (ret) return ret; if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) - ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr); + ret = sysfs_create_file(kobj, vlv_attr); return ret; } @@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) intel_sysfs_rc6_init(gt, kobj); - ret = is_object_gt(kobj) ? - intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) : - intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs); + ret = intel_sysfs_rps_init(gt, kobj); if (ret) drm_warn(>->i915->drm, "failed to create gt%u RPS sysfs files (%pe)", @@ -790,13 +753,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) if (!is_object_gt(kobj)) return; - ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr); + ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr); if (ret) drm_warn(>->i915->drm, "failed to create gt%u punit_req_freq_mhz sysfs (%pe)", gt->info.id, ERR_PTR(ret)); - if (GRAPHICS_VER(gt->i915) >= 11) { + if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) { ret = sysfs_create_files(kobj, throttle_reason_attrs); if (ret) drm_warn(>->i915->drm, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index f19c2de77ff6..a0cc73b401ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -20,6 +20,7 @@ #include "intel_gsc.h" #include "i915_vma.h" +#include "i915_perf_types.h" #include "intel_engine_types.h" #include "intel_gt_buffer_pool_types.h" #include "intel_hwconfig.h" @@ -59,6 +60,9 @@ enum intel_steering_type { L3BANK, MSLICE, LNCF, + GAM, + DSS, + OADDRM, /* * On some platforms there are multiple types of MCR registers that @@ -141,20 +145,6 @@ struct intel_gt { struct intel_wakeref wakeref; atomic_t user_wakeref; - /** - * Protects access to lmem usefault list. - * It is required, if we are outside of the runtime suspend path, - * access to @lmem_userfault_list requires always first grabbing the - * runtime pm, to ensure we can't race against runtime suspend. - * Once we have that we also need to grab @lmem_userfault_lock, - * at which point we have exclusive access. - * The runtime suspend path is special since it doesn't really hold any locks, - * but instead has exclusive access by virtue of all other accesses requiring - * holding the runtime pm wakeref. - */ - struct mutex lmem_userfault_lock; - struct list_head lmem_userfault_list; - struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ @@ -170,9 +160,6 @@ struct intel_gt { */ intel_wakeref_t awake; - /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */ - struct intel_wakeref_auto userfault_wakeref; - u32 clock_frequency; u32 clock_period_ns; @@ -286,6 +273,8 @@ struct intel_gt { /* sysfs defaults per gt */ struct gt_defaults defaults; struct kobject *sysfs_defaults; + + struct i915_perf_gt perf; }; struct intel_gt_definition { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 2eaeba14319e..e82a9d763e57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -15,6 +15,7 @@ #include "i915_trace.h" #include "i915_utils.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_gtt.h" @@ -269,11 +270,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, ARRAY_SIZE(vm->min_alignment)); - if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) && - subclass == VM_CLASS_PPGTT) { - vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M; - vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M; - } else if (HAS_64K_PAGES(vm->i915)) { + if (HAS_64K_PAGES(vm->i915)) { vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K; vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K; } @@ -343,7 +340,8 @@ int setup_scratch_page(struct i915_address_space *vm) */ size = I915_GTT_PAGE_SIZE_4K; if (i915_vm_is_4lvl(vm) && - HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) && + !HAS_64K_PAGES(vm->i915)) size = I915_GTT_PAGE_SIZE_64K; do { @@ -385,18 +383,6 @@ skip: if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; - /* - * If we need 64K minimum GTT pages for device local-memory, - * like on XEHPSDV, then we need to fail the allocation here, - * otherwise we can't safely support the insertion of - * local-memory pages for this vm, since the HW expects the - * correct physical alignment and size when the page-table is - * operating in 64K GTT mode, which includes any scratch PTEs, - * since userspace can still touch them. - */ - if (HAS_64K_PAGES(vm->i915)) - return -ENOMEM; - size = I915_GTT_PAGE_SIZE_4K; } while (1); } @@ -493,6 +479,18 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } +static void xehp_setup_private_ppat(struct intel_gt *gt) +{ + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB); + intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB); +} + static void icl_setup_private_ppat(struct intel_uncore *uncore) { intel_uncore_write(uncore, @@ -585,13 +583,16 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); } -void setup_private_pat(struct intel_uncore *uncore) +void setup_private_pat(struct intel_gt *gt) { - struct drm_i915_private *i915 = uncore->i915; + struct intel_uncore *uncore = gt->uncore; + struct drm_i915_private *i915 = gt->i915; GEM_BUG_ON(GRAPHICS_VER(i915) < 8); - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + xehp_setup_private_ppat(gt); + else if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); else if (GRAPHICS_VER(i915) >= 11) icl_setup_private_ppat(uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index c0ca53cba9f0..4d75ba4bb41d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -93,6 +93,7 @@ typedef u64 gen8_pte_t; #define GEN12_GGTT_PTE_LM BIT_ULL(1) #define GEN12_PDE_64K BIT(6) +#define GEN12_PTE_PS64 BIT(8) /* * Cacheability Control is a 4-bit value. The low three bits are stored in bits @@ -667,7 +668,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, void gtt_write_workarounds(struct intel_gt *gt); -void setup_private_pat(struct intel_uncore *uncore); +void setup_private_pat(struct intel_gt *gt); int i915_vm_alloc_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3955292483a6..7771a19008c6 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -20,6 +20,30 @@ #include "intel_ring.h" #include "shmem_utils.h" +/* + * The per-platform tables are u8-encoded in @data. Decode @data and set the + * addresses' offset and commands in @regs. The following encoding is used + * for each byte. There are 2 steps: decoding commands and decoding addresses. + * + * Commands: + * [7]: create NOPs - number of NOPs are set in lower bits + * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set + * MI_LRI_FORCE_POSTED + * [5:0]: Number of NOPs or registers to set values to in case of + * MI_LOAD_REGISTER_IMM + * + * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" + * number of registers. They are set by using the REG/REG16 macros: the former + * is used for offsets smaller than 0x200 while the latter is for values bigger + * than that. Those macros already set all the bits documented below correctly: + * + * [7]: When a register offset needs more than 6 bits, use additional bytes, to + * follow, for the lower bits + * [6:0]: Register offset, without considering the engine base. + * + * This function only tweaks the commands and register offsets. Values are not + * filled out. + */ static void set_offsets(u32 *regs, const u8 *data, const struct intel_engine_cs *engine, @@ -264,6 +288,39 @@ static const u8 dg2_xcs_offsets[] = { END }; +static const u8 mtl_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + NOP(4), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -606,6 +663,49 @@ static const u8 dg2_rcs_offsets[] = { END }; +static const u8 mtl_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(2), + LRI(2, POSTED), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -624,7 +724,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) + return mtl_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_rcs_offsets; else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; @@ -637,7 +739,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) + return mtl_xcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) return dg2_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; @@ -745,19 +849,18 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) static u32 lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) { - switch (GRAPHICS_VER(engine->i915)) { - default: - MISSING_CASE(GRAPHICS_VER(engine->i915)); - fallthrough; - case 12: + if (GRAPHICS_VER(engine->i915) >= 12) return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 11: + else if (GRAPHICS_VER(engine->i915) >= 11) return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 9: + else if (GRAPHICS_VER(engine->i915) >= 9) return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 8: + else if (GRAPHICS_VER(engine->i915) >= 8) return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - } + + GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8); + + return 0; } static void @@ -1012,7 +1115,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) context_size += I915_GTT_PAGE_SIZE; /* for redzone */ - if (GRAPHICS_VER(engine->i915) == 12) { + if (GRAPHICS_VER(engine->i915) >= 12) { ce->wa_bb_page = context_size / PAGE_SIZE; context_size += PAGE_SIZE; } @@ -1718,24 +1821,16 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) unsigned int i; int err; - if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) + if (GRAPHICS_VER(engine->i915) >= 11 || + !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) return; - switch (GRAPHICS_VER(engine->i915)) { - case 12: - case 11: - return; - case 9: + if (GRAPHICS_VER(engine->i915) == 9) { wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; - break; - case 8: + } else if (GRAPHICS_VER(engine->i915) == 8) { wa_bb_fn[0] = gen8_init_indirectctx_bb; wa_bb_fn[1] = NULL; - break; - default: - MISSING_CASE(GRAPHICS_VER(engine->i915)); - return; } err = lrc_create_wa_ctx(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index a390f0813c8b..7111bae759f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -110,6 +110,8 @@ enum { #define XEHP_SW_CTX_ID_WIDTH 16 #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 +#define GEN12_GUC_SW_CTX_ID_SHIFT 39 +#define GEN12_GUC_SW_CTX_ID_WIDTH 16 static inline void lrc_runtime_start(struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index aaaf1906026c..b405a04135ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -10,6 +10,7 @@ #include "intel_gtt.h" #include "intel_migrate.h" #include "intel_ring.h" +#include "gem/i915_gem_lmem.h" struct insert_pte_data { u64 offset; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 152244d7f62a..49fdd509527a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -7,6 +7,7 @@ #include "intel_engine.h" #include "intel_gt.h" +#include "intel_gt_mcr.h" #include "intel_gt_regs.h" #include "intel_mocs.h" #include "intel_ring.h" @@ -609,14 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_uncore *uncore, +static void init_l3cc_table(struct intel_gt *gt, const struct drm_i915_mocs_table *table) { unsigned int i; u32 l3cc; for_each_l3cc(l3cc, table, i) - intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc); + else + intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc); } void intel_mocs_init_engine(struct intel_engine_cs *engine) @@ -636,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, &table); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine->uncore, &table); + init_l3cc_table(engine->gt, &table); } static u32 global_mocs_offset(void) @@ -672,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt) * memory transactions including guc transactions */ if (flags & HAS_RENDER_L3CC) - init_l3cc_table(gt->uncore, &table); + init_l3cc_table(gt, &table); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b36674356986..3159df6cdd49 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1278,7 +1278,7 @@ static void intel_gt_reset_global(struct intel_gt *gt, kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ - intel_wedge_on_timeout(&w, gt, 5 * HZ) { + intel_wedge_on_timeout(&w, gt, 60 * HZ) { intel_display_prepare_reset(gt->i915); intel_gt_reset(gt, engine_mask, reason); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 6b86250c31ab..6c34a83c24b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -625,9 +625,7 @@ static void gen5_rps_disable(struct intel_rps *rps) rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); /* Ack interrupts, disable EFC interrupt */ - intel_uncore_write(uncore, MEMINTREN, - intel_uncore_read(uncore, MEMINTREN) & - ~MEMINT_EVAL_CHG_EN); + intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0); intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); /* Go back to the starting frequency */ @@ -1016,9 +1014,15 @@ void intel_rps_boost(struct i915_request *rq) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + /* Return if old value is non zero */ - if (!atomic_fetch_inc(&slpc->num_waiters)) + if (!atomic_fetch_inc(&slpc->num_waiters)) { + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", + rq->fence.context, rq->fence.seqno); schedule_work(&slpc->boost_work); + } return; } @@ -1085,15 +1089,25 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps) return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); } -/** - * gen6_rps_get_freq_caps - Get freq caps exposed by HW - * @rps: the intel_rps structure - * @caps: returned freq caps - * - * Returned "caps" frequencies should be converted to MHz using - * intel_gpu_freq() - */ -void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +static void +mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ? + intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) : + intel_uncore_read(uncore, MTL_RP_STATE_CAP); + u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ? + intel_uncore_read(uncore, MTL_MPE_FREQUENCY) : + intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY); + + /* MTL values are in units of 16.67 MHz */ + caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap); + caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap); + caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe); +} + +static void +__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 rp_state_cap; @@ -1128,6 +1142,24 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c } } +/** + * gen6_rps_get_freq_caps - Get freq caps exposed by HW + * @rps: the intel_rps structure + * @caps: returned freq caps + * + * Returned "caps" frequencies should be converted to MHz using + * intel_gpu_freq() + */ +void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (IS_METEORLAKE(i915)) + return mtl_get_freq_caps(rps, caps); + else + return __gen6_rps_get_freq_caps(rps, caps); +} + static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2191,6 +2223,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) return intel_gpu_freq(rps, rps->min_freq); } +static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) +{ + struct intel_gt *gt = rps_to_gt(rps); + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_rps_freq_caps caps; + u32 rp_state_limits; + u32 gt_perf_status; + u32 rpmodectl, rpinclimit, rpdeclimit; + u32 rpstat, cagf, reqf; + u32 rpcurupei, rpcurup, rpprevup; + u32 rpcurdownei, rpcurdown, rpprevdown; + u32 rpupei, rpupt, rpdownei, rpdownt; + u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; + + rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); + gen6_rps_get_freq_caps(rps, &caps); + if (IS_GEN9_LP(i915)) + gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); + else + gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); + + /* RPSTAT1 is in the GT power well */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ); + if (GRAPHICS_VER(i915) >= 9) { + reqf >>= 23; + } else { + reqf &= ~GEN6_TURBO_DISABLE; + if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + reqf >>= 24; + else + reqf >>= 25; + } + reqf = intel_gpu_freq(rps, reqf); + + rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL); + rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; + rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; + rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; + rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; + rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; + rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; + + rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI); + rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); + + rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI); + rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); + + cagf = intel_rps_read_actual_frequency(rps); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + if (GRAPHICS_VER(i915) >= 11) { + pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE); + pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK); + /* + * The equivalent to the PM ISR & IIR cannot be read + * without affecting the current state of the system + */ + pm_isr = 0; + pm_iir = 0; + } else if (GRAPHICS_VER(i915) >= 8) { + pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2)); + pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2)); + pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2)); + pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2)); + } else { + pm_ier = intel_uncore_read(uncore, GEN6_PMIER); + pm_imr = intel_uncore_read(uncore, GEN6_PMIMR); + pm_isr = intel_uncore_read(uncore, GEN6_PMISR); + pm_iir = intel_uncore_read(uncore, GEN6_PMIIR); + } + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + drm_printf(p, "Video Turbo Mode: %s\n", + str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO)); + drm_printf(p, "HW control enabled: %s\n", + str_yes_no(rpmodectl & GEN6_RP_ENABLE)); + drm_printf(p, "SW control enabled: %s\n", + str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); + + drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n", + pm_ier, pm_imr, pm_mask); + if (GRAPHICS_VER(i915) <= 10) + drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n", + pm_isr, pm_iir); + drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); + drm_printf(p, "Render p-state ratio: %d\n", + (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8); + drm_printf(p, "Render p-state VID: %d\n", + gt_perf_status & 0xff); + drm_printf(p, "Render p-state limit: %d\n", + rp_state_limits & 0xff); + drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat); + drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl); + drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit); + drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit); + drm_printf(p, "RPNSWREQ: %dMHz\n", reqf); + drm_printf(p, "CAGF: %dMHz\n", cagf); + drm_printf(p, "RP CUR UP EI: %d (%lldns)\n", + rpcurupei, + intel_gt_pm_interval_to_ns(gt, rpcurupei)); + drm_printf(p, "RP CUR UP: %d (%lldns)\n", + rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); + drm_printf(p, "RP PREV UP: %d (%lldns)\n", + rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); + drm_printf(p, "Up threshold: %d%%\n", + rps->power.up_threshold); + drm_printf(p, "RP UP EI: %d (%lldns)\n", + rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); + drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n", + rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); + + drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n", + rpcurdownei, + intel_gt_pm_interval_to_ns(gt, rpcurdownei)); + drm_printf(p, "RP CUR DOWN: %d (%lldns)\n", + rpcurdown, + intel_gt_pm_interval_to_ns(gt, rpcurdown)); + drm_printf(p, "RP PREV DOWN: %d (%lldns)\n", + rpprevdown, + intel_gt_pm_interval_to_ns(gt, rpprevdown)); + drm_printf(p, "Down threshold: %d%%\n", + rps->power.down_threshold); + drm_printf(p, "RP DOWN EI: %d (%lldns)\n", + rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); + drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", + rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); + + drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.min_freq)); + drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp1_freq)); + drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp0_freq)); + drm_printf(p, "Max overclocked frequency: %dMHz\n", + intel_gpu_freq(rps, rps->max_freq)); + + drm_printf(p, "Current freq: %d MHz\n", + intel_gpu_freq(rps, rps->cur_freq)); + drm_printf(p, "Actual freq: %d MHz\n", cagf); + drm_printf(p, "Idle freq: %d MHz\n", + intel_gpu_freq(rps, rps->idle_freq)); + drm_printf(p, "Min freq: %d MHz\n", + intel_gpu_freq(rps, rps->min_freq)); + drm_printf(p, "Boost freq: %d MHz\n", + intel_gpu_freq(rps, rps->boost_freq)); + drm_printf(p, "Max freq: %d MHz\n", + intel_gpu_freq(rps, rps->max_freq)); + drm_printf(p, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, rps->efficient_freq)); +} + +static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) +{ + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; + struct intel_rps_freq_caps caps; + u32 pm_mask; + + gen6_rps_get_freq_caps(rps, &caps); + pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK); + + drm_printf(p, "PM MASK=0x%08x\n", pm_mask); + drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", + rps->pm_intrmsk_mbz); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); + drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.min_freq)); + drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp1_freq)); + drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", + intel_gpu_freq(rps, caps.rp0_freq)); + drm_printf(p, "Current freq: %d MHz\n", + intel_rps_get_requested_frequency(rps)); + drm_printf(p, "Actual freq: %d MHz\n", + intel_rps_read_actual_frequency(rps)); + drm_printf(p, "Min freq: %d MHz\n", + intel_rps_get_min_frequency(rps)); + drm_printf(p, "Boost freq: %d MHz\n", + intel_rps_get_boost_frequency(rps)); + drm_printf(p, "Max freq: %d MHz\n", + intel_rps_get_max_frequency(rps)); + drm_printf(p, + "efficient (RPe) frequency: %d MHz\n", + intel_gpu_freq(rps, caps.rp1_freq)); +} + +void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) +{ + if (rps_uses_slpc(rps)) + return slpc_frequency_dump(rps, p); + else + return rps_frequency_dump(rps, p); +} + static int set_max_freq(struct intel_rps *rps, u32 val) { struct drm_i915_private *i915 = rps_to_i915(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4509dfdc52e0..110300dfd438 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -10,6 +10,7 @@ #include "i915_reg_defs.h" struct i915_request; +struct drm_printer; void intel_rps_init_early(struct intel_rps *rps); void intel_rps_init(struct intel_rps *rps); @@ -54,6 +55,8 @@ void intel_rps_lower_unslice(struct intel_rps *rps); u32 intel_rps_read_throttle_reason(struct intel_rps *rps); bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask); +void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p); + void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 66f21c735d54..6c6198a257ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -677,8 +677,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, * If i915/perf is active, we want a stable powergating configuration * on the system. Use the configuration pinned by i915/perf. */ - if (i915->perf.exclusive_stream) - req_sseu = &i915->perf.sseu; + if (gt->perf.exclusive_stream) + req_sseu = >->perf.sseu; slices = hweight8(req_sseu->slice_mask); subslices = hweight8(req_sseu->subslice_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a821e3d405db..3cdf5c24dbc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -166,6 +166,21 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, _wa_add(wal, &wa); } +static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg, + u32 clear, u32 set, u32 read_mask, bool masked_reg) +{ + struct i915_wa wa = { + .mcr_reg = reg, + .clr = clear, + .set = set, + .read = read_mask, + .masked_reg = masked_reg, + .is_mcr = 1, + }; + + _wa_add(wal, &wa); +} + static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { @@ -173,6 +188,12 @@ wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) } static void +wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set) +{ + wa_mcr_add(wal, reg, clear, set, clear, false); +} + +static void wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { wa_write_clr_set(wal, reg, ~0, set); @@ -185,11 +206,23 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) } static void +wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set) +{ + wa_mcr_write_clr_set(wal, reg, set, set); +} + +static void wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) { wa_write_clr_set(wal, reg, clr, 0); } +static void +wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr) +{ + wa_mcr_write_clr_set(wal, reg, clr, 0); +} + /* * WA operations on "masked register". A masked register has the upper 16 bits * documented as "masked" in b-spec. Its purpose is to allow writing to just a @@ -208,18 +241,37 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) } static void +wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); +} + +static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } static void +wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); +} + +static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } +static void +wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, + u32 mask, u32 val) +{ + wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); +} + static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE); /* WaDisablePartialInstShootdown:bdw,chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for a possible hang in the unlikely event a TLB @@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw * * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); - wa_masked_en(wal, HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); wa_masked_en(wal, HDC_CHICKEN0, /* WaForceContextSaveRestoreNonCoherent:bdw */ @@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:chv */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* Improve HiZ throughput on CHV. */ wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); @@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_masked_en(wal, COMMON_SLICE_CHICKEN2, GEN9_PBE_COMPRESSED_HASH_SELECTION); - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ @@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); + wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ wa_masked_en(wal, HDC_CHICKEN0, @@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - wa_masked_en(wal, HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); /* * Supporting preemption with fine-granularity requires changes in the @@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bxt */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaToEnableHwFixForPushConstHWBug:bxt */ wa_masked_en(wal, COMMON_SLICE_CHICKEN2, @@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:kbl */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:cfl */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, * (the register is whitelisted in hardware now, so UMDs can opt in * for coherency if they have a good reason). */ - wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); /* WaEnableFloatBlendOptimization:icl */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), - 0 /* write-only, so skip validation */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), + 0 /* write-only, so skip validation */, + true); /* WaDisableGPGPUMidThreadPreemption:icl */ wa_masked_field_set(wal, GEN8_CS_CHICKEN1, @@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); /* allow headerless messages for preemptible GPGPU context */ - wa_masked_en(wal, GEN10_SAMPLER_MODE, - GEN11_SAMPLER_ENABLE_HEADLESS_MSG); + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + GEN11_SAMPLER_ENABLE_HEADLESS_MSG); /* Wa_1604278689:icl,ehl */ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); @@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, 0xFFFFFFFF); /* Wa_1406306137:icl,ehl */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } /* @@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); - wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, - REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); - wa_add(wal, - FF_MODE2, - FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, - 0, false); + wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); + wa_mcr_add(wal, + XEHP_FF_MODE2, + FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_TDS_TIMER_128, + 0, false); } /* @@ -599,7 +651,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, * verification is ignored. */ wa_add(wal, - FF_MODE2, + GEN12_FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128, 0, false); @@ -608,6 +660,8 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + gen12_ctx_gt_tuning_init(engine, wal); /* @@ -637,10 +691,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, * to Wa_1608008084. */ wa_add(wal, - FF_MODE2, + GEN12_FF_MODE2, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0, false); + + if (!IS_DG1(i915)) + /* Wa_1806527549 */ + wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -664,27 +722,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_16011186671:dg2_g11 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); + wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); + wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); } if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { /* Wa_14010469329:dg2_g10 */ - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, + XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); /* * Wa_22010465075:dg2_g10 * Wa_22010613112:dg2_g10 * Wa_14010698770:dg2_g10 */ - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } /* Wa_16013271637:dg2 */ - wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1, - MSC_MSAA_REODER_BUF_BYPASS_DISABLE); + wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, + MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || @@ -1076,18 +1134,23 @@ static void __set_mcr_steering(struct i915_wa_list *wal, wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); } -static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, - unsigned int slice, unsigned int subslice) +static void debug_dump_steering(struct intel_gt *gt) { struct drm_printer p = drm_debug_printer("MCR Steering:"); + if (drm_debug_enabled(DRM_UT_DRIVER)) + intel_gt_mcr_report_steering(&p, gt, false); +} + +static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, + unsigned int slice, unsigned int subslice) +{ __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); gt->default_steering.groupid = slice; gt->default_steering.instanceid = subslice; - if (drm_debug_enabled(DRM_UT_DRIVER)) - intel_gt_mcr_report_steering(&p, gt, false); + debug_dump_steering(gt); } static void @@ -1181,6 +1244,9 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) gt->steering_table[MSLICE] = NULL; } + if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0)) + gt->steering_table[GAM] = NULL; + slice = __ffs(slice_mask); subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) % GEN_DSS_PER_GSLICE; @@ -1198,6 +1264,13 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) */ __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2); __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2); + + /* + * On DG2, GAM registers have a dedicated steering control register + * and must always be programmed to a hardcoded groupid of "1." + */ + if (IS_DG2(gt->i915)) + __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0); } static void @@ -1254,22 +1327,22 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) PSDUNIT_CLKGATE_DIS); /* Wa_1406680159:icl,ehl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, + GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* * This is not a documented workaround, but rather an optimization * to reduce sampler power. */ - wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } /* @@ -1303,7 +1376,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_14011060649(gt, wal); /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ - wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); + wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void @@ -1315,14 +1388,14 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1409420604:tgl */ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE2, - CPSSUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, + GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ @@ -1341,14 +1414,14 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_1607087056:dg1 */ if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, + GEN11_SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1409420604:dg1 */ if (IS_DG1(i915)) - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE2, - CPSSUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); /* Wa_1408615072:dg1 */ /* Empirical testing shows this register is unaffected by engine reset. */ @@ -1365,7 +1438,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) xehp_init_mcr(gt, wal); /* Wa_1409757795:xehpsdv */ - wa_write_or(wal, SCCGCTL94DC, CG3DDISURB); + wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB); /* Wa_16011155590:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) @@ -1445,8 +1518,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) CG3DDISCFEG_CLKGATE_DIS); /* Wa_14011006942:dg2 */ - wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE, - DSS_ROUTER_CLKGATE_DIS); + wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE, + DSS_ROUTER_CLKGATE_DIS); } if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { @@ -1457,7 +1530,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); /* Wa_14011371254:dg2_g10 */ - wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); + wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); /* Wa_14011431319:dg2_g10 */ wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | @@ -1493,21 +1566,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMEDIA_CLKGATE_DIS); /* Wa_14011028019:dg2_g10 */ - wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); + wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); } /* Wa_14014830051:dg2 */ - wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); + wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); /* * The following are not actually "workarounds" but rather * recommended tuning settings documented in the bspec's * performance guide section. */ - wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -1516,7 +1589,27 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) pvc_init_mcr(gt, wal); /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); +} + +static void +xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* FIXME: Actual workarounds will be added in future patch(es) */ + + /* + * Unlike older platforms, we no longer setup implicit steering here; + * all MCR accesses are explicitly steered. + */ + debug_dump_steering(gt); +} + +static void +xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) +{ + /* FIXME: Actual workarounds will be added in future patch(es) */ + + debug_dump_steering(gt); } static void @@ -1524,7 +1617,18 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) { struct drm_i915_private *i915 = gt->i915; - if (IS_PONTEVECCHIO(i915)) + if (gt->type == GT_MEDIA) { + if (MEDIA_VER(i915) >= 13) + xelpmp_gt_workarounds_init(gt, wal); + else + MISSING_CASE(MEDIA_VER(i915)); + + return; + } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + xelpg_gt_workarounds_init(gt, wal); + else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); else if (IS_DG2(i915)) dg2_gt_workarounds_init(gt, wal); @@ -1628,14 +1732,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) u32 val, old = 0; /* open-coded rmw due to steering */ - old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0; + if (wa->clr) + old = wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg); val = (old & ~wa->clr) | wa->set; - if (val != old || !wa->clr) - intel_uncore_write_fw(uncore, wa->reg, val); + if (val != old || !wa->clr) { + if (wa->is_mcr) + intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val); + else + intel_uncore_write_fw(uncore, wa->reg, val); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + u32 val = wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg), - wal->name, "application"); + wa_verify(wa, val, wal->name, "application"); + } } intel_uncore_forcewake_put__locked(uncore, fw); @@ -1664,8 +1779,9 @@ static bool wa_list_verify(struct intel_gt *gt, intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ok &= wa_verify(wa, - intel_gt_mcr_read_any_fw(gt, wa->reg), + ok &= wa_verify(wa, wa->is_mcr ? + intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) : + intel_uncore_read_fw(uncore, wa->reg), wal->name, from); intel_uncore_forcewake_put__locked(uncore, fw); @@ -1712,11 +1828,35 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags) } static void +whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags) +{ + struct i915_wa wa = { + .mcr_reg = reg, + .is_mcr = 1, + }; + + if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS)) + return; + + if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags))) + return; + + wa.mcr_reg.reg |= flags; + _wa_add(wal, &wa); +} + +static void whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg) { whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); } +static void +whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg) +{ + whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW); +} + static void gen9_whitelist_build(struct i915_wa_list *w) { /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ @@ -1742,7 +1882,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine) gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:skl */ - whitelist_reg(w, GEN8_L3SQCREG4); + whitelist_mcr_reg(w, GEN8_L3SQCREG4); } static void bxt_whitelist_build(struct intel_engine_cs *engine) @@ -1763,7 +1903,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine) gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:kbl */ - whitelist_reg(w, GEN8_L3SQCREG4); + whitelist_mcr_reg(w, GEN8_L3SQCREG4); } static void glk_whitelist_build(struct intel_engine_cs *engine) @@ -1828,10 +1968,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: /* WaAllowUMDToModifyHalfSliceChicken7:icl */ - whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7); + whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7); /* WaAllowUMDToModifySamplerMode:icl */ - whitelist_reg(w, GEN10_SAMPLER_MODE); + whitelist_mcr_reg(w, GEN10_SAMPLER_MODE); /* WaEnableStateCacheRedirectToCS:icl */ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); @@ -2107,24 +2247,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14013392000:dg2_g11 */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - - /* Wa_16011620976:dg2_g11 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_1509727124:dg2 */ - wa_masked_en(wal, GEN10_SAMPLER_MODE, - SC_DISABLE_POWER_OPTIMIZATION_EBB); + wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, + GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || @@ -2133,13 +2270,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_22012826095:dg2 * Wa_22013059131:dg2 */ - wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, - MAXREQS_PER_BANK, - REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); + wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW, + MAXREQS_PER_BANK, + REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); /* Wa_22013059131:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0, - FORCE_1_SUB_MESSAGE_PER_FRAGMENT); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, + FORCE_1_SUB_MESSAGE_PER_FRAGMENT); } /* Wa_1308578152:dg2_g10 when first gslice is fused off */ @@ -2152,19 +2289,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(i915) || IS_DG2_G12(i915)) { /* Wa_22013037850:dg2 */ - wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, - DISABLE_128B_EVICTION_COMMAND_UDW); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, + DISABLE_128B_EVICTION_COMMAND_UDW); /* Wa_22012856258:dg2 */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN12_DISABLE_READ_SUPPRESSION); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_DISABLE_READ_SUPPRESSION); /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 */ - wa_masked_dis(wal, GEN12_HDC_CHICKEN0, - LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); + wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0, + LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { @@ -2172,8 +2309,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1608949956:dg2_g10 * Wa_14010198302:dg2_g10 */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, + MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); /* * Wa_14010918519:dg2_g10 @@ -2181,31 +2318,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, * so ignoring verification. */ - wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); + wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, + FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, + 0, false); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* Wa_22010430635:dg2 */ - wa_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); + wa_mcr_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); /* Wa_14010648519:dg2 */ - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY); + wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); /* Wa_22012532006:dg2 */ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); + wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { /* Wa_14010680813:dg2_g10 */ @@ -2216,17 +2353,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012362059:dg2 */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), - 0 /* Wa_14012342262 :write-only reg, so skip - verification */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), + 0 /* Wa_14012342262 write-only reg, so skip verification */, + true); } if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || @@ -2253,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* @@ -2282,14 +2418,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN12_PUSH_CONST_DEREF_HOLD_DIS); + wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, + GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* * Wa_1409085225:tgl * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || @@ -2313,9 +2449,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (GRAPHICS_VER(i915) == 11) { @@ -2349,9 +2485,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + wa_mcr_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* Wa_1606682166:icl */ wa_write_or(wal, @@ -2359,10 +2495,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); /* Wa_1409178092:icl */ - wa_write_clr_set(wal, - GEN11_SCRATCH2, - GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, - 0); + wa_mcr_write_clr_set(wal, + GEN11_SCRATCH2, + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, + 0); /* WaEnable32PlaneMode:icl */ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, @@ -2389,12 +2525,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_GRAPHICS_VER(i915, 9, 12)) { - /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ + /* + * Intel platforms that support fine-grained preemption (i.e., gen9 and + * beyond) allow the kernel-mode driver to choose between two different + * options for controlling preemption granularity and behavior. + * + * Option 1 (hardware default): + * Preemption settings are controlled in a global manner via + * kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity + * and settings chosen by the kernel-mode driver will apply to all + * userspace clients. + * + * Option 2: + * Preemption settings are controlled on a per-context basis via + * register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on + * context switch and is writable by userspace (e.g., via + * MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer) + * which allows different userspace drivers/clients to select + * different settings, or to change those settings on the fly in + * response to runtime needs. This option was known by name + * "FtrPerCtxtPreemptionGranularityControl" at one time, although + * that name is somewhat misleading as other non-granularity + * preemption settings are also impacted by this decision. + * + * On Linux, our policy has always been to let userspace drivers + * control preemption granularity/settings (Option 2). This was + * originally mandatory on gen9 to prevent ABI breakage (old gen9 + * userspace developed before object-level preemption was enabled would + * not behave well if i915 were to go with Option 1 and enable that + * preemption in a global manner). On gen9 each context would have + * object-level preemption disabled by default (see + * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but + * userspace drivers could opt-in to object-level preemption as they + * saw fit. For post-gen9 platforms, we continue to utilize Option 2; + * even though it is no longer necessary for ABI compatibility when + * enabling a new platform, it does ensure that userspace will be able + * to implement any workarounds that show up requiring temporary + * adjustments to preemption behavior at runtime. + * + * Notes/Workarounds: + * - Wa_14015141709: On DG2 and early steppings of MTL, + * CS_CHICKEN1[0] does not disable object-level preemption as + * it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been + * using Option 1). Effectively this means userspace is unable + * to disable object-level preemption on these platforms/steppings + * despite the setting here. + * + * - Wa_16013994831: May require that userspace program + * CS_CHICKEN1[10] when certain runtime conditions are true. + * Userspace requires Option 2 to be in effect for their update of + * CS_CHICKEN1[10] to be effective. + * + * Other workarounds may appear in the future that will also require + * Option 2 behavior to allow proper userspace implementation. + */ + if (GRAPHICS_VER(i915) >= 9) wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL); - } if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || @@ -2420,36 +2608,36 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - wa_write_or(wal, - BDW_SCRATCH1, - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + wa_mcr_write_or(wal, + BDW_SCRATCH1, + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ if (IS_GEN9_LP(i915)) - wa_write_clr_set(wal, - GEN8_L3SQCREG1, - L3_PRIO_CREDITS_MASK, - L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + wa_mcr_write_clr_set(wal, + GEN8_L3SQCREG1, + L3_PRIO_CREDITS_MASK, + L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + wa_mcr_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* Disable atomics in L3 to prevent unrecoverable hangs */ wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); - wa_write_clr_set(wal, GEN8_L3SQCREG4, - GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); - wa_write_clr_set(wal, GEN9_SCRATCH1, - EVICTION_PERF_FIX_ENABLE, 0); + wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4, + GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); + wa_mcr_write_clr_set(wal, GEN9_SCRATCH1, + EVICTION_PERF_FIX_ENABLE, 0); } if (IS_HASWELL(i915)) { /* WaSampleCChickenBitEnable:hsw */ wa_masked_en(wal, - HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); + HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); wa_masked_dis(wal, CACHE_MODE_0_GEN7, @@ -2657,7 +2845,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) { /* Wa_14014999345:pvc */ - wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); + wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC); } } @@ -2683,8 +2871,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, } if (IS_DG2(i915)) { - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* * This is also listed as Wa_22012654132 for certain DG2 @@ -2695,10 +2883,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, * back for verification on DG2 (due to Wa_14012342262), so * we need to explicitly skip the readback. */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } /* @@ -2707,8 +2895,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, * platforms. */ if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) - wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, - THREAD_EX_ARB_MODE_RR_AFTER_DEP); + wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); } /* @@ -2734,30 +2922,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ - wa_masked_en(wal, - GEN8_ROW_CHICKEN, - SYSTOLIC_DOP_CLOCK_GATING_DIS); + wa_mcr_masked_en(wal, + GEN8_ROW_CHICKEN, + SYSTOLIC_DOP_CLOCK_GATING_DIS); /* Wa_1607196519 */ - wa_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); + wa_mcr_masked_en(wal, + GEN9_ROW_CHICKEN4, + GEN12_DISABLE_GRF_CLEAR); /* Wa_14010670810:xehpsdv */ - wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); + wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); /* Wa_14010449647:xehpsdv */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); /* Wa_18011725039:xehpsdv */ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) { - wa_masked_dis(wal, MLTICTXCTL, TDONRENDER); - wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); + wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER); + wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH); } /* Wa_14012362059:xehpsdv */ - wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_14014368820:xehpsdv */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | @@ -2766,19 +2954,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { /* Wa_14015227452:dg2,pvc */ - wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); /* Wa_22014226127:dg2,pvc */ - wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); /* Wa_16015675438:dg2,pvc */ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); /* Wa_18018781329:dg2,pvc */ - wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); - wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB); + wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB); + } + + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + + /* Wa_18017747507:dg2 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 8a4b6de4e754..7c8b01d00043 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -11,11 +11,16 @@ #include "i915_reg_defs.h" struct i915_wa { - i915_reg_t reg; + union { + i915_reg_t reg; + i915_mcr_reg_t mcr_reg; + }; u32 clr; u32 set; u32 read; - bool masked_reg; + + u32 masked_reg:1; + u32 is_mcr:1; }; struct i915_wa_list { diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 1b75f478d1b8..881b64f3e7b9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -39,6 +39,16 @@ static int perf_end(struct intel_gt *gt) return igt_flush_test(gt->i915); } +static i915_reg_t timestamp_reg(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915)) + return RING_TIMESTAMP_UDW(engine->mmio_base); + else + return RING_TIMESTAMP(engine->mmio_base); +} + static int write_timestamp(struct i915_request *rq, int slot) { struct intel_timeline *tl = @@ -55,7 +65,7 @@ static int write_timestamp(struct i915_request *rq, int slot) if (GRAPHICS_VER(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; - *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine)); *cs++ = tl->hwsp_offset + slot * sizeof(u32); *cs++ = 0; @@ -125,7 +135,7 @@ static int perf_mi_bb_start(void *arg) enum intel_engine_id id; int err = 0; - if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; perf_begin(gt); @@ -135,6 +145,9 @@ static int perf_mi_bb_start(void *arg) u32 cycles[COUNT]; int i; + if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) + continue; + intel_engine_pm_get(engine); batch = create_empty_batch(ce); @@ -249,7 +262,7 @@ static int perf_mi_noop(void *arg) enum intel_engine_id id; int err = 0; - if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; perf_begin(gt); @@ -259,6 +272,9 @@ static int perf_mi_noop(void *arg) u32 cycles[COUNT]; int i; + if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0) + continue; + intel_engine_pm_get(engine); base = create_empty_batch(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 1e08b2473b99..2c7c053a8808 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs *engine, break; } while (time_before(jiffies, timeout)); - flush_scheduled_work(); - if (rq->fence.error != -EIO) { pr_err("%s: hanging request %llx:%lld not reset\n", engine->name, @@ -3475,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd) struct preempt_smoke { struct intel_gt *gt; + struct kthread_work work; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; unsigned int ncontext; struct rnd_state prng; unsigned long count; + int result; }; static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) @@ -3540,34 +3540,31 @@ unpin: return err; } -static int smoke_crescendo_thread(void *arg) +static void smoke_crescendo_work(struct kthread_work *work) { - struct preempt_smoke *smoke = arg; + struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work); IGT_TIMEOUT(end_time); unsigned long count; count = 0; do { struct i915_gem_context *ctx = smoke_context(smoke); - int err; - err = smoke_submit(smoke, - ctx, count % I915_PRIORITY_MAX, - smoke->batch); - if (err) - return err; + smoke->result = smoke_submit(smoke, ctx, + count % I915_PRIORITY_MAX, + smoke->batch); count++; - } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); + } while (!smoke->result && count < smoke->ncontext && + !__igt_timeout(end_time, NULL)); smoke->count = count; - return 0; } static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) #define BATCH BIT(0) { - struct task_struct *tsk[I915_NUM_ENGINES] = {}; + struct kthread_worker *worker[I915_NUM_ENGINES] = {}; struct preempt_smoke *arg; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3578,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) if (!arg) return -ENOMEM; + memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg)); + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -3585,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - tsk[id] = kthread_run(smoke_crescendo_thread, arg, - "igt/smoke:%d", id); - if (IS_ERR(tsk[id])) { - err = PTR_ERR(tsk[id]); + worker[id] = kthread_create_worker(0, "igt/smoke:%d", id); + if (IS_ERR(worker[id])) { + err = PTR_ERR(worker[id]); break; } - get_task_struct(tsk[id]); - } - yield(); /* start all threads before we kthread_stop() */ + kthread_init_work(&arg[id].work, smoke_crescendo_work); + kthread_queue_work(worker[id], &arg[id].work); + } count = 0; for_each_engine(engine, smoke->gt, id) { - int status; - - if (IS_ERR_OR_NULL(tsk[id])) + if (IS_ERR_OR_NULL(worker[id])) continue; - status = kthread_stop(tsk[id]); - if (status && !err) - err = status; + kthread_flush_work(&arg[id].work); + if (arg[id].result && !err) + err = arg[id].result; count += arg[id].count; - put_task_struct(tsk[id]); + kthread_destroy_worker(worker[id]); } pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index be94f863bdef..b46425aeb2f0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -36,6 +36,19 @@ static int cmp_u32(const void *A, const void *B) return 0; } +static u32 read_timestamp(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + /* On i965 the first read tends to give a stale value */ + ENGINE_READ_FW(engine, RING_TIMESTAMP); + + if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915)) + return ENGINE_READ_FW(engine, RING_TIMESTAMP_UDW); + else + return ENGINE_READ_FW(engine, RING_TIMESTAMP); +} + static void measure_clocks(struct intel_engine_cs *engine, u32 *out_cycles, ktime_t *out_dt) { @@ -45,13 +58,13 @@ static void measure_clocks(struct intel_engine_cs *engine, for (i = 0; i < 5; i++) { local_irq_disable(); - cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP); + cycles[i] = -read_timestamp(engine); dt[i] = ktime_get(); udelay(1000); dt[i] = ktime_sub(ktime_get(), dt[i]); - cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP); + cycles[i] += read_timestamp(engine); local_irq_enable(); } @@ -78,25 +91,6 @@ static int live_gt_clocks(void *arg) if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - if (GRAPHICS_VER(gt->i915) == 5) - /* - * XXX CS_TIMESTAMP low dword is dysfunctional? - * - * Ville's experiments indicate the high dword still works, - * but at a correspondingly reduced frequency. - */ - return 0; - - if (GRAPHICS_VER(gt->i915) == 4) - /* - * XXX CS_TIMESTAMP appears gibberish - * - * Ville's experiments indicate that it mostly appears 'stuck' - * in that we see the register report the same cycle count - * for a couple of reads. - */ - return 0; - intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 7f3bb1d34dfb..71263058a7b0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg) } struct active_engine { - struct task_struct *task; + struct kthread_worker *worker; + struct kthread_work work; struct intel_engine_cs *engine; unsigned long resets; unsigned int flags; + bool stop; + int result; }; #define TEST_ACTIVE BIT(0) @@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq) return err; } -static int active_engine(void *data) +static void active_engine(struct kthread_work *work) { I915_RND_STATE(prng); - struct active_engine *arg = data; + struct active_engine *arg = container_of(work, typeof(*arg), work); struct intel_engine_cs *engine = arg->engine; struct i915_request *rq[8] = {}; struct intel_context *ce[ARRAY_SIZE(rq)]; @@ -913,16 +916,17 @@ static int active_engine(void *data) for (count = 0; count < ARRAY_SIZE(ce); count++) { ce[count] = intel_context_create(engine); if (IS_ERR(ce[count])) { - err = PTR_ERR(ce[count]); - pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); + arg->result = PTR_ERR(ce[count]); + pr_err("[%s] Create context #%ld failed: %d!\n", + engine->name, count, arg->result); while (--count) intel_context_put(ce[count]); - return err; + return; } } count = 0; - while (!kthread_should_stop()) { + while (!READ_ONCE(arg->stop)) { unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; @@ -967,7 +971,7 @@ static int active_engine(void *data) intel_context_put(ce[count]); } - return err; + arg->result = err; } static int __igt_reset_engines(struct intel_gt *gt, @@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt, memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES); for_each_engine(other, gt, tmp) { - struct task_struct *tsk; + struct kthread_worker *worker; threads[tmp].resets = i915_reset_engine_count(global, other); @@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].engine = other; threads[tmp].flags = flags; - tsk = kthread_run(active_engine, &threads[tmp], - "igt/%s", other->name); - if (IS_ERR(tsk)) { - err = PTR_ERR(tsk); - pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); + worker = kthread_create_worker(0, "igt/%s", + other->name); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + pr_err("[%s] Worker create failed: %d!\n", + engine->name, err); goto unwind; } - threads[tmp].task = tsk; - get_task_struct(tsk); - } + threads[tmp].worker = worker; - yield(); /* start all threads before we begin */ + kthread_init_work(&threads[tmp].work, active_engine); + kthread_queue_work(threads[tmp].worker, + &threads[tmp].work); + } st_engine_heartbeat_disable_no_pm(engine); GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, @@ -1197,17 +1203,20 @@ unwind: for_each_engine(other, gt, tmp) { int ret; - if (!threads[tmp].task) + if (!threads[tmp].worker) continue; - ret = kthread_stop(threads[tmp].task); + WRITE_ONCE(threads[tmp].stop, true); + kthread_flush_work(&threads[tmp].work); + ret = READ_ONCE(threads[tmp].result); if (ret) { pr_err("kthread for other engine %s failed, err=%d\n", other->name, ret); if (!err) err = ret; } - put_task_struct(threads[tmp].task); + + kthread_destroy_worker(threads[tmp].worker); /* GuC based resets are not logged per engine */ if (!using_guc) { diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 2b0c87999949..0dc5309c90a4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -6,6 +6,7 @@ #include <linux/sort.h> #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "selftests/i915_random.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index cfb4708dd62e..99a372486fb7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -1107,21 +1107,27 @@ static u64 __measure_power(int duration_ms) return div64_u64(1000 * 1000 * dE, dt); } -static u64 measure_power_at(struct intel_rps *rps, int *freq) +static u64 measure_power(struct intel_rps *rps, int *freq) { u64 x[5]; int i; - *freq = rps_set_check(rps, *freq); for (i = 0; i < 5; i++) x[i] = __measure_power(5); - *freq = (*freq + read_cagf(rps)) / 2; + + *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2; /* A simple triangle filter for better result stability */ sort(x, 5, sizeof(*x), cmp_u64, NULL); return div_u64(x[1] + 2 * x[2] + x[3], 4); } +static u64 measure_power_at(struct intel_rps *rps, int *freq) +{ + *freq = rps_set_check(rps, *freq); + return measure_power(rps, freq); +} + int live_rps_power(void *arg) { struct intel_gt *gt = arg; diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index f8a1d27df272..82ec95a299f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -11,7 +11,8 @@ enum test_type { VARY_MIN, VARY_MAX, - MAX_GRANTED + MAX_GRANTED, + SLPC_POWER, }; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) @@ -41,6 +42,39 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } +static int slpc_set_freq(struct intel_gt *gt, u32 freq) +{ + int err; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + + err = slpc_set_max_freq(slpc, freq); + if (err) { + pr_err("Unable to update max freq"); + return err; + } + + err = slpc_set_min_freq(slpc, freq); + if (err) { + pr_err("Unable to update min freq"); + return err; + } + + return err; +} + +static u64 measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power) +{ + int err = 0; + + err = slpc_set_freq(gt, *freq); + if (err) + return err; + *freq = intel_rps_read_actual_frequency(>->rps); + *power = measure_power(>->rps, freq); + + return err; +} + static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) { @@ -113,6 +147,58 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, return err; } +static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine) +{ + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct { + u64 power; + int freq; + } min, max; + int err = 0; + + /* + * Our fundamental assumption is that running at lower frequency + * actually saves power. Let's see if our RAPL measurement supports + * that theory. + */ + if (!librapl_supported(gt->i915)) + return 0; + + min.freq = slpc->min_freq; + err = measure_power_at_freq(gt, &min.freq, &min.power); + + if (err) + return err; + + max.freq = slpc->rp0_freq; + err = measure_power_at_freq(gt, &max.freq, &max.power); + + if (err) + return err; + + pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", + engine->name, + min.power, min.freq, + max.power, max.freq); + + if (10 * min.freq >= 9 * max.freq) { + pr_notice("Could not control frequency, ran at [%uMHz, %uMhz]\n", + min.freq, max.freq); + } + + if (11 * min.power > 10 * max.power) { + pr_err("%s: did not conserve power when setting lower frequency!\n", + engine->name); + err = -EINVAL; + } + + /* Restore min/max frequencies */ + slpc_set_max_freq(slpc, slpc->rp0_freq); + slpc_set_min_freq(slpc, slpc->min_freq); + + return err; +} + static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) { struct intel_gt *gt = rps_to_gt(rps); @@ -153,6 +239,11 @@ static int run_test(struct intel_gt *gt, int test_type) if (!intel_uc_uses_guc_slpc(>->uc)) return 0; + if (slpc->min_freq == slpc->rp0_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + if (igt_spinner_init(&spin, gt)) return -ENOMEM; @@ -167,17 +258,14 @@ static int run_test(struct intel_gt *gt, int test_type) } /* - * FIXME: With efficient frequency enabled, GuC can request - * frequencies higher than the SLPC max. While this is fixed - * in GuC, we level set these tests with RPn as min. + * Set min frequency to RPn so that we can test the whole + * range of RPn-RP0. This also turns off efficient freq + * usage and makes results more predictable. */ err = slpc_set_min_freq(slpc, slpc->min_freq); - if (err) + if (err) { + pr_err("Unable to update min freq!"); return err; - - if (slpc->min_freq == slpc->rp0_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; } intel_gt_pm_wait_for_idle(gt); @@ -233,17 +321,23 @@ static int run_test(struct intel_gt *gt, int test_type) err = max_granted_freq(slpc, rps, &max_act_freq); break; + + case SLPC_POWER: + err = slpc_power(gt, engine); + break; } - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + if (test_type != SLPC_POWER) { + pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); - /* Actual frequency should rise above min */ - if (max_act_freq <= slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); - pr_err("Perf Limit Reasons: 0x%x\n", - intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); - err = -EINVAL; + /* Actual frequency should rise above min */ + if (max_act_freq <= slpc->min_freq) { + pr_err("Actual freq did not rise above min\n"); + pr_err("Perf Limit Reasons: 0x%x\n", + intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); + err = -EINVAL; + } } igt_spinner_end(&spin); @@ -270,26 +364,66 @@ static int run_test(struct intel_gt *gt, int test_type) static int live_slpc_vary_min(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + unsigned int i; + int ret; + + for_each_gt(gt, i915, i) { + ret = run_test(gt, VARY_MIN); + if (ret) + return ret; + } - return run_test(gt, VARY_MIN); + return ret; } static int live_slpc_vary_max(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + unsigned int i; + int ret; + + for_each_gt(gt, i915, i) { + ret = run_test(gt, VARY_MAX); + if (ret) + return ret; + } - return run_test(gt, VARY_MAX); + return ret; } /* check if pcode can grant RP0 */ static int live_slpc_max_granted(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + unsigned int i; + int ret; - return run_test(gt, MAX_GRANTED); + for_each_gt(gt, i915, i) { + ret = run_test(gt, MAX_GRANTED); + if (ret) + return ret; + } + + return ret; +} + +static int live_slpc_power(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt; + unsigned int i; + int ret; + + for_each_gt(gt, i915, i) { + ret = run_test(gt, SLPC_POWER); + if (ret) + return ret; + } + + return ret; } int intel_slpc_live_selftests(struct drm_i915_private *i915) @@ -298,10 +432,16 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_slpc_vary_max), SUBTEST(live_slpc_vary_min), SUBTEST(live_slpc_max_granted), + SUBTEST(live_slpc_power), }; - if (intel_gt_is_wedged(to_gt(i915))) - return 0; + struct intel_gt *gt; + unsigned int i; + + for_each_gt(gt, i915, i) { + if (intel_gt_is_wedged(gt)) + return 0; + } return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 67a9aab801dd..21b1edc052f8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -991,7 +991,7 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) /* Alas, we must pardon some whitelists. Mistakes already made */ static const struct regmask pardon[] = { { GEN9_CTX_PREEMPT_REG, 9 }, - { GEN8_L3SQCREG4, 9 }, + { _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */ }; return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 967031056202..f2d9858d827c 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_nsecs(2)) + clamped = intel_clamp_max_busywait_duration_ns(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.max_busywait_duration_ns, duration); @@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_timeslice_duration_ms(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.timeslice_duration_ms, duration); @@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_stop_timeout_ms(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.stop_timeout_ms, duration); @@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long timeout; + unsigned long long timeout, clamped; int err; /* @@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_preempt_timeout_ms(engine, timeout); + if (timeout != clamped) return -EINVAL; WRITE_ONCE(engine->props.preempt_timeout_ms, timeout); @@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long delay; + unsigned long long delay, clamped; int err; /* @@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_heartbeat_interval_ms(engine, delay); + if (delay != clamped) return -EINVAL; err = intel_engine_set_heartbeat(engine, delay); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 29ef8afc8c2e..f359bef046e0 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum intel_guc_action { INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000, INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h index 4c840a2639dc..811add10c30d 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -128,6 +128,15 @@ enum slpc_media_ratio_mode { SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2, }; +enum slpc_gucrc_mode { + SLPC_GUCRC_MODE_HW = 0, + SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1, + SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2, + SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3, + + SLPC_GUCRC_MODE_MAX, +}; + enum slpc_event_id { SLPC_EVENT_RESET = 0, SLPC_EVENT_SHUTDOWN = 1, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 4a59478c3b5c..58012edd4eb0 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -82,9 +82,16 @@ #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u /* + * Global scheduling policy update keys. + */ +enum { + GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD = 0x1001, +}; + +/* * Per context scheduling policy update keys. */ -enum { +enum { GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001, GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002, GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index bac06e3d6f2c..27b09ba1d295 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -441,6 +441,7 @@ err_log: err_fw: intel_uc_fw_fini(&guc->fw); out: + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL); i915_probe_error(gt->i915, "failed with %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 804133df1ac9..357873ef692b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -113,6 +113,10 @@ struct intel_guc { */ struct list_head guc_id_list; /** + * @guc_ids_in_use: Number single-lrc guc_ids in use + */ + unsigned int guc_ids_in_use; + /** * @destroyed_contexts: list of contexts waiting to be destroyed * (deregistered with the GuC) */ @@ -132,6 +136,16 @@ struct intel_guc { * @reset_fail_mask: mask of engines that failed to reset */ intel_engine_mask_t reset_fail_mask; + /** + * @sched_disable_delay_ms: schedule disable delay, in ms, for + * contexts + */ + unsigned int sched_disable_delay_ms; + /** + * @sched_disable_gucid_threshold: threshold of min remaining available + * guc_ids before we start bypassing the schedule disable delay + */ + unsigned int sched_disable_gucid_threshold; } submission_state; /** @@ -466,4 +480,6 @@ void intel_guc_write_barrier(struct intel_guc *guc); void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); +int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 74cbe8eaf531..a419d60166c8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -5,6 +5,7 @@ #include <linux/bsearch.h> +#include "gem/i915_gem_lmem.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" #include "gt/intel_gt_mcr.h" @@ -277,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) return slot; } -#define GUC_REGSET_STEERING(group, instance) ( \ - FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \ - FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \ - GUC_REGSET_NEEDS_STEERING \ -) - static long __must_check guc_mmio_reg_add(struct intel_gt *gt, struct temp_regset *regset, - i915_reg_t reg, u32 flags) + u32 offset, u32 flags) { u32 count = regset->storage_used - (regset->registers - regset->storage); - u32 offset = i915_mmio_reg_offset(reg); struct guc_mmio_reg entry = { .offset = offset, .flags = flags, }; struct guc_mmio_reg *slot; - u8 group, inst; /* * The mmio list is built using separate lists within the driver. @@ -306,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt, sizeof(entry), guc_mmio_reg_cmp)) return 0; - /* - * The GuC doesn't have a default steering, so we need to explicitly - * steer all registers that need steering. However, we do not keep track - * of all the steering ranges, only of those that have a chance of using - * a non-default steering from the i915 pov. Instead of adding such - * tracking, it is easier to just program the default steering for all - * regs that don't need a non-default one. - */ - intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst); - entry.flags |= GUC_REGSET_STEERING(group, inst); - slot = __mmio_reg_add(regset, &entry); if (IS_ERR(slot)) return PTR_ERR(slot); @@ -335,6 +317,38 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt, #define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \ guc_mmio_reg_add(gt, \ regset, \ + i915_mmio_reg_offset(reg), \ + (masked) ? GUC_REGSET_MASKED : 0) + +#define GUC_REGSET_STEERING(group, instance) ( \ + FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \ + FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \ + GUC_REGSET_NEEDS_STEERING \ +) + +static long __must_check guc_mcr_reg_add(struct intel_gt *gt, + struct temp_regset *regset, + i915_mcr_reg_t reg, u32 flags) +{ + u8 group, inst; + + /* + * The GuC doesn't have a default steering, so we need to explicitly + * steer all registers that need steering. However, we do not keep track + * of all the steering ranges, only of those that have a chance of using + * a non-default steering from the i915 pov. Instead of adding such + * tracking, it is easier to just program the default steering for all + * regs that don't need a non-default one. + */ + intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst); + flags |= GUC_REGSET_STEERING(group, inst); + + return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags); +} + +#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \ + guc_mcr_reg_add(gt, \ + regset, \ (reg), \ (masked) ? GUC_REGSET_MASKED : 0) @@ -372,8 +386,21 @@ static int guc_mmio_regset_init(struct temp_regset *regset, false); /* add in local MOCS registers */ - for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) - ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); + for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false); + else + ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); + + if (GRAPHICS_VER(engine->i915) >= 12) { + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false); + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false); + } return ret ? -1 : 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 8f1165146013..4e6dca707d94 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -169,6 +169,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = { MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), + MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), @@ -182,6 +184,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = { MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0), MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS), + MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS), MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), @@ -240,19 +244,19 @@ static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglist struct __ext_steer_reg { const char *name; - i915_reg_t reg; + i915_mcr_reg_t reg; }; static const struct __ext_steer_reg xe_extregs[] = { - {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE}, - {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE} + {"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE}, + {"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE} }; static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, const struct __ext_steer_reg *extlist, int slice_id, int subslice_id) { - ext->reg = extlist->reg; + ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg)); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; @@ -419,6 +423,44 @@ guc_capture_get_device_reglist(struct intel_guc *guc) return default_lists; } +static const char * +__stringify_type(u32 type) +{ + switch (type) { + case GUC_CAPTURE_LIST_TYPE_GLOBAL: + return "Global"; + case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: + return "Class"; + case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: + return "Instance"; + default: + break; + } + + return "unknown"; +} + +static const char * +__stringify_engclass(u32 class) +{ + switch (class) { + case GUC_RENDER_CLASS: + return "Render"; + case GUC_VIDEO_CLASS: + return "Video"; + case GUC_VIDEOENHANCE_CLASS: + return "VideoEnhance"; + case GUC_BLITTER_CLASS: + return "Blitter"; + case GUC_COMPUTE_CLASS: + return "Compute"; + default: + break; + } + + return "unknown"; +} + static int guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid, struct guc_mmio_reg *ptr, u16 num_entries) @@ -482,32 +524,55 @@ guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u return num_regs; } -int -intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, - size_t *size) +static int +guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + size_t *size, bool is_purpose_est) { struct intel_guc_state_capture *gc = guc->capture; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid]; int num_regs; - if (!gc->reglists) + if (!gc->reglists) { + drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n"); return -ENODEV; + } if (cache->is_valid) { *size = cache->size; return cache->status; } + if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF && + !guc_capture_get_one_list(gc->reglists, owner, type, classid)) { + if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL) + drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n"); + else + drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n", + __stringify_type(type), type, + __stringify_engclass(classid), classid); + return -ENODATA; + } + num_regs = guc_cap_list_num_regs(gc, owner, type, classid); + /* intentional empty lists can exist depending on hw config */ if (!num_regs) return -ENODATA; - *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + - (num_regs * sizeof(struct guc_mmio_reg))); + if (size) + *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + + (num_regs * sizeof(struct guc_mmio_reg))); return 0; } +int +intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + size_t *size) +{ + return guc_capture_getlistsize(guc, owner, type, classid, size, false); +} + static void guc_capture_create_prealloc_nodes(struct intel_guc *guc); int @@ -606,7 +671,7 @@ guc_capture_output_min_size_est(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; enum intel_engine_id id; - int worst_min_size = 0, num_regs = 0; + int worst_min_size = 0; size_t tmp = 0; if (!guc->capture) @@ -627,21 +692,19 @@ guc_capture_output_min_size_est(struct intel_guc *guc) worst_min_size += sizeof(struct guc_state_capture_group_header_t) + (3 * sizeof(struct guc_state_capture_header_t)); - if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) - num_regs += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp, true)) + worst_min_size += tmp; - if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, - engine->class, &tmp)) { - num_regs += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + engine->class, &tmp, true)) { + worst_min_size += tmp; } - if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, - engine->class, &tmp)) { - num_regs += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + engine->class, &tmp, true)) { + worst_min_size += tmp; } } - worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); - return worst_min_size; } @@ -658,15 +721,23 @@ static void check_guc_capture_size(struct intel_guc *guc) int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; u32 buffer_size = intel_guc_log_section_size_capture(&guc->log); + /* + * NOTE: min_size is much smaller than the capture region allocation (DG2: <80K vs 1MB) + * Additionally, its based on space needed to fit all engines getting reset at once + * within the same G2H handler task slot. This is very unlikely. However, if GuC really + * does run out of space for whatever reason, we will see an separate warning message + * when processing the G2H event capture-notification, search for: + * INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE. + */ if (min_size < 0) drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n", min_size); else if (min_size > buffer_size) - drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n", + drm_warn(&i915->drm, "GuC error state capture buffer maybe small: %d < %d\n", buffer_size, min_size); else if (spare_size > buffer_size) - drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n", - buffer_size, spare_size, min_size); + drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n", + buffer_size, spare_size, min_size); } /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index 25f09a420561..7269eb0bbedf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -71,12 +71,73 @@ static bool intel_eval_slpc_support(void *data) return intel_guc_slpc_is_used(guc); } +static int guc_sched_disable_delay_ms_get(void *data, u64 *val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + *val = (u64)guc->submission_state.sched_disable_delay_ms; + + return 0; +} + +static int guc_sched_disable_delay_ms_set(void *data, u64 val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + /* clamp to a practical limit, 1 minute is reasonable for a longest delay */ + guc->submission_state.sched_disable_delay_ms = min_t(u64, val, 60000); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops, + guc_sched_disable_delay_ms_get, + guc_sched_disable_delay_ms_set, "%lld\n"); + +static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + *val = guc->submission_state.sched_disable_gucid_threshold; + return 0; +} + +static int guc_sched_disable_gucid_threshold_set(void *data, u64 val) +{ + struct intel_guc *guc = data; + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + if (val > intel_guc_sched_disable_gucid_threshold_max(guc)) + guc->submission_state.sched_disable_gucid_threshold = + intel_guc_sched_disable_gucid_threshold_max(guc); + else + guc->submission_state.sched_disable_gucid_threshold = val; + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops, + guc_sched_disable_gucid_threshold_get, + guc_sched_disable_gucid_threshold_set, "%lld\n"); + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { { "guc_info", &guc_info_fops, NULL }, { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, + { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL }, + { "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops, + NULL }, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index a0372735cddb..5b86b2e286e0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -10,12 +10,15 @@ */ #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "intel_guc_fw.h" #include "i915_drv.h" -static void guc_prepare_xfer(struct intel_uncore *uncore) +static void guc_prepare_xfer(struct intel_gt *gt) { + struct intel_uncore *uncore = gt->uncore; + u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC | GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | @@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) if (GRAPHICS_VER(uncore->i915) == 9) { /* DOP Clock Gating Enable for GuC clocks */ - intel_uncore_rmw(uncore, GEN7_MISCCPCTL, - 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); + intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL, + GEN8_DOP_CLOCK_GATE_GUC_ENABLE | + intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL)); /* allows for 5us (in 10ns units) before GT can go to RC6 */ intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); @@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc) struct intel_uncore *uncore = gt->uncore; int ret; - guc_prepare_xfer(uncore); + guc_prepare_xfer(gt); /* * Note that GuC needs the CSS header plus uKernel code to be copied diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 323b055e5db9..968ebd79dce7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -290,6 +290,25 @@ struct guc_update_context_policy { struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; } __packed; +/* Format of the UPDATE_SCHEDULING_POLICIES H2G data packet */ +struct guc_update_scheduling_policy_header { + u32 action; +} __packed; + +/* + * Can't dynmically allocate memory for the scheduling policy KLV because + * it will be sent from within the reset path. Need a fixed size lump on + * the stack instead :(. + * + * Currently, there is only one KLV defined, which has 1 word of KL + 2 words of V. + */ +#define MAX_SCHEDULING_POLICY_SIZE 3 + +struct guc_update_scheduling_policy { + struct guc_update_scheduling_policy_header header; + u32 data[MAX_SCHEDULING_POLICY_SIZE]; +} __packed; + #define GUC_POWER_UNSPECIFIED 0 #define GUC_POWER_D0 1 #define GUC_POWER_D1 2 @@ -298,6 +317,9 @@ struct guc_update_context_policy { /* Scheduling policy settings */ +#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION 100 /* in ms */ +#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO 50 /* in percent */ + #define GLOBAL_POLICY_MAX_NUM_WI 15 /* Don't reset an engine upon preemption failure */ @@ -305,6 +327,27 @@ struct guc_update_context_policy { #define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 +/* + * GuC converts the timeout to clock ticks internally. Different platforms have + * different GuC clocks. Thus, the maximum value before overflow is platform + * dependent. Current worst case scenario is about 110s. So, the spec says to + * limit to 100s to be safe. + */ +#define GUC_POLICY_MAX_EXEC_QUANTUM_US (100 * 1000 * 1000UL) +#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US (100 * 1000 * 1000UL) + +static inline u32 guc_policy_max_exec_quantum_ms(void) +{ + BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX); + return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000; +} + +static inline u32 guc_policy_max_preempt_timeout_ms(void) +{ + BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX); + return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000; +} + struct guc_policies { u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; /* In micro seconds. How much time to allow before DPC processing is diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 55d3ef93e86f..68331c538b0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -16,15 +16,15 @@ #if defined(CONFIG_DRM_I915_DEBUG_GUC) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M -#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M #elif defined(CONFIG_DRM_I915_DEBUG_GEM) #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M -#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M #else #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K -#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M #endif static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index fdd895f73f9f..63464933cbce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -137,6 +137,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -190,6 +201,15 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } +static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -263,6 +283,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) slpc->max_freq_softlimit = 0; slpc->min_freq_softlimit = 0; + slpc->min_is_rpmax = false; slpc->boost_freq = 0; atomic_set(&slpc->num_waiters, 0); @@ -588,6 +609,39 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) return 0; } +static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int slpc_min_freq; + int ret; + + ret = intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq); + if (ret) { + drm_err(&i915->drm, + "Failed to get min freq: (%d)\n", + ret); + return false; + } + + if (slpc_min_freq == SLPC_MAX_FREQ_MHZ) + return true; + else + return false; +} + +static void update_server_min_softlimit(struct intel_guc_slpc *slpc) +{ + /* For server parts, SLPC min will be at RPMax. + * Use min softlimit to clamp it to RP0 instead. + */ + if (!slpc->min_freq_softlimit && + is_slpc_min_freq_rpmax(slpc)) { + slpc->min_is_rpmax = true; + slpc->min_freq_softlimit = slpc->rp0_freq; + (slpc_to_gt(slpc))->defaults.min_freq = slpc->min_freq_softlimit; + } +} + static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) { /* Force SLPC to used platform rp0 */ @@ -610,6 +664,52 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) slpc->boost_freq = slpc->rp0_freq; } +/** + * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode + * @slpc: pointer to intel_guc_slpc. + * @mode: new value of the mode. + * + * This function will override the GUCRC mode. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode) +{ + int ret; + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + + if (mode >= SLPC_GUCRC_MODE_MAX) + return -EINVAL; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode); + if (ret) + drm_err(&i915->drm, + "Override gucrc mode %d failed %d\n", + mode, ret); + } + + return ret; +} + +int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE); + if (ret) + drm_err(&i915->drm, + "Unsetting gucrc mode failed %d\n", + ret); + } + + return ret; +} + /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. @@ -647,6 +747,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); + /* Handle the case where min=max=RPmax */ + update_server_min_softlimit(slpc); + /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 82a98f78f96c..17ed515f6a85 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -9,6 +9,8 @@ #include "intel_guc_submission.h" #include "intel_guc_slpc_types.h" +#define SLPC_MAX_FREQ_MHZ 4250 + struct intel_gt; struct drm_printer; @@ -42,5 +44,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); +int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc); +int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h index 73d208123528..a6ef53b04e04 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -19,6 +19,9 @@ struct intel_guc_slpc { bool supported; bool selected; + /* Indicates this is a server part */ + bool min_is_rpmax; + /* platform frequency limits */ u32 min_freq; u32 rp0_freq; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1db59eeb34db..4ccb29f9ac55 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,6 +6,7 @@ #include <linux/circ_buf.h> #include "gem/i915_gem_context.h" +#include "gem/i915_gem_lmem.h" #include "gt/gen8_engine_cs.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" @@ -65,7 +66,13 @@ * corresponding G2H returns indicating the scheduling disable operation has * completed it is safe to unpin the context. While a disable is in flight it * isn't safe to resubmit the context so a fence is used to stall all future - * requests of that context until the G2H is returned. + * requests of that context until the G2H is returned. Because this interaction + * with the GuC takes a non-zero amount of time we delay the disabling of + * scheduling after the pin count goes to zero by a configurable period of time + * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of + * time to resubmit something on the context before doing this costly operation. + * This delay is only done if the context isn't closed and the guc_id usage is + * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD). * * Context deregistration: * Before a context can be destroyed or if we steal its guc_id we must @@ -163,7 +170,8 @@ guc_create_parallel(struct intel_engine_cs **engines, #define SCHED_STATE_PENDING_ENABLE BIT(5) #define SCHED_STATE_REGISTERED BIT(6) #define SCHED_STATE_POLICY_REQUIRED BIT(7) -#define SCHED_STATE_BLOCKED_SHIFT 8 +#define SCHED_STATE_CLOSED BIT(8) +#define SCHED_STATE_BLOCKED_SHIFT 9 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) @@ -173,12 +181,20 @@ static inline void init_sched_state(struct intel_context *ce) ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } +/* + * Kernel contexts can have SCHED_STATE_REGISTERED after suspend. + * A context close can race with the submission path, so SCHED_STATE_CLOSED + * can be set immediately before we try to register. + */ +#define SCHED_STATE_VALID_INIT \ + (SCHED_STATE_BLOCKED_MASK | \ + SCHED_STATE_CLOSED | \ + SCHED_STATE_REGISTERED) + __maybe_unused static bool sched_state_is_init(struct intel_context *ce) { - /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */ - return !(ce->guc_state.sched_state & - ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); + return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT); } static inline bool @@ -319,6 +335,17 @@ static inline void clr_context_policy_required(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; } +static inline bool context_close_done(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_CLOSED; +} + +static inline void set_context_close_done(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_CLOSED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -343,25 +370,6 @@ static inline void decr_context_blocked(struct intel_context *ce) ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } -static inline bool context_has_committed_requests(struct intel_context *ce) -{ - return !!ce->guc_state.number_committed_requests; -} - -static inline void incr_context_committed_requests(struct intel_context *ce) -{ - lockdep_assert_held(&ce->guc_state.lock); - ++ce->guc_state.number_committed_requests; - GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); -} - -static inline void decr_context_committed_requests(struct intel_context *ce) -{ - lockdep_assert_held(&ce->guc_state.lock); - --ce->guc_state.number_committed_requests; - GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); -} - static struct intel_context * request_to_scheduling_context(struct i915_request *rq) { @@ -1067,6 +1075,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) xa_unlock(&guc->context_lookup); + if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && + (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) { + /* successful cancel so jump straight to close it */ + intel_context_sched_disable_unpin(ce); + } + spin_lock(&ce->guc_state.lock); /* @@ -1994,6 +2008,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) if (unlikely(ret < 0)) return ret; + if (!intel_context_is_parent(ce)) + ++guc->submission_state.guc_ids_in_use; + ce->guc_id.id = ret; return 0; } @@ -2003,14 +2020,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_child(ce)); if (!context_guc_id_invalid(ce)) { - if (intel_context_is_parent(ce)) + if (intel_context_is_parent(ce)) { bitmap_release_region(guc->submission_state.guc_ids_bitmap, ce->guc_id.id, order_base_2(ce->parallel.number_children + 1)); - else + } else { + --guc->submission_state.guc_ids_in_use; ida_simple_remove(&guc->submission_state.guc_ids, ce->guc_id.id); + } clr_ctx_id_mapping(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } @@ -2429,6 +2448,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) int ret; /* NB: For both of these, zero means disabled. */ + GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, + execution_quantum)); + GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, + preemption_timeout)); execution_quantum = engine->props.timeslice_duration_ms * 1000; preemption_timeout = engine->props.preempt_timeout_ms * 1000; @@ -2462,6 +2485,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine, desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; /* NB: For both of these, zero means disabled. */ + GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, + desc->execution_quantum)); + GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, + desc->preemption_timeout)); desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } @@ -2998,41 +3025,104 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq, } } -static void guc_context_sched_disable(struct intel_context *ce) +static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, + unsigned long flags) + __releases(ce->guc_state.lock) { - struct intel_guc *guc = ce_to_guc(ce); - unsigned long flags; struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; u16 guc_id; + lockdep_assert_held(&ce->guc_state.lock); + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); +} + +static bool bypass_sched_disable(struct intel_guc *guc, + struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(intel_context_is_child(ce)); + if (submission_disabled(guc) || context_guc_id_invalid(ce) || + !ctx_id_mapped(guc, ce->guc_id.id)) { + clr_context_enabled(ce); + return true; + } + + return !context_enabled(ce); +} + +static void __delay_sched_disable(struct work_struct *wrk) +{ + struct intel_context *ce = + container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (bypass_sched_disable(guc, ce)) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + intel_context_sched_disable_unpin(ce); + } else { + do_sched_disable(guc, ce, flags); + } +} + +static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) +{ /* - * We have to check if the context has been disabled by another thread, - * check if submssion has been disabled to seal a race with reset and - * finally check if any more requests have been committed to the - * context ensursing that a request doesn't slip through the - * 'context_pending_disable' fence. + * parent contexts are perma-pinned, if we are unpinning do schedule + * disable immediately. */ - if (unlikely(!context_enabled(ce) || submission_disabled(guc) || - context_has_committed_requests(ce))) { - clr_context_enabled(ce); + if (intel_context_is_parent(ce)) + return true; + + /* + * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. + */ + return guc->submission_state.guc_ids_in_use > + guc->submission_state.sched_disable_gucid_threshold; +} + +static void guc_context_sched_disable(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + u64 delay = guc->submission_state.sched_disable_delay_ms; + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + if (bypass_sched_disable(guc, ce)) { spin_unlock_irqrestore(&ce->guc_state.lock, flags); - goto unpin; + intel_context_sched_disable_unpin(ce); + } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && + delay) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + mod_delayed_work(system_unbound_wq, + &ce->guc_state.sched_disable_delay_work, + msecs_to_jiffies(delay)); + } else { + do_sched_disable(guc, ce, flags); } - guc_id = prep_context_pending_disable(ce); +} - spin_unlock_irqrestore(&ce->guc_state.lock, flags); +static void guc_context_close(struct intel_context *ce) +{ + unsigned long flags; - with_intel_runtime_pm(runtime_pm, wakeref) - __guc_context_sched_disable(guc, ce, guc_id); + if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && + cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) + __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); - return; -unpin: - intel_context_sched_disable_unpin(ce); + spin_lock_irqsave(&ce->guc_state.lock, flags); + set_context_close_done(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); } static inline void guc_lrc_desc_unpin(struct intel_context *ce) @@ -3071,7 +3161,6 @@ static void __guc_context_destroy(struct intel_context *ce) ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); - GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); intel_context_fini(ce); @@ -3340,8 +3429,6 @@ static void remove_from_context(struct i915_request *rq) guc_prio_fini(rq, ce); - decr_context_committed_requests(ce); - spin_unlock_irq(&ce->guc_state.lock); atomic_dec(&ce->guc_id.ref); @@ -3351,6 +3438,8 @@ static void remove_from_context(struct i915_request *rq) static const struct intel_context_ops guc_context_ops = { .alloc = guc_context_alloc, + .close = guc_context_close, + .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, .unpin = guc_context_unpin, @@ -3433,6 +3522,10 @@ static void guc_context_init(struct intel_context *ce) rcu_read_unlock(); ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); + + INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, + __delay_sched_disable); + set_bit(CONTEXT_GUC_INIT, &ce->flags); } @@ -3471,6 +3564,26 @@ static int guc_request_alloc(struct i915_request *rq) guc_context_init(ce); /* + * If the context gets closed while the execbuf is ongoing, the context + * close code will race with the below code to cancel the delayed work. + * If the context close wins the race and cancels the work, it will + * immediately call the sched disable (see guc_context_close), so there + * is a chance we can get past this check while the sched_disable code + * is being executed. To make sure that code completes before we check + * the status further down, we wait for the close process to complete. + * Else, this code path could send a request down thinking that the + * context is still in a schedule-enable mode while the GuC ends up + * dropping the request completely because the disable did go from the + * context_close path right to GuC just prior. In the event the CT is + * full, we could potentially need to wait up to 1.5 seconds. + */ + if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) + intel_context_sched_disable_unpin(ce); + else if (intel_context_is_closed(ce)) + if (wait_for(context_close_done(ce), 1500)) + drm_warn(&guc_to_gt(guc)->i915->drm, + "timed out waiting on context sched close before realloc\n"); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the * guc_id and creating horrible race conditions. This is especially bad @@ -3524,7 +3637,6 @@ out: list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); } - incr_context_committed_requests(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); return 0; @@ -3600,6 +3712,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce) static const struct intel_context_ops virtual_guc_context_ops = { .alloc = guc_virtual_context_alloc, + .close = guc_context_close, + .pre_pin = guc_virtual_context_pre_pin, .pin = guc_virtual_context_pin, .unpin = guc_virtual_context_unpin, @@ -3689,6 +3803,8 @@ static void guc_child_context_destroy(struct kref *kref) static const struct intel_context_ops virtual_parent_context_ops = { .alloc = guc_virtual_context_alloc, + .close = guc_context_close, + .pre_pin = guc_context_pre_pin, .pin = guc_parent_context_pin, .unpin = guc_parent_context_unpin, @@ -4093,7 +4209,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->emit_bb_start = gen8_emit_bb_start; if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) - engine->emit_bb_start = gen125_emit_bb_start; + engine->emit_bb_start = xehp_emit_bb_start; } static void rcs_submission_override(struct intel_engine_cs *engine) @@ -4177,6 +4293,98 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) return 0; } +struct scheduling_policy { + /* internal data */ + u32 max_words, num_words; + u32 count; + /* API data */ + struct guc_update_scheduling_policy h2g; +}; + +static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) +{ + u32 *start = (void *)&policy->h2g; + u32 *end = policy->h2g.data + policy->num_words; + size_t delta = end - start; + + return delta; +} + +static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) +{ + policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + policy->max_words = ARRAY_SIZE(policy->h2g.data); + policy->num_words = 0; + policy->count = 0; + + return policy; +} + +static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, + u32 action, u32 *data, u32 len) +{ + u32 *klv_ptr = policy->h2g.data + policy->num_words; + + GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); + *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | + FIELD_PREP(GUC_KLV_0_LEN, len); + memcpy(klv_ptr, data, sizeof(u32) * len); + policy->num_words += 1 + len; + policy->count++; +} + +static int __guc_action_set_scheduling_policies(struct intel_guc *guc, + struct scheduling_policy *policy) +{ + int ret; + + ret = intel_guc_send(guc, (u32 *)&policy->h2g, + __guc_scheduling_policy_action_size(policy)); + if (ret < 0) + return ret; + + if (ret != policy->count) { + drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!", + ret, policy->count); + if (ret > policy->count) + return -EPROTO; + } + + return 0; +} + +static int guc_init_global_schedule_policy(struct intel_guc *guc) +{ + struct scheduling_policy policy; + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int ret = 0; + + if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0)) + return 0; + + __guc_scheduling_policy_start_klv(&policy); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { + u32 yield[] = { + GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, + GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, + }; + + __guc_scheduling_policy_add_klv(&policy, + GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, + yield, ARRAY_SIZE(yield)); + + ret = __guc_action_set_scheduling_policies(guc, &policy); + if (ret) + i915_probe_error(gt->i915, + "Failed to configure global scheduling policies: %pe!\n", + ERR_PTR(ret)); + } + + return ret; +} + void intel_guc_submission_enable(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -4189,6 +4397,7 @@ void intel_guc_submission_enable(struct intel_guc *guc) guc_init_lrc_mapping(guc); guc_init_engine_stats(guc); + guc_init_global_schedule_policy(guc); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -4219,6 +4428,26 @@ static bool __guc_submission_selected(struct intel_guc *guc) return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; } +int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) +{ + return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); +} + +/* + * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher + * workloads are able to enjoy the latency reduction when delaying the schedule-disable + * operation. This matches the 30fps game-render + encode (real world) workload this + * knob was tested against. + */ +#define SCHED_DISABLE_DELAY_MS 34 + +/* + * A threshold of 75% is a reasonable starting point considering that real world apps + * generally don't get anywhere near this. + */ +#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ + (((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4) + void intel_guc_submission_init_early(struct intel_guc *guc) { xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); @@ -4235,7 +4464,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); + guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; + guc->submission_state.sched_disable_gucid_threshold = + NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 3bb8838e325a..fbc8bae14f76 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -10,6 +10,9 @@ #include "intel_huc.h" #include "i915_drv.h" +#include <linux/device/bus.h> +#include <linux/mei_aux.h> + /** * DOC: HuC * @@ -42,6 +45,172 @@ * HuC-specific commands. */ +/* + * MEI-GSC load is an async process. The probing of the exposed aux device + * (see intel_gsc.c) usually happens a few seconds after i915 probe, depending + * on when the kernel schedules it. Unless something goes terribly wrong, we're + * guaranteed for this to happen during boot, so the big timeout is a safety net + * that we never expect to need. + * MEI-PXP + HuC load usually takes ~300ms, but if the GSC needs to be resumed + * and/or reset, this can take longer. Note that the kernel might schedule + * other work between the i915 init/resume and the MEI one, which can add to + * the delay. + */ +#define GSC_INIT_TIMEOUT_MS 10000 +#define PXP_INIT_TIMEOUT_MS 5000 + +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, + enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + +static void __delayed_huc_load_complete(struct intel_huc *huc) +{ + if (!i915_sw_fence_done(&huc->delayed_load.fence)) + i915_sw_fence_complete(&huc->delayed_load.fence); +} + +static void delayed_huc_load_complete(struct intel_huc *huc) +{ + hrtimer_cancel(&huc->delayed_load.timer); + __delayed_huc_load_complete(huc); +} + +static void __gsc_init_error(struct intel_huc *huc) +{ + huc->delayed_load.status = INTEL_HUC_DELAYED_LOAD_ERROR; + __delayed_huc_load_complete(huc); +} + +static void gsc_init_error(struct intel_huc *huc) +{ + hrtimer_cancel(&huc->delayed_load.timer); + __gsc_init_error(huc); +} + +static void gsc_init_done(struct intel_huc *huc) +{ + hrtimer_cancel(&huc->delayed_load.timer); + + /* MEI-GSC init is done, now we wait for MEI-PXP to bind */ + huc->delayed_load.status = INTEL_HUC_WAITING_ON_PXP; + if (!i915_sw_fence_done(&huc->delayed_load.fence)) + hrtimer_start(&huc->delayed_load.timer, + ms_to_ktime(PXP_INIT_TIMEOUT_MS), + HRTIMER_MODE_REL); +} + +static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrtimer) +{ + struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer); + + if (!intel_huc_is_authenticated(huc)) { + if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_GSC) + drm_notice(&huc_to_gt(huc)->i915->drm, + "timed out waiting for MEI GSC init to load HuC\n"); + else if (huc->delayed_load.status == INTEL_HUC_WAITING_ON_PXP) + drm_notice(&huc_to_gt(huc)->i915->drm, + "timed out waiting for MEI PXP init to load HuC\n"); + else + MISSING_CASE(huc->delayed_load.status); + + __gsc_init_error(huc); + } + + return HRTIMER_NORESTART; +} + +static void huc_delayed_load_start(struct intel_huc *huc) +{ + ktime_t delay; + + GEM_BUG_ON(intel_huc_is_authenticated(huc)); + + /* + * On resume we don't have to wait for MEI-GSC to be re-probed, but we + * do need to wait for MEI-PXP to reset & re-bind + */ + switch (huc->delayed_load.status) { + case INTEL_HUC_WAITING_ON_GSC: + delay = ms_to_ktime(GSC_INIT_TIMEOUT_MS); + break; + case INTEL_HUC_WAITING_ON_PXP: + delay = ms_to_ktime(PXP_INIT_TIMEOUT_MS); + break; + default: + gsc_init_error(huc); + return; + } + + /* + * This fence is always complete unless we're waiting for the + * GSC device to come up to load the HuC. We arm the fence here + * and complete it when we confirm that the HuC is loaded from + * the PXP bind callback. + */ + GEM_BUG_ON(!i915_sw_fence_done(&huc->delayed_load.fence)); + i915_sw_fence_fini(&huc->delayed_load.fence); + i915_sw_fence_reinit(&huc->delayed_load.fence); + i915_sw_fence_await(&huc->delayed_load.fence); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_start(&huc->delayed_load.timer, delay, HRTIMER_MODE_REL); +} + +static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct device *dev = data; + struct intel_huc *huc = container_of(nb, struct intel_huc, delayed_load.nb); + struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0]; + + if (!intf->adev || &intf->adev->aux_dev.dev != dev) + return 0; + + switch (action) { + case BUS_NOTIFY_BOUND_DRIVER: /* mei driver bound to aux device */ + gsc_init_done(huc); + break; + + case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */ + case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */ + drm_info(&huc_to_gt(huc)->i915->drm, + "mei driver not bound, disabling HuC load\n"); + gsc_init_error(huc); + break; + } + + return 0; +} + +void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus) +{ + int ret; + + if (!intel_huc_is_loaded_by_gsc(huc)) + return; + + huc->delayed_load.nb.notifier_call = gsc_notifier; + ret = bus_register_notifier(bus, &huc->delayed_load.nb); + if (ret) { + drm_err(&huc_to_gt(huc)->i915->drm, + "failed to register GSC notifier\n"); + huc->delayed_load.nb.notifier_call = NULL; + gsc_init_error(huc); + } +} + +void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus) +{ + if (!huc->delayed_load.nb.notifier_call) + return; + + delayed_huc_load_complete(huc); + + bus_unregister_notifier(bus, &huc->delayed_load.nb); + huc->delayed_load.nb.notifier_call = NULL; +} + void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -57,6 +226,17 @@ void intel_huc_init_early(struct intel_huc *huc) huc->status.mask = HUC_FW_VERIFIED; huc->status.value = HUC_FW_VERIFIED; } + + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a delayed HuC reload in progress. + */ + i915_sw_fence_init(&huc->delayed_load.fence, + sw_fence_dummy_notify); + i915_sw_fence_commit(&huc->delayed_load.fence); + + hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + huc->delayed_load.timer.function = huc_delayed_load_timer_callback; } #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") @@ -113,6 +293,7 @@ int intel_huc_init(struct intel_huc *huc) return 0; out: + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL); drm_info(&i915->drm, "HuC init failed with %d\n", err); return err; } @@ -122,9 +303,50 @@ void intel_huc_fini(struct intel_huc *huc) if (!intel_uc_fw_is_loadable(&huc->fw)) return; + delayed_huc_load_complete(huc); + + i915_sw_fence_fini(&huc->delayed_load.fence); intel_uc_fw_fini(&huc->fw); } +void intel_huc_suspend(struct intel_huc *huc) +{ + if (!intel_uc_fw_is_loadable(&huc->fw)) + return; + + /* + * in the unlikely case that we're suspending before the GSC has + * completed its loading sequence, just stop waiting. We'll restart + * on resume. + */ + delayed_huc_load_complete(huc); +} + +int intel_huc_wait_for_auth_complete(struct intel_huc *huc) +{ + struct intel_gt *gt = huc_to_gt(huc); + int ret; + + ret = __intel_wait_for_register(gt->uncore, + huc->status.reg, + huc->status.mask, + huc->status.value, + 2, 50, NULL); + + /* mark the load process as complete even if the wait failed */ + delayed_huc_load_complete(huc); + + if (ret) { + drm_err(>->i915->drm, "HuC: Firmware not verified %d\n", ret); + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); + return ret; + } + + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + drm_info(>->i915->drm, "HuC authenticated\n"); + return 0; +} + /** * intel_huc_auth() - Authenticate HuC uCode * @huc: intel_huc structure @@ -161,27 +383,18 @@ int intel_huc_auth(struct intel_huc *huc) } /* Check authentication status, it should be done by now */ - ret = __intel_wait_for_register(gt->uncore, - huc->status.reg, - huc->status.mask, - huc->status.value, - 2, 50, NULL); - if (ret) { - DRM_ERROR("HuC: Firmware not verified %d\n", ret); + ret = intel_huc_wait_for_auth_complete(huc); + if (ret) goto fail; - } - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); - drm_info(>->i915->drm, "HuC authenticated\n"); return 0; fail: i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); - intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return ret; } -static bool huc_is_authenticated(struct intel_huc *huc) +bool intel_huc_is_authenticated(struct intel_huc *huc) { struct intel_gt *gt = huc_to_gt(huc); intel_wakeref_t wakeref; @@ -200,13 +413,8 @@ static bool huc_is_authenticated(struct intel_huc *huc) * This function reads status register to verify if HuC * firmware was successfully loaded. * - * Returns: - * * -ENODEV if HuC is not present on this platform, - * * -EOPNOTSUPP if HuC firmware is disabled, - * * -ENOPKG if HuC firmware was not installed, - * * -ENOEXEC if HuC firmware is invalid or mismatched, - * * 0 if HuC firmware is not running, - * * 1 if HuC firmware is authenticated and running. + * The return values match what is expected for the I915_PARAM_HUC_STATUS + * getparam. */ int intel_huc_check_status(struct intel_huc *huc) { @@ -219,11 +427,21 @@ int intel_huc_check_status(struct intel_huc *huc) return -ENOPKG; case INTEL_UC_FIRMWARE_ERROR: return -ENOEXEC; + case INTEL_UC_FIRMWARE_INIT_FAIL: + return -ENOMEM; + case INTEL_UC_FIRMWARE_LOAD_FAIL: + return -EIO; default: break; } - return huc_is_authenticated(huc); + return intel_huc_is_authenticated(huc); +} + +static bool huc_has_delayed_load(struct intel_huc *huc) +{ + return intel_huc_is_loaded_by_gsc(huc) && + (huc->delayed_load.status != INTEL_HUC_DELAYED_LOAD_ERROR); } void intel_huc_update_auth_status(struct intel_huc *huc) @@ -231,9 +449,11 @@ void intel_huc_update_auth_status(struct intel_huc *huc) if (!intel_uc_fw_is_loadable(&huc->fw)) return; - if (huc_is_authenticated(huc)) + if (intel_huc_is_authenticated(huc)) intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + else if (huc_has_delayed_load(huc)) + huc_delayed_load_start(huc); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index d7e25b6e879e..52db03620c60 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -7,9 +7,21 @@ #define _INTEL_HUC_H_ #include "i915_reg_defs.h" +#include "i915_sw_fence.h" #include "intel_uc_fw.h" #include "intel_huc_fw.h" +#include <linux/notifier.h> +#include <linux/hrtimer.h> + +struct bus_type; + +enum intel_huc_delayed_load_status { + INTEL_HUC_WAITING_ON_GSC = 0, + INTEL_HUC_WAITING_ON_PXP, + INTEL_HUC_DELAYED_LOAD_ERROR, +}; + struct intel_huc { /* Generic uC firmware management */ struct intel_uc_fw fw; @@ -20,14 +32,27 @@ struct intel_huc { u32 mask; u32 value; } status; + + struct { + struct i915_sw_fence fence; + struct hrtimer timer; + struct notifier_block nb; + enum intel_huc_delayed_load_status status; + } delayed_load; }; void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); +void intel_huc_suspend(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); +int intel_huc_wait_for_auth_complete(struct intel_huc *huc); int intel_huc_check_status(struct intel_huc *huc); void intel_huc_update_auth_status(struct intel_huc *huc); +bool intel_huc_is_authenticated(struct intel_huc *huc); + +void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); +void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); static inline int intel_huc_sanitize(struct intel_huc *huc) { @@ -56,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc) return huc->fw.loaded_via_gsc; } +static inline bool intel_huc_wait_required(struct intel_huc *huc) +{ + return intel_huc_is_used(huc) && intel_huc_is_loaded_by_gsc(huc) && + !intel_huc_is_authenticated(huc); +} + void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index 9d6ab1e01639..4f246416db17 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -3,9 +3,43 @@ * Copyright © 2014-2019 Intel Corporation */ +#include "gt/intel_gsc.h" #include "gt/intel_gt.h" +#include "intel_huc.h" #include "intel_huc_fw.h" #include "i915_drv.h" +#include "pxp/intel_pxp_huc.h" + +int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc) +{ + int ret; + + if (!intel_huc_is_loaded_by_gsc(huc)) + return -ENODEV; + + if (!intel_uc_fw_is_loadable(&huc->fw)) + return -ENOEXEC; + + /* + * If we abort a suspend, HuC might still be loaded when the mei + * component gets re-bound and this function called again. If so, just + * mark the HuC as loaded. + */ + if (intel_huc_is_authenticated(huc)) { + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING); + return 0; + } + + GEM_WARN_ON(intel_uc_fw_is_loaded(&huc->fw)); + + ret = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp); + if (ret) + return ret; + + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_TRANSFERRED); + + return intel_huc_wait_for_auth_complete(huc); +} /** * intel_huc_fw_upload() - load HuC uCode to device via DMA transfer diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h index 12f264ee3e0b..db42e238b45f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h @@ -8,6 +8,7 @@ struct intel_huc; +int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc); int intel_huc_fw_upload(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index b91ad4aede1f..de2843dc1307 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -93,7 +93,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \ fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1)) -#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \ +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \ + fw_def(DG2, 0, huc_gsc(dg2)) \ fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \ fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, huc_raw(tgl)) \ @@ -141,6 +142,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define MAKE_HUC_FW_PATH_BLANK(prefix_) \ __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc") +#define MAKE_HUC_FW_PATH_GSC(prefix_) \ + __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc") + #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_) @@ -153,7 +157,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, MODULE_FIRMWARE(uc_); INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) -INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP) +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC) /* * The next expansion of the table macros (in __uc_fw_auto_select below) provides @@ -168,6 +172,7 @@ struct __packed uc_fw_blob { u8 major; u8 minor; u8 patch; + bool loaded_via_gsc; }; #define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ @@ -176,16 +181,16 @@ struct __packed uc_fw_blob { .patch = patch_, \ .path = path_, -#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \ +#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \ { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ - .legacy = false } + .legacy = false, .loaded_via_gsc = gsc_ } #define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \ { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ .legacy = true } #define GUC_FW_BLOB(prefix_, major_, minor_) \ - UC_FW_BLOB_NEW(major_, minor_, 0, \ + UC_FW_BLOB_NEW(major_, minor_, 0, false, \ MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_)) #define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ @@ -193,12 +198,15 @@ struct __packed uc_fw_blob { MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) #define HUC_FW_BLOB(prefix_) \ - UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_)) + UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_)) #define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ UC_FW_BLOB_OLD(major_, minor_, patch_, \ MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) +#define HUC_FW_BLOB_GSC(prefix_) \ + UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_)) + struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ @@ -224,7 +232,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP) }; static const struct uc_fw_platform_requirement blobs_huc[] = { - INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP) + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) }; static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, @@ -272,6 +280,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) uc_fw->file_wanted.path = blob->path; uc_fw->file_wanted.major_ver = blob->major; uc_fw->file_wanted.minor_ver = blob->minor; + uc_fw->loaded_via_gsc = blob->loaded_via_gsc; found = true; break; } @@ -904,7 +913,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) out_unpin: i915_gem_object_unpin_pages(uc_fw->obj); out: - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL); return err; } |