From 78a033433a5ae4fee85511ee075bc9a48312c79e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Sep 2022 16:26:51 -0700 Subject: drm/i915/gt: Cleanup partial engine discovery failures If we abort driver initialisation in the middle of gt/engine discovery, some engines will be fully setup and some not. Those incompletely setup engines only have 'engine->release == NULL' and so will leak any of the common objects allocated. v2: - Drop the destroy_pinned_context() helper for now. It's not really worth it with just a single callsite at the moment. (Janusz) Signed-off-by: Chris Wilson Cc: Janusz Krzysztofik Signed-off-by: Matt Roper Reviewed-by: Janusz Krzysztofik Link: https://patchwork.freedesktop.org/patch/msgid/20220915232654.3283095-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1f7188129cd1..2ddcad497fa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1274,8 +1274,13 @@ int intel_engines_init(struct intel_gt *gt) return err; err = setup(engine); - if (err) + if (err) { + intel_engine_cleanup_common(engine); return err; + } + + /* The backend should now be responsible for cleanup */ + GEM_BUG_ON(engine->release == NULL); err = engine_init_common(engine); if (err) -- cgit From dfa13f1bfc8648041da6f39ca95364f1030af3b9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 14 Oct 2022 16:02:26 -0700 Subject: drm/i915/gen8: Create separate reg definitions for new MCR registers Gen8 was the first time our hardware had multicast registers (or at least the first time the multicast nature was exposed and MMIO accesses could be steered). There are some registers that transitioned from singleton behavior to multicast during the gen7 -> gen8 transition; let's duplicate the register definitions for those registers in preparation for upcoming patches that will handle MCR registers in a special manner. The registers adjusted are: * MISCCPCTL * SAMPLER_INSTDONE * ROW_INSTDONE * ROW_CHICKEN2 * HALF_SLICE_CHICKEN1 * HALF_SLICE_CHICKEN3 v2: - Use the gen8 version of HALF_SLICE_CHICKEN3 in GVT's gen9 engine MMIO list. (Bala) - Update to the gen8 version of MISCCPCTL in a couple new workarounds that were recently added for DG2/PVC. (Bala) Signed-off-by: Matt Roper Reviewed-by: Balasubramani Vivekanandan Reviewed-by: Balasubramani Vivekanandan Link: https://patchwork.freedesktop.org/patch/msgid/20221014230239.1023689-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 11 ++++++++++- drivers/gpu/drm/i915/gt/intel_workarounds.c | 26 +++++++++++++------------- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 2 +- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +- drivers/gpu/drm/i915/intel_gvt_mmio_table.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 9 files changed, 37 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 2ddcad497fa3..c408bac3c533 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1559,11 +1559,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, for_each_ss_steering(iter, engine->gt, slice, subslice) { instdone->sampler[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, + GEN8_SAMPLER_INSTDONE, slice, subslice); instdone->row[slice][subslice] = intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, + GEN8_ROW_INSTDONE, slice, subslice); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 2d06610cbec9..d62be9b28863 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -648,6 +648,9 @@ #define GEN7_MISCCPCTL _MMIO(0x9424) #define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0) + +#define GEN8_MISCCPCTL _MMIO(0x9424) +#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0) #define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) #define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2) #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4) @@ -1069,18 +1072,22 @@ #define GEN12_GAM_DONE _MMIO(0xcf68) #define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */ +#define GEN8_HALF_SLICE_CHICKEN1 _MMIO(0xe100) #define GEN7_MAX_PS_THREAD_DEP (8 << 12) #define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10) #define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4) #define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) +#define GEN8_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define GEN8_ROW_INSTDONE _MMIO(0xe164) #define HALF_SLICE_CHICKEN2 _MMIO(0xe180) #define GEN8_ST_PO_DISABLE (1 << 13) -#define HALF_SLICE_CHICKEN3 _MMIO(0xe184) +#define HSW_HALF_SLICE_CHICKEN3 _MMIO(0xe184) +#define GEN8_HALF_SLICE_CHICKEN3 _MMIO(0xe184) #define HSW_SAMPLE_C_PERFORMANCE (1 << 9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5) @@ -1133,6 +1140,8 @@ #define DISABLE_EARLY_EOT REG_BIT(1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) + +#define GEN8_ROW_CHICKEN2 _MMIO(0xe4f4) #define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15) #define GEN12_DISABLE_EARLY_READ REG_BIT(14) #define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b8eb20a155f0..47a683dcc8a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -295,10 +295,10 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, + wa_masked_en(wal, GEN8_ROW_CHICKEN2, DOP_CLOCK_GATING_DISABLE); - wa_masked_en(wal, HALF_SLICE_CHICKEN3, + wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); wa_masked_en(wal, HDC_CHICKEN0, @@ -386,7 +386,7 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - wa_masked_en(wal, HALF_SLICE_CHICKEN3, + wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ @@ -490,7 +490,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:kbl */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } @@ -514,7 +514,7 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:cfl */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } @@ -1517,7 +1517,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS); /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -1526,7 +1526,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) pvc_init_mcr(gt, wal); /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + wa_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); } static void @@ -2117,7 +2117,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14013392000:dg2_g11 */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); + wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); } if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || @@ -2163,7 +2163,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) DISABLE_128B_EVICTION_COMMAND_UDW); /* Wa_22012856258:dg2 */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, + wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION); /* @@ -2260,7 +2260,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); + wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* @@ -2289,7 +2289,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, + wa_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* @@ -2508,7 +2508,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) if (IS_HASWELL(i915)) { /* WaSampleCChickenBitEnable:hsw */ wa_masked_en(wal, - HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); + HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); wa_masked_dis(wal, CACHE_MODE_0_GEN7, @@ -2806,7 +2806,7 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); /* Wa_14010449647:xehpsdv */ - wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); /* Wa_18011725039:xehpsdv */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 8f1165146013..9495a7928bc8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -244,8 +244,8 @@ struct __ext_steer_reg { }; static const struct __ext_steer_reg xe_extregs[] = { - {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE}, - {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE} + {"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE}, + {"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE} }; static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index a0372735cddb..9229243992c2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -35,7 +35,7 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) if (GRAPHICS_VER(uncore->i915) == 9) { /* DOP Clock Gating Enable for GuC clocks */ - intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + intel_uncore_rmw(uncore, GEN8_MISCCPCTL, 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE); /* allows for 5us (in 10ns units) before GT can go to RC6 */ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e7e33f95cc51..683f88f5d669 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2257,7 +2257,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index c85bafe7539e..485ff6c8e558 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -111,7 +111,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */ {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ - {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ + {RCS0, GEN8_HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index e015bc91a26f..51d5c3c804d5 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -102,7 +102,7 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(_MMIO(0x2438)); MMIO_D(_MMIO(0x243c)); MMIO_D(_MMIO(0x7018)); - MMIO_D(HALF_SLICE_CHICKEN3); + MMIO_D(HSW_HALF_SLICE_CHICKEN3); MMIO_D(GEN7_HALF_SLICE_CHICKEN1); /* display */ MMIO_F(_MMIO(0x60220), 0x20); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8f86f56e7ca4..21bc2f356451 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4325,8 +4325,8 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, u32 val; /* WaTempDisableDOPClkGating:bdw */ - misccpctl = intel_uncore_read(&dev_priv->uncore, GEN7_MISCCPCTL); - intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); + misccpctl = intel_uncore_read(&dev_priv->uncore, GEN8_MISCCPCTL); + intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, misccpctl & ~GEN8_DOP_CLOCK_GATE_ENABLE); val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1); val &= ~L3_PRIO_CREDITS_MASK; @@ -4340,7 +4340,7 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, */ intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1); udelay(1); - intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, misccpctl); + intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, misccpctl); } static void icl_init_clock_gating(struct drm_i915_private *dev_priv) @@ -4500,8 +4500,9 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WaDisableDopClockGating:skl */ - intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, intel_uncore_read(&dev_priv->uncore, GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE); + intel_uncore_write(&dev_priv->uncore, GEN8_MISCCPCTL, + intel_uncore_read(&dev_priv->uncore, GEN8_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_ENABLE); /* WAC6entrylatency:skl */ intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) | -- cgit From 568944af44e7538ed5d1389dabf56e938afdaf4f Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 6 Oct 2022 14:38:10 -0700 Subject: drm/i915/guc: Limit scheduling properties to avoid overflow GuC converts the pre-emption timeout and timeslice quantum values into clock ticks internally. That significantly reduces the point of 32bit overflow. On current platforms, worst case scenario is approximately 110 seconds. Rather than allowing the user to set higher values and then get confused by early timeouts, add limits when setting these values. v2: Add helper functions for clamping (review feedback from Tvrtko). v3: Add a bunch of BUG_ON range checks in addition to the checks already in the clamping functions (Tvrtko) Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221006213813.1563435-2-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 6 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 69 +++++++++++++++++++++++ drivers/gpu/drm/i915/gt/sysfs_engines.c | 25 ++++---- drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 21 +++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +++ 5 files changed, 119 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 04e435bce79b..cbc8b857d5f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine) return engine->hung_ce; } +u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value); +u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c408bac3c533..b1c16aac2a9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -512,6 +512,26 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; } + /* Cap properties according to any system limits */ +#define CLAMP_PROP(field) \ + do { \ + u64 clamp = intel_clamp_##field(engine, engine->props.field); \ + if (clamp != engine->props.field) { \ + drm_notice(&engine->i915->drm, \ + "Warning, clamping %s to %lld to prevent overflow\n", \ + #field, clamp); \ + engine->props.field = clamp; \ + } \ + } while (0) + + CLAMP_PROP(heartbeat_interval_ms); + CLAMP_PROP(max_busywait_duration_ns); + CLAMP_PROP(preempt_timeout_ms); + CLAMP_PROP(stop_timeout_ms); + CLAMP_PROP(timeslice_duration_ms); + +#undef CLAMP_PROP + engine->defaults = engine->props; /* never to change again */ engine->context_size = intel_engine_context_size(gt, engine->class); @@ -534,6 +554,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, return 0; } +u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value) +{ + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value) +{ + value = min(value, jiffies_to_nsecs(2)); + + return value; +} + +u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value) +{ + /* + * NB: The GuC API only supports 32bit values. However, the limit is further + * reduced due to internal calculations which would otherwise overflow. + */ + if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + value = min_t(u64, value, guc_policy_max_preempt_timeout_ms()); + + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value) +{ + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + +u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value) +{ + /* + * NB: The GuC API only supports 32bit values. However, the limit is further + * reduced due to internal calculations which would otherwise overflow. + */ + if (intel_guc_submission_is_wanted(&engine->gt->uc.guc)) + value = min_t(u64, value, guc_policy_max_exec_quantum_ms()); + + value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)); + + return value; +} + static void __setup_engine_capabilities(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 967031056202..f2d9858d827c 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_nsecs(2)) + clamped = intel_clamp_max_busywait_duration_ns(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.max_busywait_duration_ns, duration); @@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_timeslice_duration_ms(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.timeslice_duration_ms, duration); @@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long duration; + unsigned long long duration, clamped; int err; /* @@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_stop_timeout_ms(engine, duration); + if (duration != clamped) return -EINVAL; WRITE_ONCE(engine->props.stop_timeout_ms, duration); @@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long timeout; + unsigned long long timeout, clamped; int err; /* @@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_preempt_timeout_ms(engine, timeout); + if (timeout != clamped) return -EINVAL; WRITE_ONCE(engine->props.preempt_timeout_ms, timeout); @@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct intel_engine_cs *engine = kobj_to_engine(kobj); - unsigned long long delay; + unsigned long long delay, clamped; int err; /* @@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, if (err) return err; - if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + clamped = intel_clamp_heartbeat_interval_ms(engine, delay); + if (delay != clamped) return -EINVAL; err = intel_engine_set_heartbeat(engine, delay); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index e7a7fb450f44..968ebd79dce7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -327,6 +327,27 @@ struct guc_update_scheduling_policy { #define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 +/* + * GuC converts the timeout to clock ticks internally. Different platforms have + * different GuC clocks. Thus, the maximum value before overflow is platform + * dependent. Current worst case scenario is about 110s. So, the spec says to + * limit to 100s to be safe. + */ +#define GUC_POLICY_MAX_EXEC_QUANTUM_US (100 * 1000 * 1000UL) +#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US (100 * 1000 * 1000UL) + +static inline u32 guc_policy_max_exec_quantum_ms(void) +{ + BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX); + return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000; +} + +static inline u32 guc_policy_max_preempt_timeout_ms(void) +{ + BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX); + return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000; +} + struct guc_policies { u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; /* In micro seconds. How much time to allow before DPC processing is diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 693b07a97789..c433d35ae41a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2430,6 +2430,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) int ret; /* NB: For both of these, zero means disabled. */ + GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, + execution_quantum)); + GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, + preemption_timeout)); execution_quantum = engine->props.timeslice_duration_ms * 1000; preemption_timeout = engine->props.preempt_timeout_ms * 1000; @@ -2463,6 +2467,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine, desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; /* NB: For both of these, zero means disabled. */ + GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000, + desc->execution_quantum)); + GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000, + desc->preemption_timeout)); desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } -- cgit From c3bd49cd9a1043b963331e7fd874b380bed3f2bd Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 6 Oct 2022 14:38:11 -0700 Subject: drm/i915: Fix compute pre-emption w/a to apply to compute engines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An earlier patch added support for compute engines. However, it missed enabling the anti-pre-emption w/a for the new engine class. So move the 'compute capable' flag earlier and use it for the pre-emption w/a test. Fixes: c674c5b9342e ("drm/i915/xehp: CCS should use RCS setup functions") Cc: Tvrtko Ursulin Cc: Daniele Ceraolo Spurio Cc: Aravind Iddamsetty Cc: Matt Roper Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Lucas De Marchi Cc: John Harrison Cc: Jason Ekstrand Cc: "Michał Winiarski" Cc: Matthew Brost Cc: Chris Wilson Cc: Tejas Upadhyay Cc: Umesh Nerlige Ramappa Cc: "Thomas Hellström" Cc: Stuart Summers Cc: Matthew Auld Cc: Jani Nikula Cc: Ramalingam C Cc: Akeem G Abodunrin Signed-off-by: John Harrison Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221006213813.1563435-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b1c16aac2a9b..7c5b33ccedd0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->logical_mask = BIT(logical_instance); __sprint_engine_name(engine); + if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && + __ffs(CCS_MASK(engine->gt)) == engine->instance) || + engine->class == RENDER_CLASS) + engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; + + /* features common between engines sharing EUs */ + if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { + engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; + engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; + } + engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; engine->props.max_busywait_duration_ns = @@ -498,20 +509,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, CONFIG_DRM_I915_TIMESLICE_DURATION; /* Override to uninterruptible for OpenCL workloads. */ - if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) + if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) engine->props.preempt_timeout_ms = 0; - if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && - __ffs(CCS_MASK(engine->gt)) == engine->instance) || - engine->class == RENDER_CLASS) - engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; - - /* features common between engines sharing EUs */ - if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { - engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; - engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; - } - /* Cap properties according to any system limits */ #define CLAMP_PROP(field) \ do { \ -- cgit From d7a8680ec9fb217987a9569aba1abeed886805f0 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 6 Oct 2022 14:38:13 -0700 Subject: drm/i915: Improve long running compute w/a for GuC submission A workaround was added to the driver to allow compute workloads to run 'forever' by disabling pre-emption on the RCS engine for Gen12. It is not totally unbound as the heartbeat will kick in eventually and cause a reset of the hung engine. However, this does not work well in GuC submission mode. In GuC mode, the pre-emption timeout is how GuC detects hung contexts and triggers a per engine reset. Thus, disabling the timeout means also losing all per engine reset ability. A full GT reset will still occur when the heartbeat finally expires, but that is a much more destructive and undesirable mechanism. The purpose of the workaround is actually to give compute tasks longer to reach a pre-emption point after a pre-emption request has been issued. This is necessary because Gen12 does not support mid-thread pre-emption and compute tasks can have long running threads. So, rather than disabling the timeout completely, just set it to a 'long' value. v2: Review feedback from Tvrtko - must hard code the 'long' value instead of determining it algorithmically. So make it an extra CONFIG definition. Also, remove the execlist centric comment from the existing pre-emption timeout CONFIG option given that it applies to more than just execlists. Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Acked-by: Michal Mrozek Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221006213813.1563435-5-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/Kconfig.profile | 26 ++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 9 +++++++-- 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/gt/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 39328567c200..7cc38d25ee5c 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT default 640 # milliseconds help How long to wait (in milliseconds) for a preemption event to occur - when submitting a new context via execlists. If the current context - does not hit an arbitration point and yield to HW before the timer - expires, the HW will be reset to allow the more important context - to execute. + when submitting a new context. If the current context does not hit + an arbitration point and yield to HW before the timer expires, the + HW will be reset to allow the more important context to execute. + + This is adjustable via + /sys/class/drm/card?/engine/*/preempt_timeout_ms + + May be 0 to disable the timeout. + + The compiled in default may get overridden at driver probe time on + certain platforms and certain engines which will be reflected in the + sysfs control. + +config DRM_I915_PREEMPT_TIMEOUT_COMPUTE + int "Preempt timeout for compute engines (ms, jiffy granularity)" + default 7500 # milliseconds + help + How long to wait (in milliseconds) for a preemption event to occur + when submitting a new context to a compute capable engine. If the + current context does not hit an arbitration point and yield to HW + before the timer expires, the HW will be reset to allow the more + important context to execute. This is adjustable via /sys/class/drm/card?/engine/*/preempt_timeout_ms diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7c5b33ccedd0..3b7d750ad054 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -508,9 +508,14 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, engine->props.timeslice_duration_ms = CONFIG_DRM_I915_TIMESLICE_DURATION; - /* Override to uninterruptible for OpenCL workloads. */ + /* + * Mid-thread pre-emption is not available in Gen12. Unfortunately, + * some compute workloads run quite long threads. That means they get + * reset due to not pre-empting in a timely manner. So, bump the + * pre-emption timeout value to be much higher for compute engines. + */ if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE)) - engine->props.preempt_timeout_ms = 0; + engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE; /* Cap properties according to any system limits */ #define CLAMP_PROP(field) \ -- cgit