Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	360
1 file changed, 206 insertions(+), 154 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 245386e20c52..68c5af079ef8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -53,12 +53,6 @@ void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
 					    ringbuf->tail, ringbuf->size);
 }
 
-int intel_ring_space(struct intel_ringbuffer *ringbuf)
-{
-	intel_ring_update_space(ringbuf);
-	return ringbuf->space;
-}
-
 bool intel_engine_stopped(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->dev->dev_private;
@@ -919,24 +913,26 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 {
 	struct drm_device *dev = engine->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
 	int ret;
 
-	/* WaEnableLbsSlaRetryTimerDecrement:skl */
+	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */
+	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+
+	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */
 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
-	/* WaDisableKillLogic:bxt,skl */
+	/* WaDisableKillLogic:bxt,skl,kbl */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 		   ECOCHK_DIS_TLB);
 
-	/* WaClearFlowControlGpgpuContextSave:skl,bxt */
-	/* WaDisablePartialInstShootdown:skl,bxt */
+	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */
+	/* WaDisablePartialInstShootdown:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 			  FLOW_CONTROL_ENABLE |
 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
-	/* Syncing dependencies between camera and graphics:skl,bxt */
+	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
@@ -958,18 +954,18 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	 */
 	}
 
-	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
-	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt */
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl */
+	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 			  GEN9_ENABLE_YV12_BUGFIX |
 			  GEN9_ENABLE_GPGPU_PREEMPTION);
 
-	/* Wa4x4STCOptimizationDisable:skl,bxt */
-	/* WaDisablePartialResolveInVc:skl,bxt */
+	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl */
+	/* WaDisablePartialResolveInVc:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(CACHE_MODE_1,
 			  (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 			   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
 
-	/* WaCcsTlbPrefetchDisable:skl,bxt */
+	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl */
 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
@@ -979,31 +975,57 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 			  PIXEL_MASK_CAMMING_DISABLE);
 
-	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
-	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
-	if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) ||
-	    IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
-		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
-	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+	 * both tied to WaForceContextSaveRestoreNonCoherent
+	 * in some hsds for skl. We keep the tie for all gen9. The
+	 * documentation is a bit hazy and so we want to get common behaviour,
+	 * even though there is no clear evidence we would need both on kbl/bxt.
+	 * This area has been source of system hangs so we play it safe
+	 * and mimic the skl regardless of what bspec says.
+	 *
+	 * Use Force Non-Coherent whenever executing a 3D context. This
+	 * is a workaround for a possible hang in the unlikely event
+	 * a TLB invalidation occurs during a PSD flush.
+	 */
+
+	/* WaForceEnableNonCoherent:skl,bxt,kbl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_NON_COHERENT);
+
+	/* WaDisableHDCInvalidation:skl,bxt,kbl */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		   BDW_DISABLE_HDC_INVALIDATION);
 
-	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
-	if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
+	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
 
-	/* WaDisableSTUnitPowerOptimization:skl,bxt */
+	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
-	/* WaOCLCoherentLineFlush:skl,bxt */
+	/* WaOCLCoherentLineFlush:skl,bxt,kbl */
 	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
 				    GEN8_LQSC_FLUSH_COHERENT_LINES));
 
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
+	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */
+	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
+	if (ret)
+		return ret;
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
 	if (ret)
 		return ret;
 
-	/* WaAllowUMDToModifyHDCChicken1:skl,bxt */
+	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
 	if (ret)
 		return ret;
@@ -1098,22 +1120,6 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-	/* This is tied to WaForceContextSaveRestoreNonCoherent */
-	if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) {
-		/*
-		 * Use Force Non-Coherent whenever executing a 3D context. This
-		 * is a workaround for a possible hang in the unlikely event
-		 * a TLB invalidation occurs during a PSD flush.
-		 */
-		/* WaForceEnableNonCoherent:skl */
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FORCE_NON_COHERENT);
-
-		/* WaDisableHDCInvalidation:skl */
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   BDW_DISABLE_HDC_INVALIDATION);
-	}
-
 	/* WaBarrierPerformanceFixDisable:skl */
 	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
@@ -1126,6 +1132,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 
+	/* WaDisableGafsUnitClkGating:skl */
+	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
 	/* WaDisableLSQCROPERFforOCL:skl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
 	if (ret)
@@ -1180,6 +1189,63 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
 			return ret;
 	}
 
+	/* WaInsertDummyPushConstPs:bxt */
+	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	return 0;
+}
+
+static int kbl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaEnableGapsTsvCreditFix:kbl */
+	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
+				   GEN9_GAPS_TSV_CREDIT_DISABLE));
+
+	/* WaDisableDynamicCreditSharing:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT(GAMT_CHKN_BIT_REG,
+			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FENCE_DEST_SLM_DISABLE);
+
+	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
+	 * involving this register should also be added to WA batch as required.
+	 */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
+		/* WaDisableLSQCROPERFforOCL:kbl */
+		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+			   GEN8_LQSC_RO_PERF_DIS);
+
+	/* WaInsertDummyPushConstPs:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableGafsUnitClkGating:kbl */
+	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSbeCacheDispatchPortSharing:kbl */
+	WA_SET_BIT_MASKED(
+		GEN7_HALF_SLICE_CHICKEN1,
+		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	/* WaDisableLSQCROPERFforOCL:kbl */
+	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -1205,6 +1271,9 @@ int init_workarounds_ring(struct intel_engine_cs *engine)
 	if (IS_BROXTON(dev))
 		return bxt_init_workarounds(engine);
 
+	if (IS_KABYLAKE(dev_priv))
+		return kbl_init_workarounds(engine);
+
 	return 0;
 }
 
@@ -1309,7 +1378,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
 		intel_ring_emit(signaller, seqno);
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
-					   MI_SEMAPHORE_TARGET(waiter->id));
+					   MI_SEMAPHORE_TARGET(waiter->hw_id));
 		intel_ring_emit(signaller, 0);
 	}
 
@@ -1349,7 +1418,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
 		intel_ring_emit(signaller, seqno);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
-					   MI_SEMAPHORE_TARGET(waiter->id));
+					   MI_SEMAPHORE_TARGET(waiter->hw_id));
 		intel_ring_emit(signaller, 0);
 	}
 
@@ -1573,6 +1642,8 @@ pc_render_add_request(struct drm_i915_gem_request *req)
 static void
 gen6_seqno_barrier(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
 	 * ACTHD) before reading the status page.
@@ -1584,9 +1655,13 @@ gen6_seqno_barrier(struct intel_engine_cs *engine)
 	 * the write time to land, but that would incur a delay after every
 	 * batch i.e. much more frequent than a delay when waiting for the
 	 * interrupt (with the same net latency).
+	 *
+	 * Also note that to prevent whole machine hangs on gen7, we have to
+	 * take the spinlock to guard against concurrent cacheline access.
 	 */
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	spin_lock_irq(&dev_priv->uncore.lock);
 	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
+	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
 static u32
@@ -2312,51 +2387,6 @@ void intel_cleanup_engine(struct intel_engine_cs *engine)
 	engine->dev = NULL;
 }
 
-static int ring_wait_for_space(struct intel_engine_cs *engine, int n)
-{
-	struct intel_ringbuffer *ringbuf = engine->buffer;
-	struct drm_i915_gem_request *request;
-	unsigned space;
-	int ret;
-
-	if (intel_ring_space(ringbuf) >= n)
-		return 0;
-
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
-	list_for_each_entry(request, &engine->request_list, list) {
-		space = __intel_ring_space(request->postfix, ringbuf->tail,
-					   ringbuf->size);
-		if (space >= n)
-			break;
-	}
-
-	if (WARN_ON(&request->list == &engine->request_list))
-		return -ENOSPC;
-
-	ret = i915_wait_request(request);
-	if (ret)
-		return ret;
-
-	ringbuf->space = space;
-	return 0;
-}
-
-static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
-{
-	uint32_t __iomem *virt;
-	int rem = ringbuf->size - ringbuf->tail;
-
-	virt = ringbuf->virtual_start + ringbuf->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ringbuf->tail = 0;
-	intel_ring_update_space(ringbuf);
-}
-
 int intel_engine_idle(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *req;
@@ -2398,63 +2428,82 @@ int intel_ring_reserve_space(struct drm_i915_gem_request *request)
 
 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
 {
-	WARN_ON(ringbuf->reserved_size);
-	WARN_ON(ringbuf->reserved_in_use);
-
+	GEM_BUG_ON(ringbuf->reserved_size);
 	ringbuf->reserved_size = size;
 }
 
 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
 {
-	WARN_ON(ringbuf->reserved_in_use);
-
+	GEM_BUG_ON(!ringbuf->reserved_size);
 	ringbuf->reserved_size = 0;
-	ringbuf->reserved_in_use = false;
 }
 
 void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
 {
-	WARN_ON(ringbuf->reserved_in_use);
-
-	ringbuf->reserved_in_use = true;
-	ringbuf->reserved_tail = ringbuf->tail;
+	GEM_BUG_ON(!ringbuf->reserved_size);
+	ringbuf->reserved_size = 0;
 }
 
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 {
-	WARN_ON(!ringbuf->reserved_in_use);
-	if (ringbuf->tail > ringbuf->reserved_tail) {
-		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
-		     "request reserved size too small: %d vs %d!\n",
-		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
-	} else {
+	GEM_BUG_ON(ringbuf->reserved_size);
+}
+
+static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
+{
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
+	struct intel_engine_cs *engine = req->engine;
+	struct drm_i915_gem_request *target;
+
+	intel_ring_update_space(ringbuf);
+	if (ringbuf->space >= bytes)
+		return 0;
+
+	/*
+	 * Space is reserved in the ringbuffer for finalising the request,
+	 * as that cannot be allowed to fail. During request finalisation,
+	 * reserved_space is set to 0 to stop the overallocation and the
+	 * assumption is that then we never need to wait (which has the
+	 * risk of failing with EINTR).
+	 *
+	 * See also i915_gem_request_alloc() and i915_add_request().
+	 */
+	GEM_BUG_ON(!ringbuf->reserved_size);
+
+	list_for_each_entry(target, &engine->request_list, list) {
+		unsigned space;
+
 		/*
-		 * The ring was wrapped while the reserved space was in use.
-		 * That means that some unknown amount of the ring tail was
-		 * no-op filled and skipped. Thus simply adding the ring size
-		 * to the tail and doing the above space check will not work.
-		 * Rather than attempt to track how much tail was skipped,
-		 * it is much simpler to say that also skipping the sanity
-		 * check every once in a while is not a big issue.
+		 * The request queue is per-engine, so can contain requests
+		 * from multiple ringbuffers. Here, we must ignore any that
+		 * aren't from the ringbuffer we're considering.
 		 */
+		if (target->ringbuf != ringbuf)
+			continue;
+
+		/* Would completion of this request free enough space? */
+		space = __intel_ring_space(target->postfix, ringbuf->tail,
+					   ringbuf->size);
+		if (space >= bytes)
+			break;
 	}
 
-	ringbuf->reserved_size = 0;
-	ringbuf->reserved_in_use = false;
+	if (WARN_ON(&target->list == &engine->request_list))
+		return -ENOSPC;
+
+	return i915_wait_request(target);
 }
 
-static int __intel_ring_prepare(struct intel_engine_cs *engine, int bytes)
+int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
-	struct intel_ringbuffer *ringbuf = engine->buffer;
-	int remain_usable = ringbuf->effective_size - ringbuf->tail;
+	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	int remain_actual = ringbuf->size - ringbuf->tail;
-	int ret, total_bytes, wait_bytes = 0;
+	int remain_usable = ringbuf->effective_size - ringbuf->tail;
+	int bytes = num_dwords * sizeof(u32);
+	int total_bytes, wait_bytes;
 	bool need_wrap = false;
 
-	if (ringbuf->reserved_in_use)
-		total_bytes = bytes;
-	else
-		total_bytes = bytes + ringbuf->reserved_size;
+	total_bytes = bytes + ringbuf->reserved_size;
 
 	if (unlikely(bytes > remain_usable)) {
 		/*
@@ -2463,44 +2512,42 @@ static int __intel_ring_prepare(struct intel_engine_cs *engine, int bytes)
 		 */
 		wait_bytes = remain_actual + total_bytes;
 		need_wrap = true;
+	} else if (unlikely(total_bytes > remain_usable)) {
+		/*
+		 * The base request will fit but the reserved space
+		 * falls off the end. So we don't need an immediate wrap
+		 * and only need to effectively wait for the reserved
+		 * size space from the start of ringbuffer.
+		 */
+		wait_bytes = remain_actual + ringbuf->reserved_size;
 	} else {
-		if (unlikely(total_bytes > remain_usable)) {
-			/*
-			 * The base request will fit but the reserved space
-			 * falls off the end. So don't need an immediate wrap
-			 * and only need to effectively wait for the reserved
-			 * size space from the start of ringbuffer.
-			 */
-			wait_bytes = remain_actual + ringbuf->reserved_size;
-		} else if (total_bytes > ringbuf->space) {
-			/* No wrapping required, just waiting. */
-			wait_bytes = total_bytes;
-		}
+		/* No wrapping required, just waiting. */
+		wait_bytes = total_bytes;
 	}
 
-	if (wait_bytes) {
-		ret = ring_wait_for_space(engine, wait_bytes);
+	if (wait_bytes > ringbuf->space) {
+		int ret = wait_for_space(req, wait_bytes);
 		if (unlikely(ret))
 			return ret;
 
-		if (need_wrap)
-			__wrap_ring_buffer(ringbuf);
+		intel_ring_update_space(ringbuf);
+		if (unlikely(ringbuf->space < wait_bytes))
+			return -EAGAIN;
 	}
 
-	return 0;
-}
+	if (unlikely(need_wrap)) {
+		GEM_BUG_ON(remain_actual > ringbuf->space);
+		GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size);
 
-int intel_ring_begin(struct drm_i915_gem_request *req,
-		     int num_dwords)
-{
-	struct intel_engine_cs *engine = req->engine;
-	int ret;
-
-	ret = __intel_ring_prepare(engine, num_dwords * sizeof(uint32_t));
-	if (ret)
-		return ret;
+		/* Fill the tail with MI_NOOP */
+		memset(ringbuf->virtual_start + ringbuf->tail,
+		       0, remain_actual);
+		ringbuf->tail = 0;
+		ringbuf->space -= remain_actual;
+	}
 
-	engine->buffer->space -= num_dwords * sizeof(uint32_t);
+	ringbuf->space -= bytes;
+	GEM_BUG_ON(ringbuf->space < 0);
 	return 0;
 }
 
@@ -2772,6 +2819,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	engine->name = "render ring";
 	engine->id = RCS;
 	engine->exec_id = I915_EXEC_RENDER;
+	engine->hw_id = 0;
 	engine->mmio_base = RENDER_RING_BASE;
 
 	if (INTEL_INFO(dev)->gen >= 8) {
@@ -2923,6 +2971,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 	engine->name = "bsd ring";
 	engine->id = VCS;
 	engine->exec_id = I915_EXEC_BSD;
+	engine->hw_id = 1;
 	engine->write_tail = ring_write_tail;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
@@ -3001,6 +3050,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 	engine->name = "bsd2 ring";
 	engine->id = VCS2;
 	engine->exec_id = I915_EXEC_BSD;
+	engine->hw_id = 4;
 	engine->write_tail = ring_write_tail;
 	engine->mmio_base = GEN8_BSD2_RING_BASE;
 
@@ -3033,6 +3083,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 	engine->name = "blitter ring";
 	engine->id = BCS;
 	engine->exec_id = I915_EXEC_BLT;
+	engine->hw_id = 2;
 	engine->mmio_base = BLT_RING_BASE;
 	engine->write_tail = ring_write_tail;
 
@@ -3092,6 +3143,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 	engine->name = "video enhancement ring";
 	engine->id = VECS;
 	engine->exec_id = I915_EXEC_VEBOX;
+	engine->hw_id = 3;
 	engine->mmio_base = VEBOX_RING_BASE;
 	engine->write_tail = ring_write_tail;