Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	304
1 file changed, 217 insertions, 87 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e5b3c6dbd467..cd79c3843452 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -502,6 +502,68 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
 	I915_WRITE(HWS_PGA, addr);
 }
 
+static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	u32 mmio = 0;
+
+	/* The ring status page addresses are no longer next to the rest of
+	 * the ring registers as of gen7.
+	 */
+	if (IS_GEN7(dev)) {
+		switch (ring->id) {
+		case RCS:
+			mmio = RENDER_HWS_PGA_GEN7;
+			break;
+		case BCS:
+			mmio = BLT_HWS_PGA_GEN7;
+			break;
+		/*
+		 * VCS2 actually doesn't exist on Gen7. Only shut up
+		 * gcc switch check warning
+		 */
+		case VCS2:
+		case VCS:
+			mmio = BSD_HWS_PGA_GEN7;
+			break;
+		case VECS:
+			mmio = VEBOX_HWS_PGA_GEN7;
+			break;
+		}
+	} else if (IS_GEN6(ring->dev)) {
+		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
+	} else {
+		/* XXX: gen8 returns to sanity */
+		mmio = RING_HWS_PGA(ring->mmio_base);
+	}
+
+	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
+	POSTING_READ(mmio);
+
+	/*
+	 * Flush the TLB for this page
+	 *
+	 * FIXME: These two bits have disappeared on gen8, so a question
+	 * arises: do we still need this and if so how should we go about
+	 * invalidating the TLB?
+	 */
+	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
+		u32 reg = RING_INSTPM(ring->mmio_base);
+
+		/* ring should be idle before issuing a sync flush*/
+		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
+
+		I915_WRITE(reg,
+			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+					      INSTPM_SYNC_FLUSH));
+		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
+			     1000))
+			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+				  ring->name);
+	}
+}
+
 static bool stop_ring(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
@@ -788,12 +850,14 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
 	 * workaround for for a possible hang in the unlikely event a TLB
 	 * invalidation occurs during a PSD flush.
 	 */
-	/* WaForceEnableNonCoherent:bdw */
-	/* WaHdcDisableFetchWhenMasked:bdw */
-	/* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  /* WaForceEnableNonCoherent:bdw */
 			  HDC_FORCE_NON_COHERENT |
+			  /* WaForceContextSaveRestoreNonCoherent:bdw */
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  /* WaHdcDisableFetchWhenMasked:bdw */
 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
 			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 
 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
@@ -870,9 +934,132 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
 			    GEN6_WIZ_HASHING_MASK,
 			    GEN6_WIZ_HASHING_16x4);
 
+	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
+	    INTEL_REVID(dev) == SKL_REVID_D0)
+		/* WaBarrierPerformanceFixDisable:skl */
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FENCE_DEST_SLM_DISABLE |
+				  HDC_BARRIER_PERFORMANCE_DISABLE);
+
+	return 0;
+}
+
+static int gen9_init_workarounds(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* WaDisablePartialInstShootdown:skl */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+	/* Syncing dependencies between camera and graphics */
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+	if (INTEL_REVID(dev) == SKL_REVID_A0 ||
+	    INTEL_REVID(dev) == SKL_REVID_B0) {
+		/* WaDisableDgMirrorFixInHalfSliceChicken5:skl */
+		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+				  GEN9_DG_MIRROR_FIX_ENABLE);
+	}
+
+	if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) {
+		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl */
+		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
+				  GEN9_RHWO_OPTIMIZATION_DISABLE);
+		WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0,
+				  DISABLE_PIXEL_MASK_CAMMING);
+	}
+
+	if (INTEL_REVID(dev) >= SKL_REVID_C0) {
+		/* WaEnableYV12BugFixInHalfSliceChicken7:skl */
+		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+				  GEN9_ENABLE_YV12_BUGFIX);
+	}
+
+	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+		/*
+		 *Use Force Non-Coherent whenever executing a 3D context. This
+		 * is a workaround for a possible hang in the unlikely event
+		 * a TLB invalidation occurs during a PSD flush.
+		 */
+		/* WaForceEnableNonCoherent:skl */
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FORCE_NON_COHERENT);
+	}
+
+	/* Wa4x4STCOptimizationDisable:skl */
+	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+	/* WaDisablePartialResolveInVc:skl */
+	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+	/* WaCcsTlbPrefetchDisable:skl */
+	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+			  GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+	return 0;
+}
+
+static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u8 vals[3] = { 0, 0, 0 };
+	unsigned int i;
+
+	for (i = 0; i < 3; i++) {
+		u8 ss;
+
+		/*
+		 * Only consider slices where one, and only one, subslice has 7
+		 * EUs
+		 */
+		if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
+			continue;
+
+		/*
+		 * subslice_7eu[i] != 0 (because of the check above) and
+		 * ss_max == 4 (maximum number of subslices possible per slice)
+		 *
+		 * ->    0 <= ss <= 3;
+		 */
+		ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
+		vals[i] = 3 - ss;
+	}
+
+	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+		return 0;
+
+	/* Tune IZ hashing. See intel_device_info_runtime_init() */
+	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+			    GEN9_IZ_HASHING_MASK(2) |
+			    GEN9_IZ_HASHING_MASK(1) |
+			    GEN9_IZ_HASHING_MASK(0),
+			    GEN9_IZ_HASHING(2, vals[2]) |
+			    GEN9_IZ_HASHING(1, vals[1]) |
+			    GEN9_IZ_HASHING(0, vals[0]));
+
 	return 0;
 }
 
+static int skl_init_workarounds(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	gen9_init_workarounds(ring);
+
+	/* WaDisablePowerCompilerClockGating:skl */
+	if (INTEL_REVID(dev) == SKL_REVID_B0)
+		WA_SET_BIT_MASKED(HIZ_CHICKEN,
+				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
+
+	return skl_tune_iz_hashing(ring);
+}
+
 int init_workarounds_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -888,6 +1075,11 @@ int init_workarounds_ring(struct intel_engine_cs *ring)
 	if (IS_CHERRYVIEW(dev))
 		return chv_init_workarounds(ring);
 
+	if (IS_SKYLAKE(dev))
+		return skl_init_workarounds(ring);
+	else if (IS_GEN9(dev))
+		return gen9_init_workarounds(ring);
+
 	return 0;
 }
 
@@ -1386,68 +1578,6 @@ i8xx_ring_put_irq(struct intel_engine_cs *ring)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
-void intel_ring_setup_status_page(struct intel_engine_cs *ring)
-{
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 mmio = 0;
-
-	/* The ring status page addresses are no longer next to the rest of
-	 * the ring registers as of gen7.
-	 */
-	if (IS_GEN7(dev)) {
-		switch (ring->id) {
-		case RCS:
-			mmio = RENDER_HWS_PGA_GEN7;
-			break;
-		case BCS:
-			mmio = BLT_HWS_PGA_GEN7;
-			break;
-		/*
-		 * VCS2 actually doesn't exist on Gen7. Only shut up
-		 * gcc switch check warning
-		 */
-		case VCS2:
-		case VCS:
-			mmio = BSD_HWS_PGA_GEN7;
-			break;
-		case VECS:
-			mmio = VEBOX_HWS_PGA_GEN7;
-			break;
-		}
-	} else if (IS_GEN6(ring->dev)) {
-		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
-	} else {
-		/* XXX: gen8 returns to sanity */
-		mmio = RING_HWS_PGA(ring->mmio_base);
-	}
-
-	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
-	POSTING_READ(mmio);
-
-	/*
-	 * Flush the TLB for this page
-	 *
-	 * FIXME: These two bits have disappeared on gen8, so a question
-	 * arises: do we still need this and if so how should we go about
-	 * invalidating the TLB?
-	 */
-	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
-		u32 reg = RING_INSTPM(ring->mmio_base);
-
-		/* ring should be idle before issuing a sync flush*/
-		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
-		I915_WRITE(reg,
-			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
-					      INSTPM_SYNC_FLUSH));
-		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
-			     1000))
-			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
-				  ring->name);
-	}
-}
-
 static int
 bsd_ring_flush(struct intel_engine_cs *ring,
 	       u32 invalidate_domains,
@@ -1611,7 +1741,7 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)
 static int
 i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 			 u64 offset, u32 length,
-			 unsigned flags)
+			 unsigned dispatch_flags)
 {
 	int ret;
 
@@ -1622,7 +1752,8 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
 			MI_BATCH_GTT |
-			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
+			(dispatch_flags & I915_DISPATCH_SECURE ?
+			 0 : MI_BATCH_NON_SECURE_I965));
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
 
@@ -1635,8 +1766,8 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
 static int
 i830_dispatch_execbuffer(struct intel_engine_cs *ring,
-				u64 offset, u32 len,
-				unsigned flags)
+			 u64 offset, u32 len,
+			 unsigned dispatch_flags)
 {
 	u32 cs_offset = ring->scratch.gtt_offset;
 	int ret;
@@ -1654,7 +1785,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
-	if ((flags & I915_DISPATCH_PINNED) == 0) {
+	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
 		if (len > I830_BATCH_LIMIT)
 			return -ENOSPC;
 
@@ -1686,7 +1817,8 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 			return ret;
 
 		intel_ring_emit(ring, MI_BATCH_BUFFER);
-		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
+						0 : MI_BATCH_NON_SECURE));
 		intel_ring_emit(ring, offset + len - 8);
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_advance(ring);
@@ -1697,7 +1829,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 static int
 i915_dispatch_execbuffer(struct intel_engine_cs *ring,
 			 u64 offset, u32 len,
-			 unsigned flags)
+			 unsigned dispatch_flags)
 {
 	int ret;
 
@@ -1706,7 +1838,8 @@ i915_dispatch_execbuffer(struct intel_engine_cs *ring,
 		return ret;
 
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
-	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
+					0 : MI_BATCH_NON_SECURE));
 	intel_ring_advance(ring);
 
 	return 0;
@@ -2097,6 +2230,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring)
 
 	kref_init(&request->ref);
 	request->ring = ring;
+	request->ringbuf = ring->buffer;
 	request->uniq = dev_private->request_uniq++;
 
 	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
@@ -2273,9 +2407,10 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 static int
 gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 			      u64 offset, u32 len,
-			      unsigned flags)
+			      unsigned dispatch_flags)
 {
-	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
+	bool ppgtt = USES_PPGTT(ring->dev) &&
+		     !(dispatch_flags & I915_DISPATCH_SECURE);
 	int ret;
 
 	ret = intel_ring_begin(ring, 4);
@@ -2294,8 +2429,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 static int
 hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
-			      u64 offset, u32 len,
-			      unsigned flags)
+			     u64 offset, u32 len,
+			     unsigned dispatch_flags)
 {
 	int ret;
 
@@ -2305,7 +2440,7 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
-			(flags & I915_DISPATCH_SECURE ?
+			(dispatch_flags & I915_DISPATCH_SECURE ?
 			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
 	/* bit0-7 is the length on GEN6+ */
 	intel_ring_emit(ring, offset);
@@ -2317,7 +2452,7 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 static int
 gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 			      u64 offset, u32 len,
-			      unsigned flags)
+			      unsigned dispatch_flags)
 {
 	int ret;
 
@@ -2327,7 +2462,8 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
-			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
+			(dispatch_flags & I915_DISPATCH_SECURE ?
+			 0 : MI_BATCH_NON_SECURE_I965));
 	/* bit0-7 is the length on GEN6+ */
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
@@ -2612,19 +2748,13 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 }
 
 /**
- * Initialize the second BSD ring for Broadwell GT3.
- * It is noted that this only exists on Broadwell GT3.
+ * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
 */
 int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
 
-	if ((INTEL_INFO(dev)->gen != 8)) {
-		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
-		return -EINVAL;
-	}
-
 	ring->name = "bsd2 ring";
 	ring->id = VCS2;
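
A note on the WA_SET_BIT_MASKED()/_MASKED_BIT_ENABLE() writes that dominate this diff: the chicken and INSTPM registers touched here carry a write-enable mask in their upper 16 bits, so a single write can flip individual bits without a read-modify-write cycle. Below is a minimal standalone sketch of that encoding. The two macro definitions and the INSTPM bit positions follow i915_reg.h; the main() harness and printf are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Masked-bit helpers, as defined in i915_reg.h: the upper 16 bits of the
 * written value tell the hardware which of the lower 16 bits to latch. */
#define _MASKED_BIT_ENABLE(a)  (((a) << 16) | (a))
#define _MASKED_BIT_DISABLE(a) ((a) << 16)

/* INSTPM bit positions, copied from i915_reg.h. */
#define INSTPM_TLB_INVALIDATE (1 << 9)
#define INSTPM_SYNC_FLUSH     (1 << 5)

int main(void)
{
	uint32_t v = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					INSTPM_SYNC_FLUSH);

	/* The mask half protects every other INSTPM bit from the write. */
	printf("write 0x%08x (mask 0x%04x, value 0x%04x)\n",
	       (unsigned)v, (unsigned)(v >> 16), (unsigned)(v & 0xffff));
	return 0;
}

The sync-flush sequence in intel_ring_setup_status_page() relies on exactly this property: the write enables only the two bits of interest, after which the driver polls until the hardware clears INSTPM_SYNC_FLUSH.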
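The arithmetic in skl_tune_iz_hashing() is easy to misread in diff form. For each of the three slices, the code acts only when exactly one subslice kept 7 EUs (hweight8() == 1); ffs() - 1 then yields that subslice's index ss in 0..3, and 3 - ss is the value programmed into the per-slice GEN9_IZ_HASHING field. Here is a userspace-only walkthrough of that computation; the subslice_7eu sample values are made up for illustration and do not describe any real part.

#include <stdio.h>
#include <strings.h>		/* ffs() */

/* Population count, standing in for the kernel's hweight8(). */
static int hweight8(unsigned char v)
{
	return __builtin_popcount(v);
}

int main(void)
{
	/* Hypothetical per-slice masks of subslices that kept 7 EUs:
	 * slice 0 -> subslice 2, slice 1 -> two subslices (skipped),
	 * slice 2 -> subslice 0. */
	unsigned char subslice_7eu[3] = { 0x4, 0x3, 0x1 };
	unsigned char vals[3] = { 0, 0, 0 };
	int i;

	for (i = 0; i < 3; i++) {
		int ss;

		/* Only slices with exactly one 7-EU subslice are tuned. */
		if (hweight8(subslice_7eu[i]) != 1)
			continue;

		ss = ffs(subslice_7eu[i]) - 1;	/* 0 <= ss <= 3 */
		vals[i] = 3 - ss;
	}

	/* Expected output: vals = { 1, 0, 3 } */
	printf("vals = { %d, %d, %d }\n", vals[0], vals[1], vals[2]);
	return 0;
}

With these sample masks the loop produces vals = { 1, 0, 3 }; slice 1 is skipped because two of its subslices have 7 EUs, and an all-zero result would skip the GEN7_GT_MODE write entirely.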