diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
60 files changed, 3730 insertions, 1610 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c index 1de5fbaa1cf9..3a21cf63b3f0 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c @@ -9,6 +9,7 @@ #include "debugfs_engines.h" #include "debugfs_gt.h" #include "debugfs_gt_pm.h" +#include "intel_sseu_debugfs.h" #include "uc/intel_uc_debugfs.h" #include "i915_drv.h" @@ -25,6 +26,7 @@ void debugfs_gt_register(struct intel_gt *gt) debugfs_engines_register(gt, root); debugfs_gt_pm_register(gt, root); + intel_sseu_debugfs_register(gt, root); intel_uc_debugfs_register(>->uc, root); } diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c new file mode 100644 index 000000000000..b491a64919c8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "gen2_engine_cs.h" +#include "i915_drv.h" +#include "intel_engine.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_ring.h" + +int gen2_emit_flush(struct i915_request *rq, u32 mode) +{ + unsigned int num_store_dw = 12; + u32 cmd, *cs; + + cmd = MI_FLUSH; + if (mode & EMIT_INVALIDATE) + cmd |= MI_READ_FLUSH; + + cs = intel_ring_begin(rq, 2 + 4 * num_store_dw); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + while (num_store_dw--) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32); + *cs++ = 0; + *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; + } + *cs++ = cmd; + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + u32 cmd, *cs; + int i; + + /* + * read/write caches: + * + * I915_GEM_DOMAIN_RENDER is always invalidated, but is + * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is + * also flushed at 2d versus 3d pipeline switches. + * + * read-only caches: + * + * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if + * MI_READ_FLUSH is set, and is always flushed on 965. + * + * I915_GEM_DOMAIN_COMMAND may not exist? + * + * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is + * invalidated when MI_EXE_FLUSH is set. + * + * I915_GEM_DOMAIN_VERTEX, which exists on 965, is + * invalidated with every MI_FLUSH. + * + * TLBs: + * + * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND + * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and + * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER + * are flushed at any MI_FLUSH. + */ + + cmd = MI_FLUSH; + if (mode & EMIT_INVALIDATE) { + cmd |= MI_EXE_FLUSH; + if (IS_G4X(rq->engine->i915) || IS_GEN(rq->engine->i915, 5)) + cmd |= MI_INVALIDATE_ISP; + } + + i = 2; + if (mode & EMIT_INVALIDATE) + i += 20; + + cs = intel_ring_begin(rq, i); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + + /* + * A random delay to let the CS invalidate take effect? Without this + * delay, the GPU relocation path fails as the CS does not see + * the updated contents. Just as important, if we apply the flushes + * to the EMIT_FLUSH branch (i.e. immediately after the relocation + * write and before the invalidate on the next batch), the relocations + * still fail. This implies that is a delay following invalidation + * that is required to reset the caches as opposed to a delay to + * ensure the memory is written. + */ + if (mode & EMIT_INVALIDATE) { + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + + for (i = 0; i < 12; i++) + *cs++ = MI_FLUSH; + + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + } + + *cs++ = cmd; + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs, + int flush, int post) +{ + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH; + + while (flush--) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32); + *cs++ = rq->fence.seqno; + } + + while (post--) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + } + + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return __gen2_emit_breadcrumb(rq, cs, 16, 8); +} + +u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return __gen2_emit_breadcrumb(rq, cs, 8, 8); +} + +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT SZ_256K +#define I830_TLB_ENTRIES (2) +#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT) +int i830_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs, cs_offset = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + + GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Evict the invalid PTE TLBs */ + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { + if (len > I830_BATCH_LIMIT) + return -ENOSPC; + + cs = intel_ring_begin(rq, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Blit the batch (which has now all relocs applied) to the + * stable batch scratch bo area (so that the CS never + * stumbles over its tlb invalidation bug) ... + */ + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* ... and execute it. */ + offset = cs_offset; + } + + if (!(dispatch_flags & I915_DISPATCH_SECURE)) + offset |= MI_BATCH_NON_SECURE; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen3_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 *cs; + + if (!(dispatch_flags & I915_DISPATCH_SECURE)) + offset |= MI_BATCH_NON_SECURE; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen4_emit_bb_start(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags) +{ + u32 security; + u32 *cs; + + security = MI_BATCH_NON_SECURE_I965; + if (dispatch_flags & I915_DISPATCH_SECURE) + security = 0; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security; + *cs++ = offset; + intel_ring_advance(rq, cs); + + return 0; +} + +void gen2_irq_enable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + i915->irq_mask &= ~engine->irq_enable_mask; + intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); + ENGINE_POSTING_READ16(engine, RING_IMR); +} + +void gen2_irq_disable(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + i915->irq_mask |= engine->irq_enable_mask; + intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); +} + +void gen3_irq_enable(struct intel_engine_cs *engine) +{ + engine->i915->irq_mask &= ~engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); + intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); +} + +void gen3_irq_disable(struct intel_engine_cs *engine) +{ + engine->i915->irq_mask |= engine->irq_enable_mask; + intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); +} + +void gen5_irq_enable(struct intel_engine_cs *engine) +{ + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); +} + +void gen5_irq_disable(struct intel_engine_cs *engine) +{ + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); +} diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.h b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h new file mode 100644 index 000000000000..a5cd64a65c9e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN2_ENGINE_CS_H__ +#define __GEN2_ENGINE_CS_H__ + +#include <linux/types.h> + +struct i915_request; +struct intel_engine_cs; + +int gen2_emit_flush(struct i915_request *rq, u32 mode); +int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode); +int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode); + +u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs); +u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs); + +int i830_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); +int gen3_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); +int gen4_emit_bb_start(struct i915_request *rq, + u64 offset, u32 length, + unsigned int dispatch_flags); + +void gen2_irq_enable(struct intel_engine_cs *engine); +void gen2_irq_disable(struct intel_engine_cs *engine); +void gen3_irq_enable(struct intel_engine_cs *engine); +void gen3_irq_disable(struct intel_engine_cs *engine); +void gen5_irq_enable(struct intel_engine_cs *engine); +void gen5_irq_disable(struct intel_engine_cs *engine); + +#endif /* __GEN2_ENGINE_CS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c new file mode 100644 index 000000000000..ce38d1bcaba3 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c @@ -0,0 +1,455 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "gen6_engine_cs.h" +#include "intel_engine.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_ring.h" + +#define HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) + +/* + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. + */ +static int +gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) +{ + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + u32 *cs, flags = 0; + int ret; + + /* Force SNB workarounds for PIPE_CONTROL flushes */ + ret = gen6_emit_post_sync_nonzero_flush(rq); + if (ret) + return ret; + + /* + * Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. And when rearranging requests, the order of flushes is + * unknown. + */ + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* + * Ensure that any following seqno writes only happen + * when the render cache is indeed flushed. + */ + flags |= PIPE_CONTROL_CS_STALL; + } + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + /* + * TLB invalidate requires a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + + /* Finally we can flush and with it emit the breadcrumb */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +static int mi_flush_dw(struct i915_request *rq, u32 flags) +{ + u32 cmd, *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW; + + /* + * We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + /* + * Bspec vol 1c.3 - blitter engine command streamer: + * "If ENABLED, all TLBs will be invalidated once the flush + * operation is complete. This bit is only valid when the + * Post-Sync Operation field is a value of 1h or 3h." + */ + cmd |= flags; + + *cs++ = cmd; + *cs++ = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags) +{ + return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0); +} + +int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode) +{ + return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); +} + +int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode) +{ + return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD); +} + +int gen6_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 security; + u32 *cs; + + security = MI_BATCH_NON_SECURE_I965; + if (dispatch_flags & I915_DISPATCH_SECURE) + security = 0; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = __gen6_emit_bb_start(cs, offset, security); + intel_ring_advance(rq, cs); + + return 0; +} + +int +hsw_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags) +{ + u32 security; + u32 *cs; + + security = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW; + if (dispatch_flags & I915_DISPATCH_SECURE) + security = 0; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = __gen6_emit_bb_start(cs, offset, security); + intel_ring_advance(rq, cs); + + return 0; +} + +static int gen7_stall_cs(struct i915_request *rq) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + u32 scratch_addr = + intel_gt_scratch_offset(rq->engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + u32 *cs, flags = 0; + + /* + * Ensure that any following seqno writes only happen when the render + * cache is indeed flushed. + * + * Workaround: 4th PIPE_CONTROL command (except the ones with only + * read-cache invalidate bits set) must have the CS_STALL bit set. We + * don't try to be clever and just set it unconditionally. + */ + flags |= PIPE_CONTROL_CS_STALL; + + /* + * CS_STALL suggests at least a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + /* + * Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. + */ + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + } + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; + + /* + * Workaround: we must issue a pipe_control with CS-stall bit + * set before a pipe_control command that has the state cache + * invalidate bit set. + */ + gen7_stall_cs(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(rq, cs); + + return 0; +} + +u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL); + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) +{ + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + *cs++ = MI_USER_INTERRUPT; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} + +#define GEN7_XCS_WA 32 +u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) +{ + int i; + + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + + *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | + MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = rq->fence.seqno; + + for (i = 0; i < GEN7_XCS_WA; i++) { + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; + } + + *cs++ = MI_FLUSH_DW; + *cs++ = 0; + *cs++ = 0; + + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return cs; +} +#undef GEN7_XCS_WA + +void gen6_irq_enable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + + /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ + ENGINE_POSTING_READ(engine, RING_IMR); + + gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); +} + +void gen6_irq_disable(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); + gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); +} + +void hsw_irq_enable_vecs(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask); + + /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ + ENGINE_POSTING_READ(engine, RING_IMR); + + gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask); +} + +void hsw_irq_disable_vecs(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~0); + gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask); +} diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.h b/drivers/gpu/drm/i915/gt/gen6_engine_cs.h new file mode 100644 index 000000000000..76c6bc9f3bde --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __GEN6_ENGINE_CS_H__ +#define __GEN6_ENGINE_CS_H__ + +#include <linux/types.h> + +#include "intel_gpu_commands.h" + +struct i915_request; +struct intel_engine_cs; + +int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode); +int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode); +int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode); +u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs); + +int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode); +u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs); + +int gen6_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); +int hsw_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + unsigned int dispatch_flags); + +void gen6_irq_enable(struct intel_engine_cs *engine); +void gen6_irq_disable(struct intel_engine_cs *engine); + +void hsw_irq_enable_vecs(struct intel_engine_cs *engine); +void hsw_irq_disable_vecs(struct intel_engine_cs *engine); + +#endif /* __GEN6_ENGINE_CS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index f4fec7eb4064..cdc0b9c54305 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -183,13 +183,11 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *pt, *alloc = NULL; - intel_wakeref_t wakeref; + bool flush = false; u64 from = start; unsigned int pde; int ret = 0; - wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); - spin_lock(&pd->lock); gen6_for_each_pde(pt, pd, start, length, pde) { const unsigned int count = gen6_pte_count(start, length); @@ -214,14 +212,20 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, alloc = pt; pt = pd->entry[pde]; } + + flush = true; } atomic_add(count, &pt->used); } spin_unlock(&pd->lock); - if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) - gen6_flush_pd(ppgtt, from, start); + if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) { + intel_wakeref_t wakeref; + + with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref) + gen6_flush_pd(ppgtt, from, start); + } goto out; @@ -230,7 +234,6 @@ unwind_out: out: if (alloc) free_px(vm, alloc); - intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); return ret; } @@ -299,11 +302,12 @@ static void pd_vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int pd_vma_bind(struct i915_vma *vma, +static int pd_vma_bind(struct i915_address_space *vm, + struct i915_vma *vma, enum i915_cache_level cache_level, u32 unused) { - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct gen6_ppgtt *ppgtt = vma->private; u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; @@ -314,7 +318,7 @@ static int pd_vma_bind(struct i915_vma *vma, return 0; } -static void pd_vma_unbind(struct i915_vma *vma) +static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) { struct gen6_ppgtt *ppgtt = vma->private; struct i915_page_directory * const pd = ppgtt->base.pd; diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index de595b66a746..d93d85cd3027 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -396,7 +396,7 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, emit_batch(vma, memset(batch, 0, bv.max_size), &bv); i915_gem_object_flush_map(vma->obj); - i915_gem_object_unpin_map(vma->obj); + __i915_gem_object_release_map(vma->obj); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index d907d538176e..91786310c114 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -314,13 +314,18 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) { lockdep_assert_held(&rq->lock); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return true; + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; struct intel_context *ce = rq->context; struct list_head *pos; spin_lock(&b->irq_lock); - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + goto unlock; if (!__intel_breadcrumbs_arm_irq(b)) goto unlock; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index e4aece20bc80..52db2bde44a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -204,25 +204,25 @@ static int __ring_active(struct intel_ring *ring) { int err; - err = i915_active_acquire(&ring->vma->active); + err = intel_ring_pin(ring); if (err) return err; - err = intel_ring_pin(ring); + err = i915_active_acquire(&ring->vma->active); if (err) - goto err_active; + goto err_pin; return 0; -err_active: - i915_active_release(&ring->vma->active); +err_pin: + intel_ring_unpin(ring); return err; } static void __ring_retire(struct intel_ring *ring) { - intel_ring_unpin(ring); i915_active_release(&ring->vma->active); + intel_ring_unpin(ring); } __i915_active_call diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 487299cb91f2..b9c8163978a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -30,7 +30,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); + *cs++ = intel_sseu_make_rpcs(rq->engine->gt, &sseu); intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 9bf6d4989968..a9249a23903a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -187,7 +187,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 -#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f @@ -335,7 +334,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); -ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, + ktime_t *now); struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 8691eb61e185..dd1a42c4d344 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -370,7 +370,7 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) * instances. */ if ((INTEL_GEN(i915) >= 11 && - RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) || + engine->gt->info.vdbox_sfc_access & engine->mask) || (INTEL_GEN(i915) >= 9 && engine->instance == 0)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; @@ -414,12 +414,12 @@ void intel_engines_release(struct intel_gt *gt) /* Decouple the backend; but keep the layout for late GPU resets */ for_each_engine(engine, gt, id) { - intel_wakeref_wait_for_idle(&engine->wakeref); - GEM_BUG_ON(intel_engine_pm_is_awake(engine)); - if (!engine->release) continue; + intel_wakeref_wait_for_idle(&engine->wakeref); + GEM_BUG_ON(intel_engine_pm_is_awake(engine)); + engine->release(engine); engine->release = NULL; @@ -450,6 +450,80 @@ void intel_engines_free(struct intel_gt *gt) } } +/* + * Determine which engines are fused off in our particular hardware. + * Note that we have a catch-22 situation where we need to be able to access + * the blitter forcewake domain to read the engine fuses, but at the same time + * we need to know which engines are available on the system to know which + * forcewake domains are present. We solve this by intializing the forcewake + * domains based on the full engine mask in the platform capabilities before + * calling this function and pruning the domains for fused-off engines + * afterwards. + */ +static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = >->info; + struct intel_uncore *uncore = gt->uncore; + unsigned int logical_vdbox = 0; + unsigned int i; + u32 media_fuse; + u16 vdbox_mask; + u16 vebox_mask; + + info->engine_mask = INTEL_INFO(i915)->platform_engine_mask; + + if (INTEL_GEN(i915) < 11) + return info->engine_mask; + + media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + + vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; + vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> + GEN11_GT_VEBOX_DISABLE_SHIFT; + + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(gt, _VCS(i))) { + vdbox_mask &= ~BIT(i); + continue; + } + + if (!(BIT(i) & vdbox_mask)) { + info->engine_mask &= ~BIT(_VCS(i)); + drm_dbg(&i915->drm, "vcs%u fused off\n", i); + continue; + } + + /* + * In Gen11, only even numbered logical VDBOXes are + * hooked up to an SFC (Scaler & Format Converter) unit. + * In TGL each VDBOX has access to an SFC. + */ + if (INTEL_GEN(i915) >= 12 || logical_vdbox++ % 2 == 0) + gt->info.vdbox_sfc_access |= BIT(i); + } + drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", + vdbox_mask, VDBOX_MASK(gt)); + GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt)); + + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(gt, _VECS(i))) { + vebox_mask &= ~BIT(i); + continue; + } + + if (!(BIT(i) & vebox_mask)) { + info->engine_mask &= ~BIT(_VECS(i)); + drm_dbg(&i915->drm, "vecs%u fused off\n", i); + } + } + drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n", + vebox_mask, VEBOX_MASK(gt)); + GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); + + return info->engine_mask; +} + /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @gt: pointer to struct intel_gt @@ -459,8 +533,7 @@ void intel_engines_free(struct intel_gt *gt) int intel_engines_init_mmio(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - struct intel_device_info *device_info = mkwrite_device_info(i915); - const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; + const unsigned int engine_mask = init_engine_mask(gt); unsigned int mask = 0; unsigned int i; int err; @@ -473,7 +546,7 @@ int intel_engines_init_mmio(struct intel_gt *gt) return -ENODEV; for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - if (!HAS_ENGINE(i915, i)) + if (!HAS_ENGINE(gt, i)) continue; err = intel_engine_setup(gt, i); @@ -489,14 +562,16 @@ int intel_engines_init_mmio(struct intel_gt *gt) * engines. */ if (drm_WARN_ON(&i915->drm, mask != engine_mask)) - device_info->engine_mask = mask; + gt->info.engine_mask = mask; - RUNTIME_INFO(i915)->num_engines = hweight32(mask); + gt->info.num_engines = hweight32(mask); intel_gt_check_and_clear_faults(gt); intel_setup_engine_capabilities(gt); + intel_uncore_prune_engine_fw_domains(gt->uncore, gt); + return 0; cleanup: @@ -634,7 +709,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) /* Use the whole device by default */ engine->sseu = - intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); + intel_sseu_from_device_info(&engine->gt->info.sseu); intel_engine_init_workarounds(engine); intel_engine_init_whitelist(engine); @@ -661,7 +736,6 @@ static int measure_breadcrumb_dw(struct intel_context *ce) if (!frame) return -ENOMEM; - frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.context = ce; rcu_assign_pointer(frame->rq.timeline, ce->timeline); @@ -1001,7 +1075,7 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + const struct sseu_dev_info *sseu = &engine->gt->info.sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -1095,19 +1169,21 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine) { struct tasklet_struct *t = &engine->execlists.tasklet; - if (__tasklet_is_scheduled(t)) { - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - } + if (!t->func) + return; - /* Otherwise flush the tasklet if it was running on another cpu */ - tasklet_unlock_wait(t); + /* Synchronise and wait for the tasklet on another CPU */ + tasklet_kill(t); + + /* Having cancelled the tasklet, ensure that is run */ + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); } /** @@ -1194,8 +1270,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -static int print_sched_attr(struct drm_i915_private *i915, - const struct i915_sched_attr *attr, +static int print_sched_attr(const struct i915_sched_attr *attr, char *buf, int x, int len) { if (attr->priority == I915_PRIORITY_INVALID) @@ -1215,7 +1290,7 @@ static void print_request(struct drm_printer *m, char buf[80] = ""; int x = 0; - x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", prefix, @@ -1423,9 +1498,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, int len; len = scnprintf(hdr, sizeof(hdr), - "\t\tActive[%d]: ccid:%08x, ", + "\t\tActive[%d]: ccid:%08x%s%s, ", (int)(port - execlists->active), - rq->context->lrc.ccid); + rq->context->lrc.ccid, + intel_context_is_closed(rq->context) ? "!" : "", + intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); @@ -1435,9 +1512,11 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, int len; len = scnprintf(hdr, sizeof(hdr), - "\t\tPending[%d]: ccid:%08x, ", + "\t\tPending[%d]: ccid:%08x%s%s, ", (int)(port - execlists->pending), - rq->context->lrc.ccid); + rq->context->lrc.ccid, + intel_context_is_closed(rq->context) ? "!" : "", + intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); @@ -1506,6 +1585,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct i915_request *rq; intel_wakeref_t wakeref; unsigned long flags; + ktime_t dummy; if (header) { va_list ap; @@ -1523,6 +1603,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, yesno(!llist_empty(&engine->barrier_tasks))); drm_printf(m, "\tLatency: %luus\n", ewma__engine_latency_read(&engine->latency)); + if (intel_engine_supports_stats(engine)) + drm_printf(m, "\tRuntime: %llums\n", + ktime_to_ms(intel_engine_get_busy_time(engine, + &dummy))); + drm_printf(m, "\tForcewake: %x domains, %d active\n", + engine->fw_domain, atomic_read(&engine->fw_active)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); @@ -1589,7 +1675,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, intel_engine_print_breadcrumbs(engine, m); } -static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) +static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, + ktime_t *now) { ktime_t total = engine->stats.total; @@ -1597,9 +1684,9 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) * If the engine is executing something at the moment * add it to the total. */ + *now = ktime_get(); if (atomic_read(&engine->stats.active)) - total = ktime_add(total, - ktime_sub(ktime_get(), engine->stats.start)); + total = ktime_add(total, ktime_sub(*now, engine->stats.start)); return total; } @@ -1607,17 +1694,18 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) /** * intel_engine_get_busy_time() - Return current accumulated engine busyness * @engine: engine to report on + * @now: monotonic timestamp of sampling * * Returns accumulated time @engine was busy since engine stats were enabled. */ -ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) { unsigned int seq; ktime_t total; do { seq = read_seqbegin(&engine->stats.lock); - total = __intel_engine_get_busy_time(engine); + total = __intel_engine_get_busy_time(engine, now); } while (read_seqretry(&engine->stats.lock, seq)); return total; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 5136c8bf112d..8ffdf676c0a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -4,6 +4,7 @@ * Copyright © 2019 Intel Corporation */ +#include "i915_drv.h" #include "i915_request.h" #include "intel_context.h" @@ -31,7 +32,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine) delay = msecs_to_jiffies_timeout(delay); if (delay >= HZ) delay = round_jiffies_up_relative(delay); - mod_delayed_work(system_wq, &engine->heartbeat.work, delay); + mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay); return true; } @@ -48,8 +49,10 @@ static void show_heartbeat(const struct i915_request *rq, struct drm_printer p = drm_debug_printer("heartbeat"); intel_engine_dump(engine, &p, - "%s heartbeat {prio:%d} not ticking\n", + "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", engine->name, + rq->fence.context, + rq->fence.seqno, rq->sched.attr.priority); } @@ -62,6 +65,10 @@ static void heartbeat(struct work_struct *wrk) container_of(wrk, typeof(*engine), heartbeat.work.work); struct intel_context *ce = engine->kernel_context; struct i915_request *rq; + unsigned long serial; + + /* Just in case everything has gone horribly wrong, give it a kick */ + intel_engine_flush_submission(engine); rq = engine->heartbeat.systole; if (rq && i915_request_completed(rq)) { @@ -76,8 +83,19 @@ static void heartbeat(struct work_struct *wrk) goto out; if (engine->heartbeat.systole) { - if (engine->schedule && - rq->sched.attr.priority < I915_PRIORITY_BARRIER) { + if (!i915_sw_fence_signaled(&rq->submit)) { + /* + * Not yet submitted, system is stalled. + * + * This more often happens for ring submission, + * where all contexts are funnelled into a common + * ringbuffer. If one context is blocked on an + * external fence, not only is it not submitted, + * but all other contexts, including the kernel + * context are stuck waiting for the signal. + */ + } else if (engine->schedule && + rq->sched.attr.priority < I915_PRIORITY_BARRIER) { /* * Gradually raise the priority of the heartbeat to * give high priority work [which presumably desires @@ -105,10 +123,19 @@ static void heartbeat(struct work_struct *wrk) goto out; } - if (engine->wakeref_serial == engine->serial) + serial = READ_ONCE(engine->serial); + if (engine->wakeref_serial == serial) goto out; - mutex_lock(&ce->timeline->mutex); + if (!mutex_trylock(&ce->timeline->mutex)) { + /* Unable to lock the kernel timeline, is the engine stuck? */ + if (xchg(&engine->heartbeat.blocked, serial) == serial) + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "no heartbeat on %s", + engine->name); + goto out; + } intel_context_enter(ce); rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); @@ -117,7 +144,7 @@ static void heartbeat(struct work_struct *wrk) goto unlock; idle_pulse(engine, rq); - if (i915_modparams.enable_hangcheck) + if (engine->i915->params.enable_hangcheck) engine->heartbeat.systole = i915_request_get(rq); __i915_request_commit(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index d0a1078ef632..8ec3eecf3e39 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -142,6 +142,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) return true; GEM_BUG_ON(!intel_context_is_barrier(ce)); + GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma); /* Already inside the kernel context, safe to power down. */ if (engine->wakeref_serial == engine->serial) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 2b6cdf47d428..8de92fd7d392 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -24,6 +24,7 @@ #include "i915_selftest.h" #include "intel_sseu.h" #include "intel_timeline_types.h" +#include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" @@ -176,8 +177,12 @@ struct intel_engine_execlists { * the first error interrupt, record the EIR and schedule the tasklet. * In the tasklet, we process the pending CS events to ensure we have * the guilty request, and then reset the engine. + * + * Low 16b are used by HW, with the upper 16b used as the enabling mask. + * Reserve the upper 16b for tracking internal errors. */ u32 error_interrupt; +#define ERROR_CSB BIT(31) /** * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset @@ -313,6 +318,16 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; + /* + * Some w/a require forcewake to be held (which prevents RC6) while + * a particular engine is active. If so, we set fw_domain to which + * domains need to be held for the duration of request activity, + * and 0 if none. We try to limit the duration of the hold as much + * as possible. + */ + enum forcewake_domains fw_domain; + atomic_t fw_active; + unsigned long context_tag; struct rb_node uabi_node; @@ -337,6 +352,7 @@ struct intel_engine_cs { struct { struct delayed_work work; struct i915_request *systole; + unsigned long blocked; } heartbeat; unsigned long serial; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 848decee9066..34e6096f196e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -201,7 +201,7 @@ void intel_engines_driver_register(struct drm_i915_private *i915) uabi_node); char old[sizeof(engine->name)]; - if (intel_gt_has_init_error(engine->gt)) + if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 66165b10256e..62979ea591f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -108,13 +108,32 @@ static bool needs_idle_maps(struct drm_i915_private *i915) void i915_ggtt_suspend(struct i915_ggtt *ggtt) { - struct i915_vma *vma; + struct i915_vma *vma, *vn; + int open; + + mutex_lock(&ggtt->vm.mutex); - list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); + + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); i915_vma_wait_for_bind(vma); + if (i915_vma_is_pinned(vma)) + continue; + + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + __i915_vma_evict(vma); + drm_mm_remove_node(&vma->node); + } + } + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); ggtt->invalidate(ggtt); + atomic_set(&ggtt->vm.open, open); + + mutex_unlock(&ggtt->vm.mutex); intel_gt_check_and_clear_faults(ggtt->vm.gt); } @@ -417,35 +436,31 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } -static int ggtt_bind_vma(struct i915_vma *vma, +static int ggtt_bind_vma(struct i915_address_space *vm, + struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; + if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK)) + return 0; + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); - + vm->insert_entries(vm, vma, cache_level, pte_flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - /* - * Without aliasing PPGTT there's no difference between - * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally - * upgrade to both bound if we bind either to avoid double-binding. - */ - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); - return 0; } -static void ggtt_unbind_vma(struct i915_vma *vma) +static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + vm->clear_range(vm, vma->node.start, vma->size); } static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) @@ -553,7 +568,8 @@ err: return ret; } -static int aliasing_gtt_bind_vma(struct i915_vma *vma, +static int aliasing_gtt_bind_vma(struct i915_address_space *vm, + struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { @@ -566,44 +582,27 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, pte_flags |= PTE_READ_ONLY; if (flags & I915_VMA_LOCAL_BIND) { - struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; + struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias; - if (flags & I915_VMA_ALLOC) { - ret = alias->vm.allocate_va_range(&alias->vm, - vma->node.start, - vma->size); - if (ret) - return ret; - - set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); - } - - GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, - __i915_vma_flags(vma))); - alias->vm.insert_entries(&alias->vm, vma, - cache_level, pte_flags); + ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags); + if (ret) + return ret; } if (flags & I915_VMA_GLOBAL_BIND) - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma, cache_level, pte_flags); return 0; } -static void aliasing_gtt_unbind_vma(struct i915_vma *vma) +static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma *vma) { - if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - struct i915_address_space *vm = vma->vm; - + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) vm->clear_range(vm, vma->node.start, vma->size); - } - - if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { - struct i915_address_space *vm = - &i915_vm_to_ggtt(vma->vm)->alias->vm; - vm->clear_range(vm, vma->node.start, vma->size); - } + if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) + ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma); } static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) @@ -1166,6 +1165,11 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) ggtt->invalidate(ggtt); } +static unsigned int clear_bind(struct i915_vma *vma) +{ + return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags); +} + void i915_ggtt_resume(struct i915_ggtt *ggtt) { struct i915_vma *vma; @@ -1183,14 +1187,11 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; + unsigned int was_bound = clear_bind(vma); - if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) - continue; - - clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, - PIN_GLOBAL, NULL)); + was_bound, NULL)); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f069551e412f..e0755f1a904b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -44,6 +44,14 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) gt->ggtt = ggtt; } +int intel_gt_init_mmio(struct intel_gt *gt) +{ + intel_uc_init_mmio(>->uc); + intel_sseu_info_init(gt); + + return intel_engines_init_mmio(gt); +} + static void init_unused_ring(struct intel_gt *gt, u32 base) { struct intel_uncore *uncore = gt->uncore; @@ -510,7 +518,7 @@ static int __engines_verify_workarounds(struct intel_gt *gt) static void __intel_gt_disable(struct intel_gt *gt) { - intel_gt_set_wedged_on_init(gt); + intel_gt_set_wedged_on_fini(gt); intel_gt_suspend_prepare(gt); intel_gt_suspend_late(gt); @@ -616,6 +624,11 @@ void intel_gt_driver_unregister(struct intel_gt *gt) void intel_gt_driver_release(struct intel_gt *gt) { struct i915_address_space *vm; + intel_wakeref_t wakeref; + + /* Scrub all HW state upon release */ + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + __intel_gt_reset(gt, ALL_ENGINES); vm = fetch_and_zero(>->vm); if (vm) /* FIXME being called twice on error paths :( */ @@ -637,3 +650,11 @@ void intel_gt_driver_late_release(struct intel_gt *gt) intel_gt_fini_timelines(gt); intel_engines_free(gt); } + +void intel_gt_info_print(const struct intel_gt_info *info, + struct drm_printer *p) +{ + drm_printf(p, "available engines: %x\n", info->engine_mask); + + intel_sseu_dump(&info->sseu, p); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4fac043750aa..9157c7411f60 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -11,6 +11,7 @@ #include "intel_reset.h" struct drm_i915_private; +struct drm_printer; #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ @@ -35,6 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); void intel_gt_driver_register(struct intel_gt *gt); @@ -58,14 +60,21 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, return i915_ggtt_offset(gt->scratch) + field; } -static inline bool intel_gt_is_wedged(const struct intel_gt *gt) +static inline bool intel_gt_has_unrecoverable_error(const struct intel_gt *gt) { - return __intel_reset_failed(>->reset); + return test_bit(I915_WEDGED_ON_INIT, >->reset.flags) || + test_bit(I915_WEDGED_ON_FINI, >->reset.flags); } -static inline bool intel_gt_has_init_error(const struct intel_gt *gt) +static inline bool intel_gt_is_wedged(const struct intel_gt *gt) { - return test_bit(I915_WEDGED_ON_INIT, >->reset.flags); + GEM_BUG_ON(intel_gt_has_unrecoverable_error(gt) && + !test_bit(I915_WEDGED, >->reset.flags)); + + return unlikely(test_bit(I915_WEDGED, >->reset.flags)); } +void intel_gt_info_print(const struct intel_gt_info *info, + struct drm_printer *p); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 1495054a4305..418ae184cecf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -212,8 +212,9 @@ void intel_gt_flush_buffer_pool(struct intel_gt *gt) { struct intel_gt_buffer_pool *pool = >->buffer_pool; - if (cancel_delayed_work_sync(&pool->work)) + do { pool_free_imm(pool); + } while (cancel_delayed_work_sync(&pool->work)); } void intel_gt_fini_buffer_pool(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 0cc7dd54f4f9..b05da68e52f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -27,7 +27,8 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { u32 eir; - eir = ENGINE_READ(engine, RING_EIR); + /* Upper 16b are the enabling mask, rsvd for internal errors */ + eir = ENGINE_READ(engine, RING_EIR) & GENMASK(15, 0); ENGINE_TRACE(engine, "CS error: %x\n", eir); /* Disable the error interrupt until after the reset */ @@ -457,7 +458,7 @@ void gen5_gt_irq_postinstall(struct intel_gt *gt) * RPS interrupts will get enabled/disabled on demand when RPS * itself is enabled/disabled. */ - if (HAS_ENGINE(gt->i915, VECS0)) { + if (HAS_ENGINE(gt, VECS0)) { pm_irqs |= PM_VEBOX_USER_INTERRUPT; gt->pm_ier |= PM_VEBOX_USER_INTERRUPT; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 6bdb434a442d..274aa0dd7050 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -188,7 +188,7 @@ int intel_gt_resume(struct intel_gt *gt) enum intel_engine_id id; int err; - err = intel_gt_has_init_error(gt); + err = intel_gt_has_unrecoverable_error(gt); if (err) return err; @@ -214,8 +214,8 @@ int intel_gt_resume(struct intel_gt *gt) /* Only when the HW is re-initialised, can we replay the requests */ err = intel_gt_init_hw(gt); if (err) { - drm_err(>->i915->drm, - "Failed to initialize GPU, declaring it wedged!\n"); + i915_probe_error(gt->i915, + "Failed to initialize GPU, declaring it wedged!\n"); goto err_wedged; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 16ff47c83bd5..66fcbf9d0fdd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -31,12 +31,15 @@ static bool engine_active(const struct intel_engine_cs *engine) return !list_empty(&engine->kernel_context->timeline->requests); } -static bool flush_submission(struct intel_gt *gt) +static bool flush_submission(struct intel_gt *gt, long timeout) { struct intel_engine_cs *engine; enum intel_engine_id id; bool active = false; + if (!timeout) + return false; + if (!intel_gt_pm_is_awake(gt)) return false; @@ -139,7 +142,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) if (unlikely(timeout < 0)) timeout = -timeout, interruptible = false; - flush_submission(gt); /* kick the ksoftirqd tasklets */ + flush_submission(gt, timeout); /* kick the ksoftirqd tasklets */ spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) { @@ -194,7 +197,7 @@ out_active: spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); - if (flush_submission(gt)) /* Wait, there's more! */ + if (flush_submission(gt, timeout)) /* Wait, there's more! */ active_count++; return active_count ? timeout : 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 0cc1d6b185dc..6d39a4a11bf3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -109,6 +109,17 @@ struct intel_gt { struct intel_gt_buffer_pool buffer_pool; struct i915_vma *scratch; + + struct intel_gt_info { + intel_engine_mask_t engine_mask; + u8 num_engines; + + /* Media engine access to SFC per instance */ + u8 vdbox_sfc_access; + + /* Slice/subslice/EU info */ + struct sseu_dev_info sseu; + } info; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index d93ebdf3fa0e..f2b75078e05f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -198,14 +198,16 @@ struct intel_gt; struct i915_vma_ops { /* Map an object into an address space with the given cache flags. */ - int (*bind_vma)(struct i915_vma *vma, + int (*bind_vma)(struct i915_address_space *vm, + struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); /* * Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. */ - void (*unbind_vma)(struct i915_vma *vma); + void (*unbind_vma)(struct i915_address_space *vm, + struct i915_vma *vma); int (*set_pages)(struct i915_vma *vma); void (*clear_pages)(struct i915_vma *vma); @@ -566,6 +568,13 @@ int ggtt_set_pages(struct i915_vma *vma); int ppgtt_set_pages(struct i915_vma *vma); void clear_pages(struct i915_vma *vma); +int ppgtt_bind_vma(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); +void ppgtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma *vma); + void gtt_write_workarounds(struct intel_gt *gt); void setup_private_pat(struct intel_uncore *uncore); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7c3d8ef4a47c..e0280a672f1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -446,6 +446,9 @@ static int queue_prio(const struct intel_engine_execlists *execlists) * we have to flip the index value to become priority. */ p = to_priolist(rb); + if (!I915_USER_PRIORITY_SHIFT) + return p->priority; + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); } @@ -1377,6 +1380,8 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid |= engine->execlists.ccid; __intel_gt_pm_get(engine->gt); + if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) + intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1409,8 +1414,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) struct virtual_engine *ve = container_of(ce, typeof(*ve), context); struct i915_request *next = READ_ONCE(ve->request); - if (next && next->execution_mask & ~rq->execution_mask) - tasklet_schedule(&ve->base.execlists.tasklet); + if (next == rq || (next && next->execution_mask & ~rq->execution_mask)) + tasklet_hi_schedule(&ve->base.execlists.tasklet); } static inline void @@ -1445,6 +1450,8 @@ __execlists_schedule_out(struct i915_request *rq, intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) + intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); intel_gt_pm_put_async(engine->gt); /* @@ -1636,9 +1643,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, ccid = ce->lrc.ccid; /* - * Sentinels are supposed to be lonely so they flush the - * current exection off the HW. Check that they are the - * only request in the pending submission. + * Sentinels are supposed to be the last request so they flush + * the current execution off the HW. Check that they are the only + * request in the pending submission. */ if (sentinel) { GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", @@ -1647,15 +1654,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, port - execlists->pending); return false; } - sentinel = i915_request_has_sentinel(rq); - if (sentinel && port != execlists->pending) { - GEM_TRACE_ERR("%s: sentinel context:%llx not in prime position[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - return false; - } /* Hold tightly onto the lock to prevent concurrent retires! */ if (!spin_trylock_irqsave(&rq->lock, flags)) @@ -1967,7 +1966,7 @@ static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { if (list_is_last(&rq->sched.link, &engine->active.requests)) - return INT_MIN; + return engine->execlists.queue_priority_hint; return rq_prio(list_next_entry(rq, sched.link)); } @@ -2445,6 +2444,7 @@ done: set_preempt_timeout(engine, *active); execlists_submit_ports(engine); } else { + start_timeslice(engine, execlists->queue_priority_hint); skip_submit: ring_set_paused(engine, 0); } @@ -2569,6 +2569,25 @@ static void process_csb(struct intel_engine_cs *engine) return; /* + * We will consume all events from HW, or at least pretend to. + * + * The sequence of events from the HW is deterministic, and derived + * from our writes to the ELSP, with a smidgen of variability for + * the arrival of the asynchronous requests wrt to the inflight + * execution. If the HW sends an event that does not correspond with + * the one we are expecting, we have to abandon all hope as we lose + * all tracking of what the engine is actually executing. We will + * only detect we are out of sequence with the HW when we get an + * 'impossible' event because we have already drained our own + * preemption/promotion queue. If this occurs, we know that we likely + * lost track of execution earlier and must unwind and restart, the + * simplest way is by stop processing the event queue and force the + * engine to reset. + */ + execlists->csb_head = tail; + ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); + + /* * Hopefully paired with a wmb() in HW! * * We must complete the read of the write pointer before any reads @@ -2577,8 +2596,6 @@ static void process_csb(struct intel_engine_cs *engine) * we perform the READ_ONCE(*csb_write). */ rmb(); - - ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); do { bool promote; @@ -2613,6 +2630,11 @@ static void process_csb(struct intel_engine_cs *engine) if (promote) { struct i915_request * const *old = execlists->active; + if (GEM_WARN_ON(!*execlists->pending)) { + execlists->error_interrupt |= ERROR_CSB; + break; + } + ring_set_paused(engine, 0); /* Point active to the new ELSP; prevent overwriting */ @@ -2635,7 +2657,10 @@ static void process_csb(struct intel_engine_cs *engine) WRITE_ONCE(execlists->pending[0], NULL); } else { - GEM_BUG_ON(!*execlists->active); + if (GEM_WARN_ON(!*execlists->active)) { + execlists->error_interrupt |= ERROR_CSB; + break; + } /* port0 completed, advanced to port1 */ trace_ports(execlists, "completed", execlists->active); @@ -2686,7 +2711,6 @@ static void process_csb(struct intel_engine_cs *engine) } } while (head != tail); - execlists->csb_head = head; set_timeslice(engine); /* @@ -3005,12 +3029,12 @@ static u32 active_ccid(struct intel_engine_cs *engine) return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); } -static bool execlists_capture(struct intel_engine_cs *engine) +static void execlists_capture(struct intel_engine_cs *engine) { struct execlists_capture *cap; if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) - return true; + return; /* * We need to _quickly_ capture the engine state before we reset. @@ -3019,7 +3043,7 @@ static bool execlists_capture(struct intel_engine_cs *engine) */ cap = capture_regs(engine); if (!cap) - return true; + return; spin_lock_irq(&engine->active.lock); cap->rq = active_context(engine, active_ccid(engine)); @@ -3056,14 +3080,13 @@ static bool execlists_capture(struct intel_engine_cs *engine) INIT_WORK(&cap->work, execlists_capture_work); schedule_work(&cap->work); - return true; + return; err_rq: i915_request_put(cap->rq); err_free: i915_gpu_coredump_put(cap->error); kfree(cap); - return false; } static void execlists_reset(struct intel_engine_cs *engine, const char *msg) @@ -3083,10 +3106,8 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg) tasklet_disable_nosync(&engine->execlists.tasklet); ring_set_paused(engine, 1); /* Freeze the current request in place */ - if (execlists_capture(engine)) - intel_engine_reset(engine, msg); - else - ring_set_paused(engine, 0); + execlists_capture(engine); + intel_engine_reset(engine, msg); tasklet_enable(&engine->execlists.tasklet); clear_and_wake_up_bit(bit, lock); @@ -3117,9 +3138,18 @@ static void execlists_submission_tasklet(unsigned long data) process_csb(engine); if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { + const char *msg; + + /* Generate the error message in priority wrt to the user! */ + if (engine->execlists.error_interrupt & GENMASK(15, 0)) + msg = "CS error"; /* thrown by a user payload */ + else if (engine->execlists.error_interrupt & ERROR_CSB) + msg = "invalid CSB event"; + else + msg = "internal error"; + engine->execlists.error_interrupt = 0; - if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */ - execlists_reset(engine, "CS error"); + execlists_reset(engine, msg); } if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { @@ -3170,13 +3200,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) if (reset_in_progress(execlists)) return; /* defer until we restart the engine following reset */ - /* Hopefully we clear execlists->pending[] to let us through */ - if (READ_ONCE(execlists->pending[0]) && - tasklet_trylock(&execlists->tasklet)) { - process_csb(engine); - tasklet_unlock(&execlists->tasklet); - } - __execlists_submission_tasklet(engine); } @@ -3199,11 +3222,25 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, return !list_empty(&engine->active.hold) && hold_request(rq); } +static void flush_csb(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *el = &engine->execlists; + + if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) { + if (!reset_in_progress(el)) + process_csb(engine); + tasklet_unlock(&el->tasklet); + } +} + static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; + /* Hopefully we clear execlists->pending[] to let us through */ + flush_csb(engine); + /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); @@ -3415,7 +3452,7 @@ __execlists_update_reg_state(const struct intel_context *ce, /* RPCS */ if (engine->class == RENDER_CLASS) { regs[CTX_R_PWR_CLK_STATE] = - intel_sseu_make_rpcs(engine->i915, &ce->sseu); + intel_sseu_make_rpcs(engine->gt, &ce->sseu); i915_oa_init_reg_state(ce, engine); } @@ -3536,7 +3573,7 @@ static int emit_pdps(struct i915_request *rq) int err, i; u32 *cs; - GEM_BUG_ON(intel_vgpu_active(rq->i915)); + GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); /* * Beware ye of the dragons, this sequence is magic! @@ -3873,7 +3910,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, &wa_ctx->per_ctx }; wa_bb_func_t wa_bb_fn[2]; - struct page *page; void *batch, *batch_ptr; unsigned int i; int ret; @@ -3909,14 +3945,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } - page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = batch_ptr = kmap_atomic(page); + batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); /* * Emit the two workaround batch buffers, recording the offset from the * start of the workaround batch buffer object for each and their * respective sizes. */ + batch_ptr = batch; for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { wa_bb[i]->offset = batch_ptr - batch; if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, @@ -3928,10 +3964,10 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) batch_ptr = wa_bb_fn[i](engine, batch_ptr); wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } + GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); - BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); - - kunmap_atomic(batch); + __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch); + __i915_gem_object_release_map(wa_ctx->vma->obj); if (ret) lrc_destroy_wa_ctx(engine); @@ -4515,11 +4551,11 @@ static int gen8_emit_flush_render(struct i915_request *request, * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL * pipe control. */ - if (IS_GEN(request->i915, 9)) + if (IS_GEN(request->engine->i915, 9)) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_REVID(request->i915, 0, KBL_REVID_B0)) + if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0)) dc_flush_wa = true; } @@ -5396,13 +5432,8 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) * typically be the first we inspect for submission. */ swp = prandom_u32_max(ve->num_siblings); - if (!swp) - return; - - swap(ve->siblings[swp], ve->siblings[0]); - if (!intel_engine_has_relative_mmio(ve->siblings[0])) - virtual_update_register_offsets(ve->context.lrc_reg_state, - ve->siblings[0]); + if (swp) + swap(ve->siblings[swp], ve->siblings[0]); } static int virtual_context_alloc(struct intel_context *ce) @@ -5415,15 +5446,9 @@ static int virtual_context_alloc(struct intel_context *ce) static int virtual_context_pin(struct intel_context *ce) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - int err; /* Note: we must use a real engine class for setting up reg state */ - err = __execlists_context_pin(ce, ve->siblings[0]); - if (err) - return err; - - virtual_engine_initial_hint(ve); - return 0; + return __execlists_context_pin(ce, ve->siblings[0]); } static void virtual_context_enter(struct intel_context *ce) @@ -5598,7 +5623,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(!list_empty(virtual_queue(ve))); list_move_tail(&rq->sched.link, virtual_queue(ve)); - tasklet_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.execlists.tasklet); } spin_unlock_irqrestore(&ve->base.active.lock, flags); @@ -5688,6 +5713,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); + ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */ ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ -5769,6 +5795,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.flags |= I915_ENGINE_IS_VIRTUAL; + virtual_engine_initial_hint(ve); return &ve->context; err_put: diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index f86f7e68ce5e..f0862e924d11 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -155,16 +155,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) return ppgtt; } -static int ppgtt_bind_vma(struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +int ppgtt_bind_vma(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { u32 pte_flags; int err; - if (flags & I915_VMA_ALLOC) { - err = vma->vm->allocate_va_range(vma->vm, - vma->node.start, vma->size); + if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { + err = vm->allocate_va_range(vm, vma->node.start, vma->size); if (err) return err; @@ -176,17 +176,16 @@ static int ppgtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; - GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); - vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + vm->insert_entries(vm, vma, cache_level, pte_flags); wmb(); return 0; } -static void ppgtt_unbind_vma(struct i915_vma *vma) +void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) { if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + vm->clear_range(vm, vma->node.start, vma->size); } int ppgtt_set_pages(struct i915_vma *vma) diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index f59e7875cc5e..1bfad589c63b 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -61,7 +61,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine) #define OUT_BATCH(batch, i, val) \ do { \ if ((i) >= PAGE_SIZE / sizeof(u32)) \ - goto err; \ + goto out; \ (batch)[(i)++] = (val); \ } while(0) @@ -70,15 +70,12 @@ static int render_state_setup(struct intel_renderstate *so, { const struct intel_renderstate_rodata *rodata = so->rodata; unsigned int i = 0, reloc_index = 0; - unsigned int needs_clflush; + int ret = -EINVAL; u32 *d; - int ret; - ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush); - if (ret) - return ret; - - d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0)); + d = i915_gem_object_pin_map(so->vma->obj, I915_MAP_WB); + if (IS_ERR(d)) + return PTR_ERR(d); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -89,7 +86,7 @@ static int render_state_setup(struct intel_renderstate *so, if (HAS_64BIT_RELOC(i915)) { if (i + 1 >= rodata->batch_items || rodata->batch[i + 1] != 0) - goto err; + goto out; d[i++] = s; s = upper_32_bits(r); @@ -103,7 +100,7 @@ static int render_state_setup(struct intel_renderstate *so, if (rodata->reloc[reloc_index] != -1) { drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index); - goto err; + goto out; } so->batch_offset = i915_ggtt_offset(so->vma); @@ -150,19 +147,11 @@ static int render_state_setup(struct intel_renderstate *so, */ so->aux_size = ALIGN(so->aux_size, 8); - if (needs_clflush) - drm_clflush_virt_range(d, i * sizeof(u32)); - kunmap_atomic(d); - ret = 0; out: - i915_gem_object_finish_access(so->vma->obj); + __i915_gem_object_flush_map(so->vma->obj, 0, i * sizeof(u32)); + __i915_gem_object_release_map(so->vma->obj); return ret; - -err: - kunmap_atomic(d); - ret = -EINVAL; - goto out; } #undef OUT_BATCH diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 39070b514e65..46a5ceffc22f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -342,7 +342,7 @@ static int gen6_reset_engines(struct intel_gt *gt, static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) { struct intel_uncore *uncore = engine->uncore; - u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; + u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access; i915_reg_t sfc_forced_lock, sfc_forced_lock_ack; u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit; i915_reg_t sfc_usage; @@ -417,7 +417,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) static void gen11_unlock_sfc(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; - u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; + u8 vdbox_sfc_access = engine->gt->info.vdbox_sfc_access; i915_reg_t sfc_forced_lock; u32 sfc_forced_lock_bit; @@ -638,7 +638,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) bool intel_has_gpu_reset(const struct intel_gt *gt) { - if (!i915_modparams.reset) + if (!gt->i915->params.reset) return NULL; return intel_get_gpu_reset(gt); @@ -646,7 +646,7 @@ bool intel_has_gpu_reset(const struct intel_gt *gt) bool intel_has_reset_engine(const struct intel_gt *gt) { - if (i915_modparams.reset < 2) + if (gt->i915->params.reset < 2) return false; return INTEL_INFO(gt->i915)->has_reset_engine; @@ -880,7 +880,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) return true; /* Never fully initialised, recovery impossible */ - if (test_bit(I915_WEDGED_ON_INIT, >->reset.flags)) + if (intel_gt_has_unrecoverable_error(gt)) return false; GT_TRACE(gt, "start\n"); @@ -930,7 +930,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * Warn CI about the unrecoverable wedged condition. * Time for a reboot. */ - add_taint_for_CI(TAINT_WARN); + add_taint_for_CI(gt->i915, TAINT_WARN); return false; } @@ -1038,7 +1038,7 @@ void intel_gt_reset(struct intel_gt *gt, awake = reset_prepare(gt); if (!intel_has_gpu_reset(gt)) { - if (i915_modparams.reset) + if (gt->i915->params.reset) drm_err(>->i915->drm, "GPU reset not supported\n"); else drm_dbg(>->i915->drm, "GPU reset disabled\n"); @@ -1097,7 +1097,7 @@ taint: * rather than continue on into oblivion. For everyone else, * the system should still plod along, but they have been warned! */ - add_taint_for_CI(TAINT_WARN); + add_taint_for_CI(gt->i915, TAINT_WARN); error: __intel_gt_set_wedged(gt); goto finish; @@ -1246,7 +1246,7 @@ void intel_gt_handle_error(struct intel_gt *gt, */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); - engine_mask &= INTEL_INFO(gt->i915)->engine_mask; + engine_mask &= gt->info.engine_mask; if (flags & I915_ERROR_CAPTURE) { i915_capture_error_state(gt->i915); @@ -1342,7 +1342,7 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!intel_gt_is_wedged(gt)) return 0; - if (intel_gt_has_init_error(gt)) + if (intel_gt_has_unrecoverable_error(gt)) return -EIO; /* Reset still in progress? Maybe we will recover? */ @@ -1360,6 +1360,15 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt) I915_WEDGED_ON_INIT); intel_gt_set_wedged(gt); set_bit(I915_WEDGED_ON_INIT, >->reset.flags); + + /* Wedged on init is non-recoverable */ + add_taint_for_CI(gt->i915, TAINT_WARN); +} + +void intel_gt_set_wedged_on_fini(struct intel_gt *gt) +{ + intel_gt_set_wedged(gt); + set_bit(I915_WEDGED_ON_FINI, >->reset.flags); } void intel_gt_init_reset(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 8e8d5f761166..a0eec7c11c0c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -47,8 +47,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt); /* * There's no unset_wedged_on_init paired with this one. * Once we're wedged on init, there's no going back. + * Same thing for unset_wedged_on_fini. */ void intel_gt_set_wedged_on_init(struct intel_gt *gt); +void intel_gt_set_wedged_on_fini(struct intel_gt *gt); int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); @@ -71,14 +73,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); (W)->gt; \ __intel_fini_wedge((W))) -static inline bool __intel_reset_failed(const struct intel_reset *reset) -{ - GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ? - !test_bit(I915_WEDGED, &reset->flags) : false); - - return unlikely(test_bit(I915_WEDGED, &reset->flags)); -} - bool intel_has_gpu_reset(const struct intel_gt *gt); bool intel_has_reset_engine(const struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index f43bc3a0fe4f..add6b86d9d03 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -34,12 +34,17 @@ struct intel_reset { * longer use the GPU - similar to #I915_WEDGED bit. The difference in * in the way we're handling "forced" unwedged (e.g. through debugfs), * which is not allowed in case we failed to initialize. + * + * #I915_WEDGED_ON_FINI - Similar to #I915_WEDGED_ON_INIT, except we + * use it to mark that the GPU is no longer available (and prevent + * users from using it). */ unsigned long flags; #define I915_RESET_BACKOFF 0 #define I915_RESET_MODESET 1 #define I915_RESET_ENGINE 2 -#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2) +#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 3) +#define I915_WEDGED_ON_FINI (BITS_PER_LONG - 2) #define I915_WEDGED (BITS_PER_LONG - 1) struct mutex mutex; /* serialises wedging/unwedging */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index ca7286e58409..94915f668715 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -27,21 +27,15 @@ * */ -#include <linux/log2.h> - -#include "gem/i915_gem_context.h" - +#include "gen2_engine_cs.h" +#include "gen6_engine_cs.h" #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" -#include "i915_trace.h" #include "intel_context.h" #include "intel_gt.h" -#include "intel_gt_irq.h" -#include "intel_gt_pm_irq.h" #include "intel_reset.h" #include "intel_ring.h" -#include "intel_workarounds.h" #include "shmem_utils.h" /* Rough estimate of the typical request size, performing a flush, @@ -49,436 +43,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -static int -gen2_render_ring_flush(struct i915_request *rq, u32 mode) -{ - unsigned int num_store_dw; - u32 cmd, *cs; - - cmd = MI_FLUSH; - num_store_dw = 0; - if (mode & EMIT_INVALIDATE) - cmd |= MI_READ_FLUSH; - if (mode & EMIT_FLUSH) - num_store_dw = 4; - - cs = intel_ring_begin(rq, 2 + 3 * num_store_dw); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = cmd; - while (num_store_dw--) { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT); - *cs++ = 0; - } - *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen4_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 cmd, *cs; - int i; - - /* - * read/write caches: - * - * I915_GEM_DOMAIN_RENDER is always invalidated, but is - * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is - * also flushed at 2d versus 3d pipeline switches. - * - * read-only caches: - * - * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if - * MI_READ_FLUSH is set, and is always flushed on 965. - * - * I915_GEM_DOMAIN_COMMAND may not exist? - * - * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is - * invalidated when MI_EXE_FLUSH is set. - * - * I915_GEM_DOMAIN_VERTEX, which exists on 965, is - * invalidated with every MI_FLUSH. - * - * TLBs: - * - * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND - * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and - * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER - * are flushed at any MI_FLUSH. - */ - - cmd = MI_FLUSH; - if (mode & EMIT_INVALIDATE) { - cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5)) - cmd |= MI_INVALIDATE_ISP; - } - - i = 2; - if (mode & EMIT_INVALIDATE) - i += 20; - - cs = intel_ring_begin(rq, i); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = cmd; - - /* - * A random delay to let the CS invalidate take effect? Without this - * delay, the GPU relocation path fails as the CS does not see - * the updated contents. Just as important, if we apply the flushes - * to the EMIT_FLUSH branch (i.e. immediately after the relocation - * write and before the invalidate on the next batch), the relocations - * still fail. This implies that is a delay following invalidation - * that is required to reset the caches as opposed to a delay to - * ensure the memory is written. - */ - if (mode & EMIT_INVALIDATE) { - *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT) | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - - for (i = 0; i < 12; i++) - *cs++ = MI_FLUSH; - - *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT) | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - } - - *cs++ = cmd; - - intel_ring_advance(rq, cs); - - return 0; -} - -/* - * Emits a PIPE_CONTROL with a non-zero post-sync operation, for - * implementing two workarounds on gen6. From section 1.4.7.1 - * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: - * - * [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. - * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable - * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. - * - * And the workaround for these two requires this workaround first: - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. - * - * And this last workaround is tricky because of the requirements on - * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM - * volume 2 part 1: - * - * "1 of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - Notify Enable ([8] of DW1)" - * - * The cache flushes require the workaround flush that triggered this - * one, so we can't use it. Depth stall would trigger the same. - * Post-sync nonzero is what triggered this second workaround, so we - * can't use that one either. Notify enable is IRQs, which aren't - * really our business. That leaves only stall at scoreboard. - */ -static int -gen6_emit_post_sync_nonzero_flush(struct i915_request *rq) -{ - u32 scratch_addr = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - u32 *cs; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; /* low dword */ - *cs++ = 0; /* high dword */ - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - *cs++ = 0; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen6_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 scratch_addr = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - u32 *cs, flags = 0; - int ret; - - /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = gen6_emit_post_sync_nonzero_flush(rq); - if (ret) - return ret; - - /* Just flush everything. Experiments have shown that reducing the - * number of bits based on the write domains has little performance - * impact. - */ - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* - * Ensure that any following seqno writes only happen - * when the render cache is indeed flushed. - */ - flags |= PIPE_CONTROL_CS_STALL; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - /* - * TLB invalidate requires a post-sync write. - */ - flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = flags; - *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = 0; - *cs++ = 0; - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT) | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = 0; - - /* Finally we can flush and with it emit the breadcrumb */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset | - PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -static int -gen7_render_ring_cs_stall_wa(struct i915_request *rq) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; - *cs++ = 0; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen7_render_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 scratch_addr = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - u32 *cs, flags = 0; - - /* - * Ensure that any following seqno writes only happen when the render - * cache is indeed flushed. - * - * Workaround: 4th PIPE_CONTROL command (except the ones with only - * read-cache invalidate bits set) must have the CS_STALL bit set. We - * don't try to be clever and just set it unconditionally. - */ - flags |= PIPE_CONTROL_CS_STALL; - - /* - * CS_STALL suggests at least a post-sync write. - */ - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - /* Just flush everything. Experiments have shown that reducing the - * number of bits based on the write domains has little performance - * impact. - */ - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; - - /* Workaround: we must issue a pipe_control with CS-stall bit - * set before a pipe_control command that has the state cache - * invalidate bit set. */ - gen7_render_ring_cs_stall_wa(rq); - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - intel_ring_advance(rq, cs); - - return 0; -} - -static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN7_XCS_WA 32 -static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | - MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->fence.seqno; - - for (i = 0; i < GEN7_XCS_WA; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - } - - *cs++ = MI_FLUSH_DW; - *cs++ = 0; - *cs++ = 0; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} -#undef GEN7_XCS_WA - static void set_hwstam(struct intel_engine_cs *engine, u32 mask) { /* @@ -865,32 +429,6 @@ static void reset_finish(struct intel_engine_cs *engine) { } -static int rcs_resume(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - - /* - * Disable CONSTANT_BUFFER before it is loaded from the context - * image. For as it is loaded, it is executed and the stored - * address may no longer be valid, leading to a GPU hang. - * - * This imposes the requirement that userspace reload their - * CONSTANT_BUFFER on every batch, fortunately a requirement - * they are already accustomed to from before contexts were - * enabled. - */ - if (IS_GEN(i915, 4)) - intel_uncore_write(uncore, ECOSKPD, - _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); - - if (IS_GEN_RANGE(i915, 6, 7)) - intel_uncore_write(uncore, INSTPM, - _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - - return xcs_resume(engine); -} - static void reset_cancel(struct intel_engine_cs *engine) { struct i915_request *request; @@ -918,255 +456,6 @@ static void i9xx_submit_request(struct i915_request *request) intel_ring_set_tail(request->ring, request->tail)); } -static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} - -#define GEN5_WA_STORES 8 /* must be at least 1! */ -static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - int i; - - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); - - *cs++ = MI_FLUSH; - - BUILD_BUG_ON(GEN5_WA_STORES < 1); - for (i = 0; i < GEN5_WA_STORES; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - } - - *cs++ = MI_USER_INTERRUPT; - - rq->tail = intel_ring_offset(rq, cs); - assert_ring_tail_valid(rq->ring, rq->tail); - - return cs; -} -#undef GEN5_WA_STORES - -static void -gen5_irq_enable(struct intel_engine_cs *engine) -{ - gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -gen5_irq_disable(struct intel_engine_cs *engine) -{ - gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -i9xx_irq_enable(struct intel_engine_cs *engine) -{ - engine->i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); - intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR); -} - -static void -i9xx_irq_disable(struct intel_engine_cs *engine) -{ - engine->i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask); -} - -static void -i8xx_irq_enable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - i915->irq_mask &= ~engine->irq_enable_mask; - intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); - ENGINE_POSTING_READ16(engine, RING_IMR); -} - -static void -i8xx_irq_disable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - i915->irq_mask |= engine->irq_enable_mask; - intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); -} - -static int -bsd_ring_flush(struct i915_request *rq, u32 mode) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_FLUSH; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - return 0; -} - -static void -gen6_irq_enable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - - /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ - ENGINE_POSTING_READ(engine, RING_IMR); - - gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -gen6_irq_disable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask); -} - -static void -hsw_vebox_irq_enable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask); - - /* Flush/delay to ensure the RING_IMR is active before the GT IMR */ - ENGINE_POSTING_READ(engine, RING_IMR); - - gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask); -} - -static void -hsw_vebox_irq_disable(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~0); - gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask); -} - -static int -i965_emit_bb_start(struct i915_request *rq, - u64 offset, u32 length, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & - I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ -#define I830_BATCH_LIMIT SZ_256K -#define I830_TLB_ENTRIES (2) -#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) -static int -i830_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs, cs_offset = - intel_gt_scratch_offset(rq->engine->gt, - INTEL_GT_SCRATCH_FIELD_DEFAULT); - - GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Evict the invalid PTE TLBs */ - *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; - *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ - *cs++ = cs_offset; - *cs++ = 0xdeadbeef; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { - if (len > I830_BATCH_LIMIT) - return -ENOSPC; - - cs = intel_ring_begin(rq, 6 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Blit the batch (which has now all relocs applied) to the - * stable batch scratch bo area (so that the CS never - * stumbles over its tlb invalidation bug) ... - */ - *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; - *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; - *cs++ = cs_offset; - *cs++ = 4096; - *cs++ = offset; - - *cs++ = MI_FLUSH; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - /* ... and execute it. */ - offset = cs_offset; - } - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : - MI_BATCH_NON_SECURE); - intel_ring_advance(rq, cs); - - return 0; -} - -static int -i915_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : - MI_BATCH_NON_SECURE); - intel_ring_advance(rq, cs); - - return 0; -} - static void __ring_context_fini(struct intel_context *ce) { i915_vma_put(ce->state); @@ -1254,7 +543,7 @@ alloc_context_vma(struct intel_engine_cs *engine) vaddr, engine->context_size); i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); + __i915_gem_object_release_map(obj); } vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); @@ -1356,11 +645,11 @@ static inline int mi_set_context(struct i915_request *rq, struct intel_context *ce, u32 flags) { - struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; + struct drm_i915_private *i915 = engine->i915; enum intel_engine_id id; const int num_engines = - IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; + IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; @@ -1471,7 +760,7 @@ static inline int mi_set_context(struct i915_request *rq, static int remap_l3_slice(struct i915_request *rq, int slice) { - u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; + u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice]; int i; if (!remap_info) @@ -1582,7 +871,7 @@ static int switch_context(struct i915_request *rq) void **residuals = NULL; int ret; - GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { if (engine->wa_ctx.vma->private != ce) { @@ -1704,99 +993,6 @@ static void gen6_bsd_submit_request(struct i915_request *request) intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); } -static int mi_flush_dw(struct i915_request *rq, u32 flags) -{ - u32 cmd, *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cmd = MI_FLUSH_DW; - - /* - * We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). - */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - - /* - * Bspec vol 1c.3 - blitter engine command streamer: - * "If ENABLED, all TLBs will be invalidated once the flush - * operation is complete. This bit is only valid when the - * Post-Sync Operation field is a value of 1h or 3h." - */ - cmd |= flags; - - *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags) -{ - return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0); -} - -static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) -{ - return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD); -} - -static int -hsw_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW); - /* bit0-7 is the length on GEN6+ */ - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen6_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965); - /* bit0-7 is the length on GEN6+ */ - *cs++ = offset; - intel_ring_advance(rq, cs); - - return 0; -} - -/* Blitter support (SandyBridge+) */ - -static int gen6_ring_flush(struct i915_request *rq, u32 mode) -{ - return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); -} - static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; @@ -1843,11 +1039,11 @@ static void setup_irq(struct intel_engine_cs *engine) engine->irq_enable = gen5_irq_enable; engine->irq_disable = gen5_irq_disable; } else if (INTEL_GEN(i915) >= 3) { - engine->irq_enable = i9xx_irq_enable; - engine->irq_disable = i9xx_irq_disable; + engine->irq_enable = gen3_irq_enable; + engine->irq_disable = gen3_irq_disable; } else { - engine->irq_enable = i8xx_irq_enable; - engine->irq_disable = i8xx_irq_disable; + engine->irq_enable = gen2_irq_enable; + engine->irq_disable = gen2_irq_disable; } } @@ -1874,7 +1070,7 @@ static void setup_common(struct intel_engine_cs *engine) * equivalent to our next initial bread so we can elide * engine->emit_init_breadcrumb(). */ - engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; if (IS_GEN(i915, 5)) engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; @@ -1883,11 +1079,11 @@ static void setup_common(struct intel_engine_cs *engine) if (INTEL_GEN(i915) >= 6) engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(i915) >= 4) - engine->emit_bb_start = i965_emit_bb_start; + engine->emit_bb_start = gen4_emit_bb_start; else if (IS_I830(i915) || IS_I845G(i915)) engine->emit_bb_start = i830_emit_bb_start; else - engine->emit_bb_start = i915_emit_bb_start; + engine->emit_bb_start = gen3_emit_bb_start; } static void setup_rcs(struct intel_engine_cs *engine) @@ -1900,25 +1096,23 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; if (INTEL_GEN(i915) >= 7) { - engine->emit_flush = gen7_render_ring_flush; - engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; + engine->emit_flush = gen7_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs; } else if (IS_GEN(i915, 6)) { - engine->emit_flush = gen6_render_ring_flush; - engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; + engine->emit_flush = gen6_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs; } else if (IS_GEN(i915, 5)) { - engine->emit_flush = gen4_render_ring_flush; + engine->emit_flush = gen4_emit_flush_rcs; } else { if (INTEL_GEN(i915) < 4) - engine->emit_flush = gen2_render_ring_flush; + engine->emit_flush = gen2_emit_flush; else - engine->emit_flush = gen4_render_ring_flush; + engine->emit_flush = gen4_emit_flush_rcs; engine->irq_enable_mask = I915_USER_INTERRUPT; } if (IS_HASWELL(i915)) engine->emit_bb_start = hsw_emit_bb_start; - - engine->resume = rcs_resume; } static void setup_vcs(struct intel_engine_cs *engine) @@ -1929,15 +1123,15 @@ static void setup_vcs(struct intel_engine_cs *engine) /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN(i915, 6)) engine->set_default_submission = gen6_bsd_set_default_submission; - engine->emit_flush = gen6_bsd_ring_flush; + engine->emit_flush = gen6_emit_flush_vcs; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; if (IS_GEN(i915, 6)) - engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; else - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } else { - engine->emit_flush = bsd_ring_flush; + engine->emit_flush = gen4_emit_flush_vcs; if (IS_GEN(i915, 5)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else @@ -1949,13 +1143,13 @@ static void setup_bcs(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - engine->emit_flush = gen6_ring_flush; + engine->emit_flush = gen6_emit_flush_xcs; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; if (IS_GEN(i915, 6)) - engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; else - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } static void setup_vecs(struct intel_engine_cs *engine) @@ -1964,12 +1158,12 @@ static void setup_vecs(struct intel_engine_cs *engine) GEM_BUG_ON(INTEL_GEN(i915) < 7); - engine->emit_flush = gen6_ring_flush; + engine->emit_flush = gen6_emit_flush_xcs; engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - engine->irq_enable = hsw_vebox_irq_enable; - engine->irq_disable = hsw_vebox_irq_disable; + engine->irq_enable = hsw_irq_enable_vecs; + engine->irq_disable = hsw_irq_disable_vecs; - engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; } static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 2f59fc6df3c2..97ba14ad52e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -51,15 +51,16 @@ static void rps_timer(struct timer_list *t) { struct intel_rps *rps = from_timer(rps, t, timer); struct intel_engine_cs *engine; + ktime_t dt, last, timestamp; enum intel_engine_id id; s64 max_busy[3] = {}; - ktime_t dt, last; + timestamp = 0; for_each_engine(engine, rps_to_gt(rps), id) { s64 busy; int i; - dt = intel_engine_get_busy_time(engine); + dt = intel_engine_get_busy_time(engine, ×tamp); last = engine->stats.rps; engine->stats.rps = dt; @@ -69,16 +70,14 @@ static void rps_timer(struct timer_list *t) swap(busy, max_busy[i]); } } - - dt = ktime_get(); last = rps->pm_timestamp; - rps->pm_timestamp = dt; + rps->pm_timestamp = timestamp; if (intel_rps_is_active(rps)) { s64 busy; int i; - dt = ktime_sub(dt, last); + dt = ktime_sub(timestamp, last); /* * Our goal is to evaluate each engine independently, so we run @@ -1063,11 +1062,12 @@ static bool gen6_rps_enable(struct intel_rps *rps) static int chv_rps_max_freq(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_gt *gt = rps_to_gt(rps); u32 val; val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); - switch (RUNTIME_INFO(i915)->sseu.eu_total) { + switch (gt->info.sseu.eu_total) { case 8: /* (2 * 4) config */ val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index d173271c7397..f1c039e1b5ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -60,10 +60,552 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) return hweight32(intel_sseu_get_subslices(sseu, slice)); } -u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, +static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + int slice_stride = sseu->max_subslices * sseu->eu_stride; + + return slice * slice_stride + subslice * sseu->eu_stride; +} + +static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + u16 eu_mask = 0; + + for (i = 0; i < sseu->eu_stride; i++) + eu_mask |= + ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); + + return eu_mask; +} + +static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, + u16 eu_mask) +{ + int i, offset = sseu_eu_idx(sseu, slice, subslice); + + for (i = 0; i < sseu->eu_stride; i++) + sseu->eu_mask[offset + i] = + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; +} + +static u16 compute_eu_total(const struct sseu_dev_info *sseu) +{ + u16 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) + total += hweight8(sseu->eu_mask[i]); + + return total; +} + +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + u8 s_en; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. + */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; + + dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + +static void gen11_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + u32 ss_en; + u8 eu_en; + u8 s_en; + + if (IS_ELKHARTLAKE(gt->i915)) + intel_sseu_set_info(sseu, 1, 4, 8); + else + intel_sseu_set_info(sseu, 1, 8, 8); + + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; + ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE); + + eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); + + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); + + /* ICL has no power gating restrictions. */ + sseu->has_slice_pg = 1; + sseu->has_subslice_pg = 1; + sseu->has_eu_pg = 1; +} + +static void gen10_sseu_info_init(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + struct sseu_dev_info *sseu = >->info.sseu; + const u32 fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); + const int eu_mask = 0xff; + u32 subslice_mask, eu_en; + int s, ss; + + intel_sseu_set_info(sseu, 6, 4, 8); + + sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> + GEN10_F2_S_ENA_SHIFT; + + /* Slice0 */ + eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE0); + for (ss = 0; ss < sseu->max_subslices; ss++) + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); + /* Slice1 */ + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); + eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE1); + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); + /* Slice2 */ + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); + /* Slice3 */ + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); + eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE2); + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); + /* Slice4 */ + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); + /* Slice5 */ + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); + eu_en = ~intel_uncore_read(uncore, GEN10_EU_DISABLE3); + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); + + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + for (s = 0; s < sseu->max_slices; s++) { + u32 subslice_mask_with_eus = subslice_mask; + + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu_get_eus(sseu, s, ss) == 0) + subslice_mask_with_eus &= ~BIT(ss); + } + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. + */ + intel_sseu_set_subslices(sseu, s, s == 0 ? + subslice_mask_with_eus : + subslice_mask_with_eus & 0x3); + } + + sseu->eu_total = compute_eu_total(sseu); + + /* + * CNL is expected to always have a uniform distribution + * of EU across subslices with the exception that any one + * EU in any one subslice may be fused off for die + * recovery. + */ + sseu->eu_per_subslice = + intel_sseu_subslice_total(sseu) ? + DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) : + 0; + + /* No restrictions on Power Gating */ + sseu->has_slice_pg = 1; + sseu->has_subslice_pg = 1; + sseu->has_eu_pg = 1; +} + +static void cherryview_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + u32 fuse; + u8 subslice_mask = 0; + + fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT); + + sseu->slice_mask = BIT(0); + intel_sseu_set_info(sseu, 1, 2, 8); + + if (!(fuse & CHV_FGT_DISABLE_SS0)) { + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + + subslice_mask |= BIT(0); + sseu_set_eus(sseu, 0, 0, ~disabled_mask); + } + + if (!(fuse & CHV_FGT_DISABLE_SS1)) { + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + + subslice_mask |= BIT(1); + sseu_set_eus(sseu, 0, 1, ~disabled_mask); + } + + intel_sseu_set_subslices(sseu, 0, subslice_mask); + + sseu->eu_total = compute_eu_total(sseu); + + /* + * CHV expected to always have a uniform distribution of EU + * across subslices. + */ + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? + sseu->eu_total / + intel_sseu_subslice_total(sseu) : + 0; + /* + * CHV supports subslice power gating on devices with more than + * one subslice, and supports EU power gating on devices with + * more than one EU pair per subslice. + */ + sseu->has_slice_pg = 0; + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1; + sseu->has_eu_pg = (sseu->eu_per_subslice > 2); +} + +static void gen9_sseu_info_init(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_device_info *info = mkwrite_device_info(i915); + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + u32 fuse2, eu_disable, subslice_mask; + const u8 eu_mask = 0xff; + int s, ss; + + fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); + sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + + /* BXT has a single slice and at most 3 subslices. */ + intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3, + IS_GEN9_LP(i915) ? 3 : 4, 8); + + /* + * The subslice disable field is global, i.e. it applies + * to each of the enabled slices. + */ + subslice_mask = (1 << sseu->max_subslices) - 1; + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); + + /* + * Iterate through enabled slices and subslices to + * count the total enabled EU. + */ + for (s = 0; s < sseu->max_slices; s++) { + if (!(sseu->slice_mask & BIT(s))) + /* skip disabled slice */ + continue; + + intel_sseu_set_subslices(sseu, s, subslice_mask); + + eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); + for (ss = 0; ss < sseu->max_subslices; ss++) { + int eu_per_ss; + u8 eu_disabled_mask; + + if (!intel_sseu_has_subslice(sseu, s, ss)) + /* skip disabled subslice */ + continue; + + eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + eu_per_ss = sseu->max_eus_per_subslice - + hweight8(eu_disabled_mask); + + /* + * Record which subslice(s) has(have) 7 EUs. we + * can tune the hash used to spread work among + * subslices if they are unbalanced. + */ + if (eu_per_ss == 7) + sseu->subslice_7eu[s] |= BIT(ss); + } + } + + sseu->eu_total = compute_eu_total(sseu); + + /* + * SKL is expected to always have a uniform distribution + * of EU across subslices with the exception that any one + * EU in any one subslice may be fused off for die + * recovery. BXT is expected to be perfectly uniform in EU + * distribution. + */ + sseu->eu_per_subslice = + intel_sseu_subslice_total(sseu) ? + DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) : + 0; + + /* + * SKL+ supports slice power gating on devices with more than + * one slice, and supports EU power gating on devices with + * more than one EU pair per subslice. BXT+ supports subslice + * power gating on devices with more than one subslice, and + * supports EU power gating on devices with more than one EU + * pair per subslice. + */ + sseu->has_slice_pg = + !IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1; + sseu->has_subslice_pg = + IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1; + sseu->has_eu_pg = sseu->eu_per_subslice > 2; + + if (IS_GEN9_LP(i915)) { +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; + + sseu->min_eu_in_pool = 0; + if (info->has_pooled_eu) { + if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0)) + sseu->min_eu_in_pool = 3; + else if (IS_SS_DISABLED(1)) + sseu->min_eu_in_pool = 6; + else + sseu->min_eu_in_pool = 9; + } +#undef IS_SS_DISABLED + } +} + +static void bdw_sseu_info_init(struct intel_gt *gt) +{ + struct sseu_dev_info *sseu = >->info.sseu; + struct intel_uncore *uncore = gt->uncore; + int s, ss; + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ + u32 eu_disable0, eu_disable1, eu_disable2; + + fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); + sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + intel_sseu_set_info(sseu, 3, 3, 8); + + /* + * The subslice disable field is global, i.e. it applies + * to each of the enabled slices. + */ + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> + GEN8_F2_SS_DIS_SHIFT); + eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0); + eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1); + eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2); + eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK; + eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) | + ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) << + (32 - GEN8_EU_DIS0_S1_SHIFT)); + eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) | + ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) << + (32 - GEN8_EU_DIS1_S2_SHIFT)); + + /* + * Iterate through enabled slices and subslices to + * count the total enabled EU. + */ + for (s = 0; s < sseu->max_slices; s++) { + if (!(sseu->slice_mask & BIT(s))) + /* skip disabled slice */ + continue; + + intel_sseu_set_subslices(sseu, s, subslice_mask); + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u8 eu_disabled_mask; + u32 n_disabled; + + if (!intel_sseu_has_subslice(sseu, s, ss)) + /* skip disabled subslice */ + continue; + + eu_disabled_mask = + eu_disable[s] >> (ss * sseu->max_eus_per_subslice); + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + n_disabled = hweight8(eu_disabled_mask); + + /* + * Record which subslices have 7 EUs. + */ + if (sseu->max_eus_per_subslice - n_disabled == 7) + sseu->subslice_7eu[s] |= 1 << ss; + } + } + + sseu->eu_total = compute_eu_total(sseu); + + /* + * BDW is expected to always have a uniform distribution of EU across + * subslices with the exception that any one EU in any one subslice may + * be fused off for die recovery. + */ + sseu->eu_per_subslice = + intel_sseu_subslice_total(sseu) ? + DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) : + 0; + + /* + * BDW supports slice power gating on devices with more than + * one slice. + */ + sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1; + sseu->has_subslice_pg = 0; + sseu->has_eu_pg = 0; +} + +static void hsw_sseu_info_init(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct sseu_dev_info *sseu = >->info.sseu; + u32 fuse1; + u8 subslice_mask = 0; + int s, ss; + + /* + * There isn't a register to tell us how many slices/subslices. We + * work off the PCI-ids here. + */ + switch (INTEL_INFO(i915)->gt) { + default: + MISSING_CASE(INTEL_INFO(i915)->gt); + fallthrough; + case 1: + sseu->slice_mask = BIT(0); + subslice_mask = BIT(0); + break; + case 2: + sseu->slice_mask = BIT(0); + subslice_mask = BIT(0) | BIT(1); + break; + case 3: + sseu->slice_mask = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); + break; + } + + fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1); + switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { + default: + MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >> + HSW_F1_EU_DIS_SHIFT); + fallthrough; + case HSW_F1_EU_DIS_10EUS: + sseu->eu_per_subslice = 10; + break; + case HSW_F1_EU_DIS_8EUS: + sseu->eu_per_subslice = 8; + break; + case HSW_F1_EU_DIS_6EUS: + sseu->eu_per_subslice = 6; + break; + } + + intel_sseu_set_info(sseu, hweight8(sseu->slice_mask), + hweight8(subslice_mask), + sseu->eu_per_subslice); + + for (s = 0; s < sseu->max_slices; s++) { + intel_sseu_set_subslices(sseu, s, subslice_mask); + + for (ss = 0; ss < sseu->max_subslices; ss++) { + sseu_set_eus(sseu, s, ss, + (1UL << sseu->eu_per_subslice) - 1); + } + } + + sseu->eu_total = compute_eu_total(sseu); + + /* No powergating for you. */ + sseu->has_slice_pg = 0; + sseu->has_subslice_pg = 0; + sseu->has_eu_pg = 0; +} + +void intel_sseu_info_init(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + if (IS_HASWELL(i915)) + hsw_sseu_info_init(gt); + else if (IS_CHERRYVIEW(i915)) + cherryview_sseu_info_init(gt); + else if (IS_BROADWELL(i915)) + bdw_sseu_info_init(gt); + else if (IS_GEN(i915, 9)) + gen9_sseu_info_init(gt); + else if (IS_GEN(i915, 10)) + gen10_sseu_info_init(gt); + else if (IS_GEN(i915, 11)) + gen11_sseu_info_init(gt); + else if (INTEL_GEN(i915) >= 12) + gen12_sseu_info_init(gt); +} + +u32 intel_sseu_make_rpcs(struct intel_gt *gt, const struct intel_sseu *req_sseu) { - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + struct drm_i915_private *i915 = gt->i915; + const struct sseu_dev_info *sseu = >->info.sseu; bool subslice_pg = sseu->has_subslice_pg; u8 slices, subslices; u32 rpcs = 0; @@ -173,3 +715,48 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, return rpcs; } + +void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) +{ + int s; + + drm_printf(p, "slice total: %u, mask=%04x\n", + hweight8(sseu->slice_mask), sseu->slice_mask); + drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu)); + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslices, mask=%08x\n", + s, intel_sseu_subslices_per_slice(sseu, s), + intel_sseu_get_subslices(sseu, s)); + } + drm_printf(p, "EU total: %u\n", sseu->eu_total); + drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); + drm_printf(p, "has slice power gating: %s\n", + yesno(sseu->has_slice_pg)); + drm_printf(p, "has subslice power gating: %s\n", + yesno(sseu->has_subslice_pg)); + drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg)); +} + +void intel_sseu_print_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) +{ + int s, ss; + + if (sseu->max_slices == 0) { + drm_printf(p, "Unavailable\n"); + return; + } + + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n", + s, intel_sseu_subslices_per_slice(sseu, s), + intel_sseu_get_subslices(sseu, s)); + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u16 enabled_eus = sseu_get_eus(sseu, s, ss); + + drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", + ss, hweight16(enabled_eus), enabled_eus); + } + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index d1d225204f09..23ba6c2ebe70 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -13,6 +13,8 @@ #include "i915_gem.h" struct drm_i915_private; +struct intel_gt; +struct drm_printer; #define GEN_MAX_SLICES (6) /* CNL upper bound */ #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ @@ -94,7 +96,13 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u32 ss_mask); -u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, +void intel_sseu_info_init(struct intel_gt *gt); + +u32 intel_sseu_make_rpcs(struct intel_gt *gt, const struct intel_sseu *req_sseu); +void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p); +void intel_sseu_print_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p); + #endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c new file mode 100644 index 000000000000..51780282d872 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2020 Intel Corporation + */ + +#include "debugfs_gt.h" +#include "intel_sseu_debugfs.h" +#include "i915_drv.h" + +static void sseu_copy_subslices(const struct sseu_dev_info *sseu, + int slice, u8 *to_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride); +} + +static void cherryview_sseu_device_status(struct intel_gt *gt, + struct sseu_dev_info *sseu) +{ +#define SS_MAX 2 + struct intel_uncore *uncore = gt->uncore; + const int ss_max = SS_MAX; + u32 sig1[SS_MAX], sig2[SS_MAX]; + int ss; + + sig1[0] = intel_uncore_read(uncore, CHV_POWER_SS0_SIG1); + sig1[1] = intel_uncore_read(uncore, CHV_POWER_SS1_SIG1); + sig2[0] = intel_uncore_read(uncore, CHV_POWER_SS0_SIG2); + sig2[1] = intel_uncore_read(uncore, CHV_POWER_SS1_SIG2); + + for (ss = 0; ss < ss_max; ss++) { + unsigned int eu_cnt; + + if (sig1[ss] & CHV_SS_PG_ENABLE) + /* skip disabled subslice */ + continue; + + sseu->slice_mask = BIT(0); + sseu->subslice_mask[0] |= BIT(ss); + eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + + ((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2); + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, eu_cnt); + } +#undef SS_MAX +} + +static void gen10_sseu_device_status(struct intel_gt *gt, + struct sseu_dev_info *sseu) +{ +#define SS_MAX 6 + struct intel_uncore *uncore = gt->uncore; + const struct intel_gt_info *info = >->info; + u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; + int s, ss; + + for (s = 0; s < info->sseu.max_slices; s++) { + /* + * FIXME: Valid SS Mask respects the spec and read + * only valid bits for those registers, excluding reserved + * although this seems wrong because it would leave many + * subslices without ACK. + */ + s_reg[s] = intel_uncore_read(uncore, GEN10_SLICE_PGCTL_ACK(s)) & + GEN10_PGCTL_VALID_SS_MASK(s); + eu_reg[2 * s] = intel_uncore_read(uncore, + GEN10_SS01_EU_PGCTL_ACK(s)); + eu_reg[2 * s + 1] = intel_uncore_read(uncore, + GEN10_SS23_EU_PGCTL_ACK(s)); + } + + eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK | + GEN9_PGCTL_SSA_EU19_ACK | + GEN9_PGCTL_SSA_EU210_ACK | + GEN9_PGCTL_SSA_EU311_ACK; + eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK | + GEN9_PGCTL_SSB_EU19_ACK | + GEN9_PGCTL_SSB_EU210_ACK | + GEN9_PGCTL_SSB_EU311_ACK; + + for (s = 0; s < info->sseu.max_slices; s++) { + if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) + /* skip disabled slice */ + continue; + + sseu->slice_mask |= BIT(s); + sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask); + + for (ss = 0; ss < info->sseu.max_subslices; ss++) { + unsigned int eu_cnt; + + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + /* skip disabled subslice */ + continue; + + eu_cnt = 2 * hweight32(eu_reg[2 * s + ss / 2] & + eu_mask[ss % 2]); + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, + eu_cnt); + } + } +#undef SS_MAX +} + +static void gen9_sseu_device_status(struct intel_gt *gt, + struct sseu_dev_info *sseu) +{ +#define SS_MAX 3 + struct intel_uncore *uncore = gt->uncore; + const struct intel_gt_info *info = >->info; + u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; + int s, ss; + + for (s = 0; s < info->sseu.max_slices; s++) { + s_reg[s] = intel_uncore_read(uncore, GEN9_SLICE_PGCTL_ACK(s)); + eu_reg[2 * s] = + intel_uncore_read(uncore, GEN9_SS01_EU_PGCTL_ACK(s)); + eu_reg[2 * s + 1] = + intel_uncore_read(uncore, GEN9_SS23_EU_PGCTL_ACK(s)); + } + + eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK | + GEN9_PGCTL_SSA_EU19_ACK | + GEN9_PGCTL_SSA_EU210_ACK | + GEN9_PGCTL_SSA_EU311_ACK; + eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK | + GEN9_PGCTL_SSB_EU19_ACK | + GEN9_PGCTL_SSB_EU210_ACK | + GEN9_PGCTL_SSB_EU311_ACK; + + for (s = 0; s < info->sseu.max_slices; s++) { + if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) + /* skip disabled slice */ + continue; + + sseu->slice_mask |= BIT(s); + + if (IS_GEN9_BC(gt->i915)) + sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); + + for (ss = 0; ss < info->sseu.max_subslices; ss++) { + unsigned int eu_cnt; + u8 ss_idx = s * info->sseu.ss_stride + + ss / BITS_PER_BYTE; + + if (IS_GEN9_LP(gt->i915)) { + if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + /* skip disabled subslice */ + continue; + + sseu->subslice_mask[ss_idx] |= + BIT(ss % BITS_PER_BYTE); + } + + eu_cnt = eu_reg[2 * s + ss / 2] & eu_mask[ss % 2]; + eu_cnt = 2 * hweight32(eu_cnt); + + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, + eu_cnt); + } + } +#undef SS_MAX +} + +static void bdw_sseu_device_status(struct intel_gt *gt, + struct sseu_dev_info *sseu) +{ + const struct intel_gt_info *info = >->info; + u32 slice_info = intel_uncore_read(gt->uncore, GEN8_GT_SLICE_INFO); + int s; + + sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; + + if (sseu->slice_mask) { + sseu->eu_per_subslice = info->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) + sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); + sseu->eu_total = sseu->eu_per_subslice * + intel_sseu_subslice_total(sseu); + + /* subtract fused off EU(s) from enabled slice(s) */ + for (s = 0; s < fls(sseu->slice_mask); s++) { + u8 subslice_7eu = info->sseu.subslice_7eu[s]; + + sseu->eu_total -= hweight8(subslice_7eu); + } + } +} + +static void i915_print_sseu_info(struct seq_file *m, + bool is_available_info, + bool has_pooled_eu, + const struct sseu_dev_info *sseu) +{ + const char *type = is_available_info ? "Available" : "Enabled"; + int s; + + seq_printf(m, " %s Slice Mask: %04x\n", type, + sseu->slice_mask); + seq_printf(m, " %s Slice Total: %u\n", type, + hweight8(sseu->slice_mask)); + seq_printf(m, " %s Subslice Total: %u\n", type, + intel_sseu_subslice_total(sseu)); + for (s = 0; s < fls(sseu->slice_mask); s++) { + seq_printf(m, " %s Slice%i subslices: %u\n", type, + s, intel_sseu_subslices_per_slice(sseu, s)); + } + seq_printf(m, " %s EU Total: %u\n", type, + sseu->eu_total); + seq_printf(m, " %s EU Per Subslice: %u\n", type, + sseu->eu_per_subslice); + + if (!is_available_info) + return; + + seq_printf(m, " Has Pooled EU: %s\n", yesno(has_pooled_eu)); + if (has_pooled_eu) + seq_printf(m, " Min EU in pool: %u\n", sseu->min_eu_in_pool); + + seq_printf(m, " Has Slice Power Gating: %s\n", + yesno(sseu->has_slice_pg)); + seq_printf(m, " Has Subslice Power Gating: %s\n", + yesno(sseu->has_subslice_pg)); + seq_printf(m, " Has EU Power Gating: %s\n", + yesno(sseu->has_eu_pg)); +} + +/* + * this is called from top-level debugfs as well, so we can't get the gt from + * the seq_file. + */ +int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + const struct intel_gt_info *info = >->info; + struct sseu_dev_info sseu; + intel_wakeref_t wakeref; + + if (INTEL_GEN(i915) < 8) + return -ENODEV; + + seq_puts(m, "SSEU Device Info\n"); + i915_print_sseu_info(m, true, HAS_POOLED_EU(i915), &info->sseu); + + seq_puts(m, "SSEU Device Status\n"); + memset(&sseu, 0, sizeof(sseu)); + intel_sseu_set_info(&sseu, info->sseu.max_slices, + info->sseu.max_subslices, + info->sseu.max_eus_per_subslice); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + if (IS_CHERRYVIEW(i915)) + cherryview_sseu_device_status(gt, &sseu); + else if (IS_BROADWELL(i915)) + bdw_sseu_device_status(gt, &sseu); + else if (IS_GEN(i915, 9)) + gen9_sseu_device_status(gt, &sseu); + else if (INTEL_GEN(i915) >= 10) + gen10_sseu_device_status(gt, &sseu); + } + + i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu); + + return 0; +} + +static int sseu_status_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + + return intel_sseu_status(m, gt); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(sseu_status); + +static int rcs_topology_show(struct seq_file *m, void *unused) +{ + struct intel_gt *gt = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + intel_sseu_print_topology(>->info.sseu, &p); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(rcs_topology); + +void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root) +{ + static const struct debugfs_gt_file files[] = { + { "sseu_status", &sseu_status_fops, NULL }, + { "rcs_topology", &rcs_topology_fops, NULL }, + }; + + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h new file mode 100644 index 000000000000..73f001589e90 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ + +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef INTEL_SSEU_DEBUGFS_H +#define INTEL_SSEU_DEBUGFS_H + +struct intel_gt; +struct dentry; +struct seq_file; + +int intel_sseu_status(struct seq_file *m, struct intel_gt *gt); +void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root); + +#endif /* INTEL_SSEU_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 4546284fede1..46d20f5f3ddc 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -73,6 +73,8 @@ hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) return vma; } + GT_TRACE(timeline->gt, "new HWSP allocated\n"); + vma->private = hwsp; hwsp->gt = timeline->gt; hwsp->vma = vma; @@ -327,6 +329,8 @@ int intel_timeline_pin(struct intel_timeline *tl) tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + offset_in_page(tl->hwsp_offset); + GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", + tl->fence_context, tl->hwsp_offset); cacheline_acquire(tl->hwsp_cacheline); if (atomic_fetch_inc(&tl->pin_count)) { @@ -434,6 +438,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, int err; might_lock(&tl->gt->ggtt->vm.mutex); + GT_TRACE(tl->gt, "timeline:%llx wrapped\n", tl->fence_context); /* * If there is an outstanding GPU reference to this cacheline, @@ -497,6 +502,8 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); tl->hwsp_offset += i915_ggtt_offset(vma); + GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", + tl->fence_context, tl->hwsp_offset); cacheline_acquire(cl); tl->hwsp_cacheline = cl; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 85d2bef51524..5726cd0a37e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -205,6 +205,18 @@ wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) #define WA_SET_FIELD_MASKED(addr, mask, value) \ wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value))) +static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); +} + +static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); +} + static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -355,7 +367,10 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, HDC_FORCE_NON_COHERENT); /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) + if (IS_SKYLAKE(i915) || + IS_KABYLAKE(i915) || + IS_COFFEELAKE(i915) || + IS_COMETLAKE(i915)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); @@ -389,7 +404,7 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, static void skl_tune_iz_hashing(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; u8 vals[3] = { 0, 0, 0 }; unsigned int i; @@ -400,7 +415,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine, * Only consider slices where one, and only one, subslice has 7 * EUs */ - if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i])) + if (!is_power_of_2(gt->info.sseu.subslice_7eu[i])) continue; /* @@ -409,7 +424,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine, * * -> 0 <= ss <= 3; */ - ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1; + ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1; vals[i] = 3 - ss; } @@ -600,11 +615,14 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, * Wa_1604555607:gen12 and Wa_1608008084:gen12 * FF_MODE2 register will return the wrong value when read. The default * value for this register is zero for all fields and there are no bit - * masks. So instead of doing a RMW we should just write the TDS timer - * value for Wa_1604555607. + * masks. So instead of doing a RMW we should just write the GS Timer + * and TDS timer values for Wa_1604555607 and Wa_16011163337. */ - wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, - FF_MODE2_TDS_TIMER_128, 0); + wa_add(wal, + FF_MODE2, + FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, + FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, + 0); /* WaDisableGPGPUMidThreadPreemption:tgl */ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, @@ -630,7 +648,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) cnl_ctx_workarounds_init(engine, wal); - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_ctx_workarounds_init(engine, wal); else if (IS_GEMINILAKE(i915)) glk_ctx_workarounds_init(engine, wal); @@ -644,6 +662,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, chv_ctx_workarounds_init(engine, wal); else if (IS_BROADWELL(i915)) bdw_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 7)) + gen7_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 6)) + gen6_ctx_workarounds_init(engine, wal); else if (INTEL_GEN(i915) < 8) return; else @@ -917,7 +939,7 @@ static void gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { /* WaDisableKillLogic:bxt,skl,kbl */ - if (!IS_COFFEELAKE(i915)) + if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915)) wa_write_or(wal, GAM_ECOCHK, ECOCHK_DIS_TLB); @@ -1014,7 +1036,7 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) static void wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + const struct sseu_dev_info *sseu = &i915->gt.info.sseu; unsigned int slice, subslice; u32 l3_en, mcr, mcr_mask; @@ -1180,7 +1202,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) cnl_gt_workarounds_init(i915, wal); - else if (IS_COFFEELAKE(i915)) + else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_gt_workarounds_init(i915, wal); else if (IS_GEMINILAKE(i915)) glk_gt_workarounds_init(i915, wal); @@ -1428,6 +1450,18 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine) RING_FORCE_TO_NONPRIV_RANGE_4); } +static void cml_whitelist_build(struct intel_engine_cs *engine) +{ + struct i915_wa_list *w = &engine->whitelist; + + if (engine->class != RENDER_CLASS) + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); + + cfl_whitelist_build(engine); +} + static void cnl_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1478,9 +1512,15 @@ static void icl_whitelist_build(struct intel_engine_cs *engine) /* hucStatus2RegOffset */ whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base), RING_FORCE_TO_NONPRIV_ACCESS_RD); + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; default: + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1512,6 +1552,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) whitelist_reg(w, HIZ_CHICKEN); break; default: + whitelist_reg_ext(w, + RING_CTX_TIMESTAMP(engine->mmio_base), + RING_FORCE_TO_NONPRIV_ACCESS_RD); break; } } @@ -1529,6 +1572,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) icl_whitelist_build(engine); else if (IS_CANNONLAKE(i915)) cnl_whitelist_build(engine); + else if (IS_COMETLAKE(i915)) + cml_whitelist_build(engine); else if (IS_COFFEELAKE(i915)) cfl_whitelist_build(engine); else if (IS_GEMINILAKE(i915)) @@ -1604,11 +1649,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_SARCHKMD, GEN7_DISABLE_SAMPLER_PREFETCH); - /* Wa_1407928979:tgl */ - wa_write_or(wal, - GEN7_FF_THREAD_MODE, - GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - /* Wa_1408615072:tgl */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL); @@ -1632,6 +1672,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_14010229206:tgl */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); + + /* + * Wa_1407928979:tgl A* + * Wa_18011464164:tgl B0+ + * Wa_22010931296:tgl B0+ + */ + wa_write_or(wal, GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE); } if (IS_GEN(i915, 11)) { @@ -1725,6 +1773,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* Wa_22010271021:ehl */ + if (IS_ELKHARTLAKE(i915)) + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); } if (IS_GEN_RANGE(i915, 9, 12)) { @@ -1734,7 +1788,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN9_FFSC_PERCTX_PREEMPT_CTRL); } - if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { + if (IS_SKYLAKE(i915) || + IS_KABYLAKE(i915) || + IS_COFFEELAKE(i915) || + IS_COMETLAKE(i915)) { /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ wa_write_or(wal, GEN8_GARBCNTL, @@ -1818,6 +1875,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), /* XXX bit doesn't stick on Broadwater */ IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); + + if (IS_GEN(i915, 4)) + /* + * Disable CONSTANT_BUFFER before it is loaded from the context + * image. For as it is loaded, it is executed and the stored + * address may no longer be valid, leading to a GPU hang. + * + * This imposes the requirement that userspace reload their + * CONSTANT_BUFFER on every batch, fortunately a requirement + * they are already accustomed to from before contexts were + * enabled. + */ + wa_add(wal, ECOSKPD, + 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), + 0 /* XXX bit doesn't stick on Broadwater */); } static void @@ -1932,7 +2004,7 @@ wa_list_srm(struct i915_request *rq, const struct i915_wa_list *wal, struct i915_vma *vma) { - struct drm_i915_private *i915 = rq->i915; + struct drm_i915_private *i915 = rq->engine->i915; unsigned int i, count = 0; const struct i915_wa *wa; u32 srm, *cs; @@ -2021,7 +2093,7 @@ static int engine_wa_list_verify(struct intel_context *ce, err = 0; for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg))) + if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg))) continue; if (!wa_verify(wa, results[i], wal->name, from)) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index f88e445a1cae..729c3c7b11e2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -49,7 +49,7 @@ static int write_timestamp(struct i915_request *rq, int slot) return PTR_ERR(cs); cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 697114dd1f47..73243ba59c7d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -10,6 +10,7 @@ #include "intel_gt_requests.h" #include "i915_selftest.h" +#include "selftest_engine_heartbeat.h" static int timeline_sync(struct intel_timeline *tl) { @@ -142,24 +143,6 @@ out: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine, - unsigned long *saved) -{ - *saved = engine->props.heartbeat_interval_ms; - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine, - unsigned long saved) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = saved; -} - static int live_idle_flush(void *arg) { struct intel_gt *gt = arg; @@ -170,11 +153,9 @@ static int live_idle_flush(void *arg) /* Check that we can flush the idle barriers */ for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + st_engine_heartbeat_disable(engine); err = __live_idle_pulse(engine, intel_engine_flush_barriers); - engine_heartbeat_enable(engine, heartbeat); + st_engine_heartbeat_enable(engine); if (err) break; } @@ -192,11 +173,9 @@ static int live_idle_pulse(void *arg) /* Check that heartbeat pulses flush the idle barriers */ for_each_engine(engine, gt, id) { - unsigned long heartbeat; - - engine_heartbeat_disable(engine, &heartbeat); + st_engine_heartbeat_disable(engine); err = __live_idle_pulse(engine, intel_engine_pulse); - engine_heartbeat_enable(engine, heartbeat); + st_engine_heartbeat_enable(engine); if (err && err != -ENODEV) break; @@ -386,11 +365,27 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - saved_hangcheck = i915_modparams.enable_hangcheck; - i915_modparams.enable_hangcheck = INT_MAX; + saved_hangcheck = i915->params.enable_hangcheck; + i915->params.enable_hangcheck = INT_MAX; err = intel_gt_live_subtests(tests, &i915->gt); - i915_modparams.enable_hangcheck = saved_hangcheck; + i915->params.enable_hangcheck = saved_hangcheck; return err; } + +void st_engine_heartbeat_disable(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +void st_engine_heartbeat_enable(struct intel_engine_cs *engine) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h new file mode 100644 index 000000000000..cd27113d5400 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef SELFTEST_ENGINE_HEARTBEAT_H +#define SELFTEST_ENGINE_HEARTBEAT_H + +struct intel_engine_cs; + +void st_engine_heartbeat_disable(struct intel_engine_cs *engine); +void st_engine_heartbeat_enable(struct intel_engine_cs *engine); + +#endif /* SELFTEST_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index cbf6b0735272..b08fc5390e8a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -6,7 +6,107 @@ #include "i915_selftest.h" #include "selftest_engine.h" +#include "selftest_engine_heartbeat.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_spinner.h" + +static int live_engine_busy_stats(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * Check that if an engine supports busy-stats, they tell the truth. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + ktime_t de, dt; + ktime_t t[2]; + + if (!intel_engine_supports_stats(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (intel_gt_pm_wait_for_idle(gt)) { + err = -EBUSY; + break; + } + + st_engine_heartbeat_disable(engine); + + ENGINE_TRACE(engine, "measuring idle time\n"); + preempt_disable(); + de = intel_engine_get_busy_time(engine, &t[0]); + udelay(100); + de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); + preempt_enable(); + dt = ktime_sub(t[1], t[0]); + if (de < 0 || de > 10) { + pr_err("%s: reported %lldns [%d%%] busyness while sleeping [for %lldns]\n", + engine->name, + de, (int)div64_u64(100 * de, dt), dt); + GEM_TRACE_DUMP(); + err = -EINVAL; + goto end; + } + + /* 100% busy */ + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(engine->gt); + err = -ETIME; + goto end; + } + + ENGINE_TRACE(engine, "measuring busy time\n"); + preempt_disable(); + de = intel_engine_get_busy_time(engine, &t[0]); + udelay(100); + de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de); + preempt_enable(); + dt = ktime_sub(t[1], t[0]); + if (100 * de < 95 * dt || 95 * de > 100 * dt) { + pr_err("%s: reported %lldns [%d%%] busyness while spinning [for %lldns]\n", + engine->name, + de, (int)div64_u64(100 * de, dt), dt); + GEM_TRACE_DUMP(); + err = -EINVAL; + goto end; + } + +end: + st_engine_heartbeat_enable(engine); + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + return err; +} static int live_engine_pm(void *arg) { @@ -77,6 +177,7 @@ static int live_engine_pm(void *arg) int live_engine_pm_selftests(struct intel_gt *gt) { static const struct i915_subtest tests[] = { + SUBTEST(live_engine_busy_stats), SUBTEST(live_engine_pm), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 242181a5214c..6180a47c1b51 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -5,10 +5,141 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/sort.h> + +#include "intel_gt_clock_utils.h" + #include "selftest_llc.h" #include "selftest_rc6.h" #include "selftest_rps.h" +static int cmp_u64(const void *A, const void *B) +{ + const u64 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + +static void measure_clocks(struct intel_engine_cs *engine, + u32 *out_cycles, ktime_t *out_dt) +{ + ktime_t dt[5]; + u32 cycles[5]; + int i; + + for (i = 0; i < 5; i++) { + preempt_disable(); + cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP); + dt[i] = ktime_get(); + + udelay(1000); + + dt[i] = ktime_sub(ktime_get(), dt[i]); + cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP); + preempt_enable(); + } + + /* Use the median of both cycle/dt; close enough */ + sort(cycles, 5, sizeof(*cycles), cmp_u32, NULL); + *out_cycles = (cycles[1] + 2 * cycles[2] + cycles[3]) / 4; + + sort(dt, 5, sizeof(*dt), cmp_u64, NULL); + *out_dt = div_u64(dt[1] + 2 * dt[2] + dt[3], 4); +} + +static int live_gt_clocks(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!RUNTIME_INFO(gt->i915)->cs_timestamp_frequency_hz) { /* unknown */ + pr_info("CS_TIMESTAMP frequency unknown\n"); + return 0; + } + + if (INTEL_GEN(gt->i915) < 4) /* Any CS_TIMESTAMP? */ + return 0; + + if (IS_GEN(gt->i915, 5)) + /* + * XXX CS_TIMESTAMP low dword is dysfunctional? + * + * Ville's experiments indicate the high dword still works, + * but at a correspondingly reduced frequency. + */ + return 0; + + if (IS_GEN(gt->i915, 4)) + /* + * XXX CS_TIMESTAMP appears gibberish + * + * Ville's experiments indicate that it mostly appears 'stuck' + * in that we see the register report the same cycle count + * for a couple of reads. + */ + return 0; + + intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + u32 cycles; + u32 expected; + u64 time; + u64 dt; + + if (INTEL_GEN(engine->i915) < 7 && engine->id != RCS0) + continue; + + measure_clocks(engine, &cycles, &dt); + + time = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); + expected = i915_cs_timestamp_ns_to_ticks(engine->i915, dt); + + pr_info("%s: TIMESTAMP %d cycles [%lldns] in %lldns [%d cycles], using CS clock frequency of %uKHz\n", + engine->name, cycles, time, dt, expected, + RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz / 1000); + + if (9 * time < 8 * dt || 8 * time > 9 * dt) { + pr_err("%s: CS ticks did not match walltime!\n", + engine->name); + err = -EINVAL; + break; + } + + if (9 * expected < 8 * cycles || 8 * expected > 9 * cycles) { + pr_err("%s: walltime did not match CS ticks!\n", + engine->name); + err = -EINVAL; + break; + } + } + + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + intel_gt_pm_put(gt); + + return err; +} + static int live_gt_resume(void *arg) { struct intel_gt *gt = arg; @@ -52,6 +183,7 @@ static int live_gt_resume(void *arg) int intel_gt_pm_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_gt_clocks), SUBTEST(live_rc6_manual), SUBTEST(live_rps_clock_interval), SUBTEST(live_rps_control), diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 4aa4cc917d8b..fb5ebf930ab2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -29,6 +29,7 @@ #include "intel_gt.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "selftest_engine_heartbeat.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -203,12 +204,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); @@ -217,12 +218,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); } else if (INTEL_GEN(gt->i915) >= 4) { @@ -230,24 +231,24 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } else { *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; memset(batch, 0, 1024); batch += 1024 / sizeof(*batch); - *batch++ = MI_ARB_CHECK; + *batch++ = MI_NOOP; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } @@ -310,22 +311,6 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) 1000)); } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static int igt_hang_sanitycheck(void *arg) { struct intel_gt *gt = arg; @@ -482,7 +467,7 @@ static int igt_reset_nop_engine(void *arg) reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -499,6 +484,20 @@ static int igt_reset_nop_engine(void *arg) rq = intel_context_create_request(ce); if (IS_ERR(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + intel_engine_dump(engine, &p, + "%s(%s): failed to submit request\n", + __func__, + engine->name); + + GEM_TRACE("%s(%s): failed to submit request\n", + __func__, + engine->name); + GEM_TRACE_DUMP(); + + intel_gt_set_wedged(gt); + err = PTR_ERR(rq); break; } @@ -526,7 +525,7 @@ static int igt_reset_nop_engine(void *arg) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -576,7 +575,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { @@ -628,7 +627,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; @@ -805,10 +804,10 @@ static int __igt_reset_engines(struct intel_gt *gt, threads[tmp].resets = i915_reset_engine_count(global, other); - if (!(flags & TEST_OTHERS)) + if (other == engine && !(flags & TEST_SELF)) continue; - if (other == engine && !(flags & TEST_SELF)) + if (other != engine && !(flags & TEST_OTHERS)) continue; threads[tmp].engine = other; @@ -827,7 +826,7 @@ static int __igt_reset_engines(struct intel_gt *gt, yield(); /* start all threads before we begin */ - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; @@ -866,13 +865,29 @@ static int __igt_reset_engines(struct intel_gt *gt, count++; if (rq) { + if (rq->fence.error != -EIO) { + pr_err("i915_reset_engine(%s:%s):" + " failed to reset request %llx:%lld\n", + engine->name, test_name, + rq->fence.context, + rq->fence.seqno); + i915_request_put(rq); + + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + break; + } + if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" - " failed to complete request after reset\n", - engine->name, test_name); + " failed to complete request %llx:%lld after reset\n", + engine->name, test_name, + rq->fence.context, + rq->fence.seqno); intel_engine_dump(engine, &p, "%s\n", engine->name); i915_request_put(rq); @@ -901,7 +916,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); @@ -983,7 +998,7 @@ static int igt_reset_engines(void *arg) }, { "self-priority", - TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, + TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, }, { } }; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 924bc01ef526..3fc5de961280 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" +#include "gt/selftest_engine_heartbeat.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -51,22 +52,6 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) return vma; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static bool is_active(struct i915_request *rq) { if (i915_request_is_active(rq)) @@ -75,7 +60,7 @@ static bool is_active(struct i915_request *rq) if (i915_request_on_hold(rq)) return true; - if (i915_request_started(rq)) + if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) return true; return false; @@ -234,7 +219,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) err = -EIO; break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); for (n = 0; n < ARRAY_SIZE(ce); n++) { struct intel_context *tmp; @@ -332,7 +317,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) i915_request_put(rq[0]); err_ce: - tasklet_kill(&engine->execlists.tasklet); /* flush submission */ + intel_engine_flush_submission(engine); igt_spinner_end(&spin); for (n = 0; n < ARRAY_SIZE(ce); n++) { if (IS_ERR_OR_NULL(ce[n])) @@ -342,7 +327,7 @@ err_ce: intel_context_put(ce[n]); } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_live_test_end(&t)) err = -EIO; if (err) @@ -363,6 +348,156 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int live_unlite_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct igt_spinner spin; + enum intel_engine_id id; + int err = 0; + + /* + * Setup a preemption event that will cause almost the entire ring + * to be unwound, potentially fooling our intel_ring_direction() + * into emitting a forward lite-restore instead of the rollback. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq; + struct igt_live_test t; + int n; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + st_engine_heartbeat_disable(engine); + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + memset32(tmp->ring->vaddr, + 0xdeadbeef, /* trigger a hang if executed */ + tmp->ring->vma->size / sizeof(u32)); + + ce[n] = tmp; + } + + /* Create max prio spinner, followed by N low prio nops */ + rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + i915_request_get(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ce; + } + + /* Fill the ring, until we will cause a wrap */ + n = 0; + while (intel_ring_direction(ce[0]->ring, + rq->wa_tail, + ce[0]->ring->tail) <= 0) { + struct i915_request *tmp; + + tmp = intel_context_create_request(ce[0]); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + i915_request_put(rq); + goto err_ce; + } + + i915_request_add(tmp); + intel_engine_flush_submission(engine); + n++; + } + intel_engine_flush_submission(engine); + pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", + engine->name, n, + ce[0]->ring->size, + ce[0]->ring->tail, + ce[0]->ring->emit, + rq->tail); + GEM_BUG_ON(intel_ring_direction(ce[0]->ring, + rq->tail, + ce[0]->ring->tail) <= 0); + i915_request_put(rq); + + /* Create a second ring to preempt the first ring after rq[0] */ + rq = intel_context_create_request(ce[1]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + err = wait_for_submit(engine, rq, HZ / 2); + i915_request_put(rq); + if (err) { + pr_err("%s: preemption request was not submitted\n", + engine->name); + err = -ETIME; + } + + pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", + engine->name, + ce[0]->ring->tail, ce[0]->ring->emit, + ce[1]->ring->tail, ce[1]->ring->emit); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + st_engine_heartbeat_enable(engine); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int live_pin_rewind(void *arg) { struct intel_gt *gt = arg; @@ -471,7 +606,7 @@ static int live_hold_reset(void *arg) break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -531,7 +666,7 @@ static int live_hold_reset(void *arg) i915_request_put(rq); out: - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_context_put(ce); if (err) break; @@ -578,7 +713,7 @@ static int live_error_interrupt(void *arg) const struct error_phase *p; int err = 0; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); for (p = phases; p->error[0] != GOOD; p++) { struct i915_request *client[ARRAY_SIZE(phases->error)]; @@ -677,7 +812,7 @@ out: } } - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) { intel_gt_set_wedged(gt); return err; @@ -828,7 +963,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, goto out; if (i915_request_wait(head, 0, - 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { + 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); GEM_TRACE_DUMP(); @@ -845,10 +980,11 @@ static int live_timeslice_preempt(void *arg) { struct intel_gt *gt = arg; struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct i915_vma *vma; void *vaddr; int err = 0; - int count; /* * If a request takes too long, we would like to give other users @@ -885,26 +1021,21 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - for_each_prime_number_from(count, 1, 16) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, gt, id) { - if (!intel_engine_has_preemption(engine)) - continue; + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; - memset(vaddr, 0, PAGE_SIZE); + memset(vaddr, 0, PAGE_SIZE); - engine_heartbeat_disable(engine); - err = slice_semaphore_queue(engine, vma, count); - engine_heartbeat_enable(engine); - if (err) - goto err_pin; + st_engine_heartbeat_disable(engine); + err = slice_semaphore_queue(engine, vma, 5); + st_engine_heartbeat_enable(engine); + if (err) + goto err_pin; - if (igt_flush_test(gt->i915)) { - err = -EIO; - goto err_pin; - } + if (igt_flush_test(gt->i915)) { + err = -EIO; + goto err_pin; } } @@ -1020,7 +1151,7 @@ static int live_timeslice_rewind(void *arg) * Expect execution/evaluation order XZY */ - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); slot = memset32(engine->status_page.addr + 1000, 0, 4); @@ -1031,18 +1162,18 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[0] = create_rewinder(ce, NULL, slot, X); - if (IS_ERR(rq[0])) { + rq[A1] = create_rewinder(ce, NULL, slot, X); + if (IS_ERR(rq[A1])) { intel_context_put(ce); goto err; } - rq[1] = create_rewinder(ce, NULL, slot, Y); + rq[A2] = create_rewinder(ce, NULL, slot, Y); intel_context_put(ce); - if (IS_ERR(rq[1])) + if (IS_ERR(rq[A2])) goto err; - err = wait_for_submit(engine, rq[1], HZ / 2); + err = wait_for_submit(engine, rq[A2], HZ / 2); if (err) { pr_err("%s: failed to submit first context\n", engine->name); @@ -1055,12 +1186,12 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[2] = create_rewinder(ce, rq[0], slot, Z); + rq[B1] = create_rewinder(ce, rq[A1], slot, Z); intel_context_put(ce); if (IS_ERR(rq[2])) goto err; - err = wait_for_submit(engine, rq[2], HZ / 2); + err = wait_for_submit(engine, rq[B1], HZ / 2); if (err) { pr_err("%s: failed to submit second context\n", engine->name); @@ -1068,6 +1199,7 @@ static int live_timeslice_rewind(void *arg) } /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ + ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ /* Wait for the timeslice to kick in */ del_timer(&engine->execlists.timer); @@ -1114,7 +1246,7 @@ err: wmb(); engine->props.timeslice_duration_ms = timeslice; - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); for (i = 0; i < 3; i++) i915_request_put(rq[i]); if (igt_flush_test(gt->i915)) @@ -1140,9 +1272,17 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine) return rq; } -static long timeslice_threshold(const struct intel_engine_cs *engine) +static long slice_timeout(struct intel_engine_cs *engine) { - return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; + long timeout; + + /* Enough time for a timeslice to kick in, and kick out */ + timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); + + /* Enough time for the nop request to complete */ + timeout += HZ / 5; + + return timeout + 1; } static int live_timeslice_queue(void *arg) @@ -1198,7 +1338,7 @@ static int live_timeslice_queue(void *arg) if (!intel_engine_has_preemption(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); memset(vaddr, 0, PAGE_SIZE); /* ELSP[0]: semaphore wait */ @@ -1243,24 +1383,8 @@ static int live_timeslice_queue(void *arg) intel_engine_flush_submission(engine); } while (READ_ONCE(engine->execlists.pending[0])); - if (!READ_ONCE(engine->execlists.timer.expires) && - execlists_active(&engine->execlists) == rq && - !i915_request_completed(rq)) { - struct drm_printer p = - drm_info_printer(gt->i915->drm.dev); - - GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - GEM_TRACE_DUMP(); - - memset(vaddr, 0xff, PAGE_SIZE); - err = -EINVAL; - } - /* Timeslice every jiffy, so within 2 we should signal */ - if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { + if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1275,7 +1399,7 @@ static int live_timeslice_queue(void *arg) err_rq: i915_request_put(rq); err_heartbeat: - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; } @@ -1321,7 +1445,7 @@ static int live_timeslice_nopreempt(void *arg) break; } - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); timeslice = xchg(&engine->props.timeslice_duration_ms, 1); /* Create an unpreemptible spinner */ @@ -1349,7 +1473,7 @@ static int live_timeslice_nopreempt(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { - err = PTR_ERR(rq); + err = PTR_ERR(ce); goto out_spin; } @@ -1379,7 +1503,7 @@ static int live_timeslice_nopreempt(void *arg) * allow the maximum priority barrier through. Wait long * enough to see if it is timesliced in by mistake. */ - if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) { + if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", engine->name); err = -EINVAL; @@ -1390,7 +1514,7 @@ out_spin: igt_spinner_end(&spin); out_heartbeat: xchg(&engine->props.timeslice_duration_ms, timeslice); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; @@ -2294,7 +2418,7 @@ static int live_suppress_self_preempt(void *arg) if (igt_flush_test(gt->i915)) goto err_wedged; - intel_engine_pm_get(engine); + st_engine_heartbeat_disable(engine); engine->execlists.preempt_hang.count = 0; rq_a = spinner_create_request(&a.spin, @@ -2302,14 +2426,14 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); - intel_engine_pm_put(engine); + st_engine_heartbeat_enable(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); - intel_engine_pm_put(engine); + st_engine_heartbeat_enable(engine); goto err_wedged; } @@ -2321,7 +2445,7 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); - intel_engine_pm_put(engine); + st_engine_heartbeat_enable(engine); goto err_client_b; } i915_request_add(rq_b); @@ -2332,7 +2456,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); - intel_engine_pm_put(engine); + st_engine_heartbeat_enable(engine); goto err_wedged; } @@ -2346,12 +2470,12 @@ static int live_suppress_self_preempt(void *arg) engine->name, engine->execlists.preempt_hang.count, depth); - intel_engine_pm_put(engine); + st_engine_heartbeat_enable(engine); err = -EINVAL; goto err_client_b; } - intel_engine_pm_put(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -2371,183 +2495,6 @@ err_wedged: goto err_client_b; } -static int __i915_sw_fence_call -dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - return NOTIFY_DONE; -} - -static struct i915_request *dummy_request(struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - rq = kzalloc(sizeof(*rq), GFP_KERNEL); - if (!rq) - return NULL; - - rq->engine = engine; - - spin_lock_init(&rq->lock); - INIT_LIST_HEAD(&rq->fence.cb_list); - rq->fence.lock = &rq->lock; - rq->fence.ops = &i915_fence_ops; - - i915_sched_node_init(&rq->sched); - - /* mark this request as permanently incomplete */ - rq->fence.seqno = 1; - BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ - rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; - GEM_BUG_ON(i915_request_completed(rq)); - - i915_sw_fence_init(&rq->submit, dummy_notify); - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - - spin_lock_init(&rq->lock); - rq->fence.lock = &rq->lock; - INIT_LIST_HEAD(&rq->fence.cb_list); - - return rq; -} - -static void dummy_request_free(struct i915_request *dummy) -{ - /* We have to fake the CS interrupt to kick the next request */ - i915_sw_fence_commit(&dummy->submit); - - i915_request_mark_complete(dummy); - dma_fence_signal(&dummy->fence); - - i915_sched_node_fini(&dummy->sched); - i915_sw_fence_fini(&dummy->submit); - - dma_fence_free(&dummy->fence); -} - -static int live_suppress_wait_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct preempt_client client[4]; - struct i915_request *rq[ARRAY_SIZE(client)] = {}; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENOMEM; - int i; - - /* - * Waiters are given a little priority nudge, but not enough - * to actually cause any preemption. Double check that we do - * not needlessly generate preempt-to-idle cycles. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ - return -ENOMEM; - if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ - goto err_client_0; - if (preempt_client_init(gt, &client[2])) /* head of queue */ - goto err_client_1; - if (preempt_client_init(gt, &client[3])) /* bystander */ - goto err_client_2; - - for_each_engine(engine, gt, id) { - int depth; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (!engine->emit_init_breadcrumb) - continue; - - for (depth = 0; depth < ARRAY_SIZE(client); depth++) { - struct i915_request *dummy; - - engine->execlists.preempt_hang.count = 0; - - dummy = dummy_request(engine); - if (!dummy) - goto err_client_3; - - for (i = 0; i < ARRAY_SIZE(client); i++) { - struct i915_request *this; - - this = spinner_create_request(&client[i].spin, - client[i].ctx, engine, - MI_NOOP); - if (IS_ERR(this)) { - err = PTR_ERR(this); - goto err_wedged; - } - - /* Disable NEWCLIENT promotion */ - __i915_active_fence_set(&i915_request_timeline(this)->last_request, - &dummy->fence); - - rq[i] = i915_request_get(this); - i915_request_add(this); - } - - dummy_request_free(dummy); - - GEM_BUG_ON(i915_request_completed(rq[0])); - if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { - pr_err("%s: First client failed to start\n", - engine->name); - goto err_wedged; - } - GEM_BUG_ON(!i915_request_started(rq[0])); - - if (i915_request_wait(rq[depth], - I915_WAIT_PRIORITY, - 1) != -ETIME) { - pr_err("%s: Waiter depth:%d completed!\n", - engine->name, depth); - goto err_wedged; - } - - for (i = 0; i < ARRAY_SIZE(client); i++) { - igt_spinner_end(&client[i].spin); - i915_request_put(rq[i]); - rq[i] = NULL; - } - - if (igt_flush_test(gt->i915)) - goto err_wedged; - - if (engine->execlists.preempt_hang.count) { - pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", - engine->name, - engine->execlists.preempt_hang.count, - depth); - err = -EINVAL; - goto err_client_3; - } - } - } - - err = 0; -err_client_3: - preempt_client_fini(&client[3]); -err_client_2: - preempt_client_fini(&client[2]); -err_client_1: - preempt_client_fini(&client[1]); -err_client_0: - preempt_client_fini(&client[0]); - return err; - -err_wedged: - for (i = 0; i < ARRAY_SIZE(client); i++) { - igt_spinner_end(&client[i].spin); - i915_request_put(rq[i]); - } - intel_gt_set_wedged(gt); - err = -EIO; - goto err_client_3; -} - static int live_chain_preempt(void *arg) { struct intel_gt *gt = arg; @@ -2796,6 +2743,168 @@ err_ce: return err; } +static int __live_preempt_ring(struct intel_engine_cs *engine, + struct igt_spinner *spin, + int queue_sz, int ring_sz) +{ + struct intel_context *ce[2] = {}; + struct i915_request *rq; + struct igt_live_test t; + int err = 0; + int n; + + if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) + return -EIO; + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + tmp->ring = __intel_context_ring_size(ring_sz); + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + memset32(tmp->ring->vaddr, + 0xdeadbeef, /* trigger a hang if executed */ + tmp->ring->vma->size / sizeof(u32)); + + ce[n] = tmp; + } + + rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + i915_request_get(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + if (!igt_wait_for_spinner(spin, rq)) { + intel_gt_set_wedged(engine->gt); + i915_request_put(rq); + err = -ETIME; + goto err_ce; + } + + /* Fill the ring, until we will cause a wrap */ + n = 0; + while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { + struct i915_request *tmp; + + tmp = intel_context_create_request(ce[0]); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + i915_request_put(rq); + goto err_ce; + } + + i915_request_add(tmp); + intel_engine_flush_submission(engine); + n++; + } + intel_engine_flush_submission(engine); + pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", + engine->name, queue_sz, n, + ce[0]->ring->size, + ce[0]->ring->tail, + ce[0]->ring->emit, + rq->tail); + i915_request_put(rq); + + /* Create a second request to preempt the first ring */ + rq = intel_context_create_request(ce[1]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + err = wait_for_submit(engine, rq, HZ / 2); + i915_request_put(rq); + if (err) { + pr_err("%s: preemption request was not submited\n", + engine->name); + err = -ETIME; + } + + pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", + engine->name, + ce[0]->ring->tail, ce[0]->ring->emit, + ce[1]->ring->tail, ce[1]->ring->emit); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int live_preempt_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct igt_spinner spin; + enum intel_engine_id id; + int err = 0; + + /* + * Check that we rollback large chunks of a ring in order to do a + * preemption event. Similar to live_unlite_ring, but looking at + * ring size rather than the impact of intel_ring_direction(). + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + int n; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + st_engine_heartbeat_disable(engine); + + for (n = 0; n <= 3; n++) { + err = __live_preempt_ring(engine, &spin, + n * SZ_4K / 4, SZ_4K); + if (err) + break; + } + + st_engine_heartbeat_enable(engine); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int live_preempt_gang(void *arg) { struct intel_gt *gt = arg; @@ -2840,16 +2949,8 @@ static int live_preempt_gang(void *arg) /* Submit each spinner at increasing priority */ engine->schedule(rq, &attr); - - if (prio <= I915_PRIORITY_MAX) - continue; - - if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) - break; - - if (__igt_timeout(end_time, NULL)) - break; - } while (1); + } while (prio <= I915_PRIORITY_MAX && + !__igt_timeout(end_time, NULL)); pr_debug("%s: Preempt chain of %d requests\n", engine->name, prio); @@ -3417,7 +3518,7 @@ static int smoke_crescendo_thread(void *arg) return err; count++; - } while (!__igt_timeout(end_time, NULL)); + } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); smoke->count = count; return 0; @@ -3468,8 +3569,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) } pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", - count, flags, - RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); + count, flags, smoke->gt->info.num_engines, smoke->ncontext); return 0; } @@ -3493,11 +3593,10 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count++; } - } while (!__igt_timeout(end_time, NULL)); + } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", - count, flags, - RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); + count, flags, smoke->gt->info.num_engines, smoke->ncontext); return 0; } @@ -3506,7 +3605,7 @@ static int live_preempt_smoke(void *arg) struct preempt_smoke smoke = { .gt = arg, .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), - .ncontext = 1024, + .ncontext = 256, }; const unsigned int phase[] = { 0, BATCH }; struct igt_live_test t; @@ -3706,13 +3805,43 @@ out: return err; } +static unsigned int +__select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings, + bool (*filter)(const struct intel_engine_cs *)) +{ + unsigned int n = 0; + unsigned int inst; + + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + if (filter && !filter(gt->engine_class[class][inst])) + continue; + + siblings[n++] = gt->engine_class[class][inst]; + } + + return n; +} + +static unsigned int +select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings) +{ + return __select_siblings(gt, class, siblings, NULL); +} + static int live_virtual_engine(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned int class, inst; + unsigned int class; int err; if (intel_uc_uses_guc_submission(>->uc)) @@ -3730,13 +3859,7 @@ static int live_virtual_engine(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { int nsibling, n; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -3845,7 +3968,7 @@ static int live_virtual_mask(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; int err; if (intel_uc_uses_guc_submission(>->uc)) @@ -3854,17 +3977,178 @@ static int live_virtual_mask(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned int nsibling; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - break; + nsibling = select_siblings(gt, class, siblings); + if (nsibling < 2) + continue; + + err = mask_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int slicein_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + const long timeout = slice_timeout(siblings[0]); + struct intel_context *ce; + struct i915_request *rq; + struct igt_spinner spin; + unsigned int n; + int err = 0; + + /* + * Virtual requests must take part in timeslicing on the target engines. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for (n = 0; n < nsibling; n++) { + ce = intel_context_create(siblings[n]); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_add(rq); + } + + ce = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) { + GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", + __func__, rq->engine->name); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + } + i915_request_put(rq); + +out: + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + igt_spinner_fini(&spin); + return err; +} + +static int sliceout_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + const long timeout = slice_timeout(siblings[0]); + struct intel_context *ce; + struct i915_request *rq; + struct igt_spinner spin; + unsigned int n; + int err = 0; + + /* + * Virtual requests must allow others a fair timeslice. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + /* XXX We do not handle oversubscription and fairness with normal rq */ + for (n = 0; n < nsibling; n++) { + ce = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_add(rq); + } + + for (n = 0; !err && n < nsibling; n++) { + ce = intel_context_create(siblings[n]); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } - siblings[nsibling++] = gt->engine_class[class][inst]; + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) { + GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", + __func__, siblings[n]->name); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; } + i915_request_put(rq); + } + +out: + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + igt_spinner_fini(&spin); + return err; +} + +static int live_virtual_slice(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + int err; + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + unsigned int nsibling; + + nsibling = __select_siblings(gt, class, siblings, + intel_engine_has_timeslices); if (nsibling < 2) continue; - err = mask_virtual_engine(gt, siblings, nsibling); + err = slicein_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + + err = sliceout_virtual_engine(gt, siblings, nsibling); if (err) return err; } @@ -3982,7 +4266,7 @@ static int live_virtual_preserved(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; /* * Check that the context image retains non-privileged (user) registers @@ -4000,13 +4284,7 @@ static int live_virtual_preserved(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { int nsibling, err; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -4217,7 +4495,7 @@ static int live_virtual_bond(void *arg) }; struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; int err; if (intel_uc_uses_guc_submission(>->uc)) @@ -4227,14 +4505,7 @@ static int live_virtual_bond(void *arg) const struct phase *p; int nsibling; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - break; - - GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -4280,7 +4551,7 @@ static int reset_virtual_engine(struct intel_gt *gt, } for (n = 0; n < nsibling; n++) - engine_heartbeat_disable(siblings[n]); + st_engine_heartbeat_disable(siblings[n]); rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); if (IS_ERR(rq)) { @@ -4351,7 +4622,7 @@ out_rq: i915_request_put(rq); out_heartbeat: for (n = 0; n < nsibling; n++) - engine_heartbeat_enable(siblings[n]); + st_engine_heartbeat_enable(siblings[n]); intel_context_put(ve); out_spin: @@ -4363,7 +4634,7 @@ static int live_virtual_reset(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class, inst; + unsigned int class; /* * Check that we handle a reset event within a virtual engine. @@ -4381,13 +4652,7 @@ static int live_virtual_reset(void *arg) for (class = 0; class <= MAX_ENGINE_CLASS; class++) { int nsibling, err; - nsibling = 0; - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - siblings[nsibling++] = gt->engine_class[class][inst]; - } + nsibling = select_siblings(gt, class, siblings); if (nsibling < 2) continue; @@ -4405,6 +4670,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_unlite_ring), SUBTEST(live_pin_rewind), SUBTEST(live_hold_reset), SUBTEST(live_error_interrupt), @@ -4418,8 +4684,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nopreempt), SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), - SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_ring), SUBTEST(live_preempt_gang), SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_user), @@ -4427,6 +4693,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), SUBTEST(live_virtual_preserved), + SUBTEST(live_virtual_slice), SUBTEST(live_virtual_bond), SUBTEST(live_virtual_reset), }; @@ -5030,7 +5297,7 @@ static int live_lrc_gpr(void *arg) return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); err = __live_lrc_gpr(engine, scratch, false); if (err) @@ -5041,7 +5308,7 @@ static int live_lrc_gpr(void *arg) goto err; err: - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -5190,7 +5457,7 @@ static int live_lrc_timestamp(void *arg) for_each_engine(data.engine, gt, id) { int i, err = 0; - engine_heartbeat_disable(data.engine); + st_engine_heartbeat_disable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { struct intel_context *tmp; @@ -5223,7 +5490,7 @@ static int live_lrc_timestamp(void *arg) } err: - engine_heartbeat_enable(data.engine); + st_engine_heartbeat_enable(data.engine); for (i = 0; i < ARRAY_SIZE(data.ce); i++) { if (!data.ce[i]) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 63f87d8608c3..b25eba50c88e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -157,7 +157,7 @@ static int read_mocs_table(struct i915_request *rq, { u32 addr; - if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915)) + if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915)) addr = global_mocs_offset(); else addr = mocs_offset(rq->engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 2dc460624bbc..64ef5ee5decf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -132,7 +132,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce) } cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT; - if (INTEL_GEN(rq->i915) >= 8) + if (INTEL_GEN(rq->engine->i915) >= 8) cmd++; *cs++ = cmd; @@ -197,10 +197,10 @@ int live_rc6_ctx_wa(void *arg) int pass; for (pass = 0; pass < 2; pass++) { + struct i915_gpu_error *error = >->i915->gpu_error; struct intel_context *ce; unsigned int resets = - i915_reset_engine_count(>->i915->gpu_error, - engine); + i915_reset_engine_count(error, engine); const u32 *res; /* Use a sacrifical context */ @@ -230,11 +230,10 @@ int live_rc6_ctx_wa(void *arg) engine->name, READ_ONCE(*res)); if (resets != - i915_reset_engine_count(>->i915->gpu_error, - engine)) { + i915_reset_engine_count(error, engine)) { pr_err("%s: GPU reset required\n", engine->name); - add_taint_for_CI(TAINT_WARN); + add_taint_for_CI(gt->i915, TAINT_WARN); err = -EIO; goto out; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 5049c3dd08a6..8624f5d2a1f3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -12,6 +12,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_rc6.h" +#include "selftest_engine_heartbeat.h" #include "selftest_rps.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_spinner.h" @@ -20,22 +21,6 @@ /* Try to isolate the impact of cstates from determing frequency response */ #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */ -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static void dummy_rps_work(struct work_struct *wrk) { } @@ -44,9 +29,9 @@ static int cmp_u64(const void *A, const void *B) { const u64 *a = A, *b = B; - if (a < b) + if (*a < *b) return -1; - else if (a > b) + else if (*a > *b) return 1; else return 0; @@ -56,9 +41,9 @@ static int cmp_u32(const void *A, const void *B) { const u32 *a = A, *b = B; - if (a < b) + if (*a < *b) return -1; - else if (a > b) + else if (*a > *b) return 1; else return 0; @@ -249,13 +234,13 @@ int live_rps_clock_interval(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -266,7 +251,7 @@ int live_rps_clock_interval(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -322,7 +307,7 @@ int live_rps_clock_interval(void *arg) intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); @@ -408,7 +393,7 @@ int live_rps_control(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, @@ -424,7 +409,7 @@ int live_rps_control(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -434,7 +419,7 @@ int live_rps_control(void *arg) pr_err("%s: could not set minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -451,7 +436,7 @@ int live_rps_control(void *arg) pr_err("%s: could not restore minimum frequency [%x], only %x!\n", engine->name, rps->min_freq, read_cagf(rps)); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); show_pstate_limits(rps); err = -EINVAL; break; @@ -466,7 +451,7 @@ int live_rps_control(void *arg) min_dt = ktime_sub(ktime_get(), min_dt); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", engine->name, @@ -637,14 +622,14 @@ int live_rps_frequency_cs(void *arg) int freq; } min, max; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, false, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); break; } @@ -725,7 +710,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -779,14 +764,14 @@ int live_rps_frequency_srm(void *arg) int freq; } min, max; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); vma = create_spin_counter(engine, engine->kernel_context->vm, true, &cancel, &cntr); if (IS_ERR(vma)) { err = PTR_ERR(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); break; } @@ -866,7 +851,7 @@ err_vma: i915_vma_unpin(vma); i915_vma_put(vma); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (igt_flush_test(gt->i915)) err = -EIO; if (err) @@ -1061,11 +1046,11 @@ int live_rps_interrupt(void *arg) intel_gt_pm_wait_for_idle(engine->gt); GEM_BUG_ON(intel_rps_is_active(rps)); - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); err = __rps_up_interrupt(rps, engine, &spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) goto out; @@ -1074,13 +1059,13 @@ int live_rps_interrupt(void *arg) /* Keep the engine awake but idle and check for DOWN */ if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) { - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); intel_rc6_disable(>->rc6); err = __rps_down_interrupt(rps, engine); intel_rc6_enable(>->rc6); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) goto out; } @@ -1165,13 +1150,13 @@ int live_rps_power(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); rq = igt_spinner_create_request(&spin, engine->kernel_context, MI_NOOP); if (IS_ERR(rq)) { - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); err = PTR_ERR(rq); break; } @@ -1182,7 +1167,7 @@ int live_rps_power(void *arg) pr_err("%s: RPS spinner did not start\n", engine->name); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); intel_gt_set_wedged(engine->gt); err = -EIO; break; @@ -1195,7 +1180,7 @@ int live_rps_power(void *arg) min.power = measure_power_at(rps, &min.freq); igt_spinner_end(&spin); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", engine->name, @@ -1252,6 +1237,11 @@ int live_rps_dynamic(void *arg) if (igt_spinner_init(&spin, gt)) return -ENOMEM; + if (intel_rps_has_interrupts(rps)) + pr_info("RPS has interrupt support\n"); + if (intel_rps_uses_timer(rps)) + pr_info("RPS has timer support\n"); + for_each_engine(engine, gt, id) { struct i915_request *rq; struct { diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index ef1c35073dc0..fb5b7d3498a6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -12,6 +12,7 @@ #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_ring.h" +#include "selftest_engine_heartbeat.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -426,12 +427,12 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) if (IS_ERR(cs)) return PTR_ERR(cs); - if (INTEL_GEN(rq->i915) >= 8) { + if (INTEL_GEN(rq->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = addr; *cs++ = 0; *cs++ = value; - } else if (INTEL_GEN(rq->i915) >= 4) { + } else if (INTEL_GEN(rq->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = addr; @@ -561,8 +562,9 @@ static int live_hwsp_engine(void *arg) struct intel_timeline *tl = timelines[n]; if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - n, *tl->hwsp_seqno); + pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n", + n, tl->hwsp_offset, *tl->hwsp_seqno); + GEM_TRACE_DUMP(); err = -EINVAL; } intel_timeline_put(tl); @@ -632,8 +634,9 @@ out: struct intel_timeline *tl = timelines[n]; if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - n, *tl->hwsp_seqno); + pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n", + n, tl->hwsp_offset, *tl->hwsp_seqno); + GEM_TRACE_DUMP(); err = -EINVAL; } intel_timeline_put(tl); @@ -751,22 +754,6 @@ out_free: return err; } -static void engine_heartbeat_disable(struct intel_engine_cs *engine) -{ - engine->props.heartbeat_interval_ms = 0; - - intel_engine_pm_get(engine); - intel_engine_park_heartbeat(engine); -} - -static void engine_heartbeat_enable(struct intel_engine_cs *engine) -{ - intel_engine_pm_put(engine); - - engine->props.heartbeat_interval_ms = - engine->defaults.heartbeat_interval_ms; -} - static int live_hwsp_rollover_kernel(void *arg) { struct intel_gt *gt = arg; @@ -785,7 +772,7 @@ static int live_hwsp_rollover_kernel(void *arg) struct i915_request *rq[3] = {}; int i; - engine_heartbeat_disable(engine); + st_engine_heartbeat_disable(engine); if (intel_gt_wait_for_idle(gt, HZ / 2)) { err = -EIO; goto out; @@ -836,7 +823,7 @@ static int live_hwsp_rollover_kernel(void *arg) out: for (i = 0; i < ARRAY_SIZE(rq); i++) i915_request_put(rq[i]); - engine_heartbeat_enable(engine); + st_engine_heartbeat_enable(engine); if (err) break; } @@ -980,8 +967,9 @@ static int live_hwsp_recycle(void *arg) } if (*tl->hwsp_seqno != count) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + pr_err("Invalid seqno stored in timeline %lu @ tl->hwsp_offset, found 0x%x\n", count, *tl->hwsp_seqno); + GEM_TRACE_DUMP(); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 32785463ec9e..febc9e6692ba 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -417,6 +417,20 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg) return false; } +static bool timestamp(const struct intel_engine_cs *engine, u32 reg) +{ + reg = (reg - engine->mmio_base) & ~RING_FORCE_TO_NONPRIV_ACCESS_MASK; + switch (reg) { + case 0x358: + case 0x35c: + case 0x3a8: + return true; + + default: + return false; + } +} + static bool ro_register(u32 reg) { if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) == @@ -497,6 +511,9 @@ static int check_dirty_whitelist(struct intel_context *ce) if (wo_register(engine, reg)) continue; + if (timestamp(engine, reg)) + continue; /* timestamps are expected to autoincrement */ + ro_reg = ro_register(reg); /* Clear non priv flags */ diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README new file mode 100644 index 000000000000..e7e96d7073c7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/README @@ -0,0 +1,46 @@ +ASM sources for auto generated shaders +====================================== + +The i915/gt/hsw_clear_kernel.c and i915/gt/ivb_clear_kernel.c files contain +pre-compiled batch chunks that will clear any residual render cache during +context switch. + +They are generated from their respective platform ASM files present on +i915/gt/shaders/clear_kernel directory. + +The generated .c files should never be modified directly. Instead, any modification +needs to be done on the on their respective ASM files and build instructions below +needes to be followed. + +Building +======== + +Environment +----------- + +IGT GPU tool scripts and the Mesa's i965 instruction assembler tool are used +on building. + +Please make sure your Mesa tool is compiled with "-Dtools=intel" and +"-Ddri-drivers=i965", and run this script from IGT source root directory" + +The instructions bellow assume: + * IGT gpu tools source code is located on your home directory (~) as ~/igt + * Mesa source code is located on your home directory (~) as ~/mesa + and built under the ~/mesa/build directory + * Linux kernel source code is under your home directory (~) as ~/linux + +Instructions +------------ + +~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm \ + ~/igt/lib/i915/shaders/clear_kernel/ivb.asm +~ $ cd ~/igt +igt $ ./scripts/generate_clear_kernel.sh -g ivb \ + -m ~/mesa/build/src/intel/tools/i965_asm + +~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm \ + ~/igt/lib/i915/shaders/clear_kernel/hsw.asm +~ $ cd ~/igt +igt $ ./scripts/generate_clear_kernel.sh -g hsw \ + -m ~/mesa/build/src/intel/tools/i965_asm
\ No newline at end of file diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm new file mode 100644 index 000000000000..5fdf384bb621 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* + * Kernel for PAVP buffer clear. + * + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears + * 512 bytes of Render Cache. + */ + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/** + * Curbe Format + * + * DW 1.0 - Block Offset to write Render Cache + * DW 1.1 [15:0] - Clear Word + * DW 1.2 - Delay iterations + * DW 1.3 - Enable Instrumentation (only for debug) + * DW 1.4 - Rsvd (intended for context ID) + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) + * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * + * Binding Table + * + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) + * BTI 1: Wait/Instrumentation Buffer + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) + * Expected to be initialized to 0 by driver/another kernel + * Layout: + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N + */ +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */ +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; +(+f0.0) jmpi(1) 352D { align1 WE_all 1N }; + +/** + * State Register has info on where this thread is running + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + */ +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */ +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */ +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */ +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ + +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; + +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; + +/* Media block read to fetch current value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 + + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; + +/* Media block write for updated value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; + +/* Delay thread for specified parameter */ +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; +(+f0.0) jmpi(1) -32D { align1 WE_all 1N }; + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/* Initialize looping parameters */ +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */ +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */ + +/* Write 32x16 all "0" block */ +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; +mov(16) g3<1>UD 0x00000000UD { align1 1H }; +mov(16) g4<1>UD 0x00000000UD { align1 1H }; +mov(16) g5<1>UD 0x00000000UD { align1 1H }; +mov(16) g6<1>UD 0x00000000UD { align1 1H }; +mov(16) g7<1>UD 0x00000000UD { align1 1H }; +mov(16) g8<1>UD 0x00000000UD { align1 1H }; +mov(16) g9<1>UD 0x00000000UD { align1 1H }; +mov(16) g10<1>UD 0x00000000UD { align1 1H }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; + +/* Now, clear all GRF registers */ +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; +(+f0.0) jmpi(1) -64D { align1 WE_all 1N }; + +/* Terminante the thread */ +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm new file mode 100644 index 000000000000..97c7ac9e3854 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +/* + * Kernel for PAVP buffer clear. + * + * 1. Clear all 64 GRF registers assigned to the kernel with designated value; + * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears + * 512 bytes of Render Cache. + */ + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/** + * Curbe Format + * + * DW 1.0 - Block Offset to write Render Cache + * DW 1.1 [15:0] - Clear Word + * DW 1.2 - Delay iterations + * DW 1.3 - Enable Instrumentation (only for debug) + * DW 1.4 - Rsvd (intended for context ID) + * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount + * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count) + * DW 1.7 - Rsvd MBZ (inteded for Total Thread Count) + * + * Binding Table + * + * BTI 0: 2D Surface to help clear L3 (Render/Data Cache) + * BTI 1: Wait/Instrumentation Buffer + * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT) + * Expected to be initialized to 0 by driver/another kernel + * Layout : + * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS] + * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N + */ +add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */ +cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N }; +(+f0.0) jmpi(1) 44D { align1 WE_all 1N }; + +/** + * State Register has info on where this thread is running + * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID + */ +mov(8) g3<1>UD 0x00000000UD { align1 1Q }; +shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N }; +and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */ +shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N }; +and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */ +mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N }; +add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */ +shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N }; +and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */ +mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N }; +add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */ + +mov(8) g5<1>UD 0x00000000UD { align1 1Q }; +and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N }; +mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N }; + +mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */ +mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */ +mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */ +and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N }; + +/* Media block read to fetch current value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x02190001 + render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q }; +add(1) g5<1>D g5<0,1,0>D 1D { align1 1N }; + +/* Media block write for updated value at specified location in instrumentation buffer */ +sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q }; +/* Delay thread for specified parameter */ +add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N }; +(+f0.0) jmpi(1) -4D { align1 WE_all 1N }; + +/* Store designated "clear GRF" value */ +mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N }; + +/* Initialize looping parameters */ +mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */ +mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */ + +/* Write 32x16 all "0" block */ +mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q }; +mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q }; +mov(2) g2<1>UD g1<2,2,1>UW { align1 1N }; +mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */ +and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N }; +mov(16) g3<1>UD 0x00000000UD { align1 1H }; +mov(16) g4<1>UD 0x00000000UD { align1 1H }; +mov(16) g5<1>UD 0x00000000UD { align1 1H }; +mov(16) g6<1>UD 0x00000000UD { align1 1H }; +mov(16) g7<1>UD 0x00000000UD { align1 1H }; +mov(16) g8<1>UD 0x00000000UD { align1 1H }; +mov(16) g9<1>UD 0x00000000UD { align1 1H }; +mov(16) g10<1>UD 0x00000000UD { align1 1H }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; +add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N }; +sendc(8) null<1>UD g2<8,8,1>F 0x120a8000 + render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; + +/* Now, clear all GRF registers */ +add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N }; +mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H }; +add(1) a0<1>D a0<0,1,0>D 32D { align1 1N }; +(+f0.0) jmpi(1) -8D { align1 WE_all 1N }; + +/* Terminante the thread */ +sendc(8) null<1>UD g127<8,8,1>F 0x82000010 + thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 101728006ae9..d44061033f23 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -67,7 +67,7 @@ struct __guc_ads_blob { static void __guc_ads_init(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; + struct intel_gt *gt = guc_to_gt(guc); struct __guc_ads_blob *blob = guc->ads_blob; const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; u32 base; @@ -99,13 +99,13 @@ static void __guc_ads_init(struct intel_guc *guc) } /* System info */ - blob->system_info.slice_enabled = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask); + blob->system_info.slice_enabled = hweight8(gt->info.sseu.slice_mask); blob->system_info.rcs_enabled = 1; blob->system_info.bcs_enabled = 1; - blob->system_info.vdbox_enable_mask = VDBOX_MASK(dev_priv); - blob->system_info.vebox_enable_mask = VEBOX_MASK(dev_priv); - blob->system_info.vdbox_sfc_support_mask = RUNTIME_INFO(dev_priv)->vdbox_sfc_access; + blob->system_info.vdbox_enable_mask = VDBOX_MASK(gt); + blob->system_info.vebox_enable_mask = VEBOX_MASK(gt); + blob->system_info.vdbox_sfc_support_mask = gt->info.vdbox_sfc_access; base = intel_guc_ggtt_offset(guc, guc->ads_vma); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index fb10f3597ea5..9bbe8a795cb8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -424,25 +424,28 @@ static void guc_log_capture_logs(struct intel_guc_log *log) static u32 __get_default_log_level(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + /* A negative value means "use platform/config default" */ - if (i915_modparams.guc_log_level < 0) { + if (i915->params.guc_log_level < 0) { return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_NON_VERBOSE; } - if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { + if (i915->params.guc_log_level > GUC_LOG_LEVEL_MAX) { DRM_WARN("Incompatible option detected: %s=%d, %s!\n", - "guc_log_level", i915_modparams.guc_log_level, + "guc_log_level", i915->params.guc_log_level, "verbosity too high"); return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ? GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED; } - GEM_BUG_ON(i915_modparams.guc_log_level < GUC_LOG_LEVEL_DISABLED); - GEM_BUG_ON(i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX); - return i915_modparams.guc_log_level; + GEM_BUG_ON(i915->params.guc_log_level < GUC_LOG_LEVEL_DISABLED); + GEM_BUG_ON(i915->params.guc_log_level > GUC_LOG_LEVEL_MAX); + return i915->params.guc_log_level; } int intel_guc_log_create(struct intel_guc_log *log) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 94eb63f309ce..fdfeb4b9b0f5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -660,10 +660,12 @@ void intel_guc_submission_disable(struct intel_guc *guc) static bool __guc_submission_selected(struct intel_guc *guc) { + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + if (!intel_guc_submission_is_supported(guc)) return false; - return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; + return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; } void intel_guc_submission_init_early(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index f518fe05c6f9..d6f55f70889d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -47,15 +47,15 @@ static void __confirm_options(struct intel_uc *uc) drm_dbg(&i915->drm, "enable_guc=%d (guc:%s submission:%s huc:%s)\n", - i915_modparams.enable_guc, + i915->params.enable_guc, yesno(intel_uc_wants_guc(uc)), yesno(intel_uc_wants_guc_submission(uc)), yesno(intel_uc_wants_huc(uc))); - if (i915_modparams.enable_guc == -1) + if (i915->params.enable_guc == -1) return; - if (i915_modparams.enable_guc == 0) { + if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); GEM_BUG_ON(intel_uc_wants_huc(uc)); @@ -65,25 +65,25 @@ static void __confirm_options(struct intel_uc *uc) if (!intel_uc_supports_guc(uc)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "GuC is not supported!"); + i915->params.enable_guc, "GuC is not supported!"); - if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC && + if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC && !intel_uc_supports_huc(uc)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "HuC is not supported!"); + i915->params.enable_guc, "HuC is not supported!"); - if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION && + if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && !intel_uc_supports_guc_submission(uc)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "GuC submission is N/A"); + i915->params.enable_guc, "GuC submission is N/A"); - if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION | + if (i915->params.enable_guc & ~(ENABLE_GUC_SUBMISSION | ENABLE_GUC_LOAD_HUC)) drm_info(&i915->drm, "Incompatible option enable_guc=%d - %s\n", - i915_modparams.enable_guc, "undocumented flag"); + i915->params.enable_guc, "undocumented flag"); } void intel_uc_init_early(struct intel_uc *uc) @@ -267,8 +267,17 @@ static void __uc_fetch_firmwares(struct intel_uc *uc) GEM_BUG_ON(!intel_uc_wants_guc(uc)); err = intel_uc_fw_fetch(&uc->guc.fw); - if (err) + if (err) { + /* Make sure we transition out of transient "SELECTED" state */ + if (intel_uc_wants_huc(uc)) { + drm_dbg(&uc_to_gt(uc)->i915->drm, + "Failed to fetch GuC: %d disabling HuC\n", err); + intel_uc_fw_change_status(&uc->huc.fw, + INTEL_UC_FIRMWARE_ERROR); + } + return; + } if (intel_uc_wants_huc(uc)) intel_uc_fw_fetch(&uc->huc.fw); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c index 9d16b784aa0d..089d98662f46 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c @@ -4,14 +4,41 @@ */ #include <linux/debugfs.h> +#include <drm/drm_print.h> +#include "gt/debugfs_gt.h" #include "intel_guc_debugfs.h" #include "intel_huc_debugfs.h" #include "intel_uc.h" #include "intel_uc_debugfs.h" +static int uc_usage_show(struct seq_file *m, void *data) +{ + struct intel_uc *uc = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + drm_printf(&p, "[guc] supported:%s wanted:%s used:%s\n", + yesno(intel_uc_supports_guc(uc)), + yesno(intel_uc_wants_guc(uc)), + yesno(intel_uc_uses_guc(uc))); + drm_printf(&p, "[huc] supported:%s wanted:%s used:%s\n", + yesno(intel_uc_supports_huc(uc)), + yesno(intel_uc_wants_huc(uc)), + yesno(intel_uc_uses_huc(uc))); + drm_printf(&p, "[submission] supported:%s wanted:%s used:%s\n", + yesno(intel_uc_supports_guc_submission(uc)), + yesno(intel_uc_wants_guc_submission(uc)), + yesno(intel_uc_uses_guc_submission(uc))); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(uc_usage); + void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) { + static const struct debugfs_gt_file files[] = { + { "usage", &uc_usage_fops, NULL }, + }; struct dentry *root; if (!gt_root) @@ -25,6 +52,8 @@ void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root) if (IS_ERR(root)) return; + intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), uc); + intel_guc_debugfs_register(&uc->guc, root); intel_huc_debugfs_register(&uc->huc, root); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index e1caae93996d..59b27aba15c6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -47,12 +47,15 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas * between 33.0 and 35.2 are only related to new additions to support new Gen12 * features. + * + * Note that RKL uses the same firmware as TGL. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ - fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ @@ -112,11 +115,13 @@ struct __packed uc_fw_platform_requirement { }, static void -__uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev) +__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { static const struct uc_fw_platform_requirement fw_blobs[] = { INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB) }; + enum intel_platform p = INTEL_INFO(i915)->platform; + u8 rev = INTEL_REVID(i915); int i; for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) { @@ -151,35 +156,35 @@ __uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev) } /* We don't want to enable GuC/HuC on pre-Gen11 by default */ - if (i915_modparams.enable_guc == -1 && p < INTEL_ICELAKE) + if (i915->params.enable_guc == -1 && p < INTEL_ICELAKE) uc_fw->path = NULL; } -static const char *__override_guc_firmware_path(void) +static const char *__override_guc_firmware_path(struct drm_i915_private *i915) { - if (i915_modparams.enable_guc & (ENABLE_GUC_SUBMISSION | - ENABLE_GUC_LOAD_HUC)) - return i915_modparams.guc_firmware_path; + if (i915->params.enable_guc & (ENABLE_GUC_SUBMISSION | + ENABLE_GUC_LOAD_HUC)) + return i915->params.guc_firmware_path; return ""; } -static const char *__override_huc_firmware_path(void) +static const char *__override_huc_firmware_path(struct drm_i915_private *i915) { - if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC) - return i915_modparams.huc_firmware_path; + if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC) + return i915->params.huc_firmware_path; return ""; } -static void __uc_fw_user_override(struct intel_uc_fw *uc_fw) +static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { const char *path = NULL; switch (uc_fw->type) { case INTEL_UC_FW_TYPE_GUC: - path = __override_guc_firmware_path(); + path = __override_guc_firmware_path(i915); break; case INTEL_UC_FW_TYPE_HUC: - path = __override_huc_firmware_path(); + path = __override_huc_firmware_path(i915); break; } @@ -213,10 +218,8 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, uc_fw->type = type; if (HAS_GT_UC(i915)) { - __uc_fw_auto_select(uc_fw, - INTEL_INFO(i915)->platform, - INTEL_REVID(i915)); - __uc_fw_user_override(uc_fw); + __uc_fw_auto_select(i915, uc_fw); + __uc_fw_user_override(i915, uc_fw); } intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? |