aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.h14
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.h14
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c2
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c7
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h88
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c84
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.c (renamed from drivers/gpu/drm/i915/gt/debugfs_gt.c)18
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.h (renamed from drivers/gpu/drm/i915/gt/debugfs_gt.h)14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c (renamed from drivers/gpu/drm/i915/gt/debugfs_engines.c)10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c (renamed from drivers/gpu/drm/i915/gt/debugfs_gt_pm.c)153
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c88
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c176
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c65
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c262
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.h2
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c16
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c10
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h75
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c26
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c36
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c916
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c93
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h9
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c127
66 files changed, 1917 insertions, 834 deletions
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h
deleted file mode 100644
index f69257eaa1cc..000000000000
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef DEBUGFS_ENGINES_H
-#define DEBUGFS_ENGINES_H
-
-struct intel_gt;
-struct dentry;
-
-void debugfs_engines_register(struct intel_gt *gt, struct dentry *root);
-
-#endif /* DEBUGFS_ENGINES_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
deleted file mode 100644
index 4cf5f5c9da7d..000000000000
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef DEBUGFS_GT_PM_H
-#define DEBUGFS_GT_PM_H
-
-struct intel_gt;
-struct dentry;
-
-void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root);
-
-#endif /* DEBUGFS_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 1aee5e6b1b23..890191f286e3 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -429,7 +429,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
mutex_init(&ppgtt->flush);
mutex_init(&ppgtt->pin_mutex);
- ppgtt_init(&ppgtt->base, gt);
+ ppgtt_init(&ppgtt->base, gt, 0);
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 6e0e52eeb87a..037a9a6e4889 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -548,6 +548,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
I915_GTT_PAGE_SIZE_2M)))) {
vaddr = px_vaddr(pd);
vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
+ clflush_cache_range(vaddr, PAGE_SIZE);
page_size = I915_GTT_PAGE_SIZE_64K;
/*
@@ -568,6 +569,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
for (i = 1; i < index; i += 16)
memset64(vaddr + i, encode, 15);
+ clflush_cache_range(vaddr, PAGE_SIZE);
}
}
@@ -751,7 +753,8 @@ err_pd:
* space.
*
*/
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct i915_ppgtt *ppgtt;
int err;
@@ -760,7 +763,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
if (!ppgtt)
return ERR_PTR(-ENOMEM);
- ppgtt_init(ppgtt, gt);
+ ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index b9028c2ad3c7..f541d19264b4 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -12,7 +12,9 @@ struct i915_address_space;
struct intel_gt;
enum i915_cache_level;
-struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
+
u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 745e84c72c90..e9a0cad5c34d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -394,19 +394,18 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
spin_lock_init(&ce->guc_state.lock);
INIT_LIST_HEAD(&ce->guc_state.fences);
+ INIT_LIST_HEAD(&ce->guc_state.requests);
- spin_lock_init(&ce->guc_active.lock);
- INIT_LIST_HEAD(&ce->guc_active.requests);
-
- ce->guc_id = GUC_INVALID_LRC_ID;
- INIT_LIST_HEAD(&ce->guc_id_link);
+ ce->guc_id.id = GUC_INVALID_LRC_ID;
+ INIT_LIST_HEAD(&ce->guc_id.link);
/*
* Initialize fence to be complete as this is expected to be complete
* unless there is a pending schedule disable outstanding.
*/
- i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify);
- i915_sw_fence_commit(&ce->guc_blocked);
+ i915_sw_fence_init(&ce->guc_state.blocked,
+ sw_fence_dummy_notify);
+ i915_sw_fence_commit(&ce->guc_state.blocked);
i915_active_init(&ce->active,
__intel_context_active, __intel_context_retire, 0);
@@ -420,6 +419,7 @@ void intel_context_fini(struct intel_context *ce)
mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active);
+ i915_sw_fence_fini(&ce->guc_state.blocked);
}
void i915_context_module_exit(void)
@@ -520,15 +520,15 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
- spin_lock_irqsave(&ce->guc_active.lock, flags);
- list_for_each_entry_reverse(rq, &ce->guc_active.requests,
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ list_for_each_entry_reverse(rq, &ce->guc_state.requests,
sched.link) {
if (i915_request_completed(rq))
break;
active = rq;
}
- spin_unlock_irqrestore(&ce->guc_active.lock, flags);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
return active;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2..12252c411159 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -112,6 +112,7 @@ struct intel_context {
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9
+#define CONTEXT_GUC_INIT 10
struct {
u64 timeout_us;
@@ -152,52 +153,83 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+ /**
+ * pinned_contexts_link: List link for the engine's pinned contexts.
+ * This is only used if this is a perma-pinned kernel context and
+ * the list is assumed to only be manipulated during driver load
+ * or unload time so no mutex protection currently.
+ */
+ struct list_head pinned_contexts_link;
+
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
struct {
- /** lock: protects everything in guc_state */
+ /** @lock: protects everything in guc_state */
spinlock_t lock;
/**
- * sched_state: scheduling state of this context using GuC
+ * @sched_state: scheduling state of this context using GuC
* submission
*/
- u16 sched_state;
+ u32 sched_state;
/*
- * fences: maintains of list of requests that have a submit
- * fence related to GuC submission
+ * @fences: maintains a list of requests that are currently
+ * being fenced until a GuC operation completes
*/
struct list_head fences;
+ /**
+ * @blocked: fence used to signal when the blocking of a
+ * context's submissions is complete.
+ */
+ struct i915_sw_fence blocked;
+ /** @number_committed_requests: number of committed requests */
+ int number_committed_requests;
+ /** @requests: list of active requests on this context */
+ struct list_head requests;
+ /** @prio: the context's current guc priority */
+ u8 prio;
+ /**
+ * @prio_count: a counter of the number requests in flight in
+ * each priority bucket
+ */
+ u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
} guc_state;
struct {
- /** lock: protects everything in guc_active */
- spinlock_t lock;
- /** requests: active requests on this context */
- struct list_head requests;
- } guc_active;
-
- /* GuC scheduling state flags that do not require a lock. */
- atomic_t guc_sched_state_no_lock;
-
- /* GuC LRC descriptor ID */
- u16 guc_id;
-
- /* GuC LRC descriptor reference count */
- atomic_t guc_id_ref;
+ /**
+ * @id: handle which is used to uniquely identify this context
+ * with the GuC, protected by guc->contexts_lock
+ */
+ u16 id;
+ /**
+ * @ref: the number of references to the guc_id, when
+ * transitioning in and out of zero protected by
+ * guc->contexts_lock
+ */
+ atomic_t ref;
+ /**
+ * @link: in guc->guc_id_list when the guc_id has no refs but is
+ * still valid, protected by guc->contexts_lock
+ */
+ struct list_head link;
+ } guc_id;
- /*
- * GuC ID link - in list when unpinned but guc_id still valid in GuC
+#ifdef CONFIG_DRM_I915_SELFTEST
+ /**
+ * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
*/
- struct list_head guc_id_link;
+ bool drop_schedule_enable;
- /* GuC context blocked fence */
- struct i915_sw_fence guc_blocked;
+ /**
+ * @drop_schedule_disable: Force drop of schedule disable G2H for
+ * selftest
+ */
+ bool drop_schedule_disable;
- /*
- * GuC priority management
+ /**
+ * @drop_deregister: Force drop of deregister G2H for selftest
*/
- u8 guc_prio;
- u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM];
+ bool drop_deregister;
+#endif
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 87579affb952..452248884ef1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -175,6 +175,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32))
+#define I915_GEM_HWS_PXP 0x60
+#define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_HWS_CSB_BUF0_INDEX 0x10
@@ -273,7 +275,7 @@ static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine)
static inline bool
intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return false;
return intel_engine_has_preemption(engine);
@@ -300,7 +302,7 @@ intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
static inline bool
intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return false;
if (intel_engine_is_virtual(engine))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 0d9105a31d84..2ae57e4656a3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -320,6 +320,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
+ INIT_LIST_HEAD(&engine->pinned_contexts_list);
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
@@ -398,7 +399,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
- if (GRAPHICS_VER(i915) >= 9)
+ if (GRAPHICS_VER(i915) >= 9 &&
+ engine->gt->info.sfc_mask & BIT(engine->instance))
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
}
@@ -474,18 +476,25 @@ void intel_engines_free(struct intel_gt *gt)
}
static
-bool gen11_vdbox_has_sfc(struct drm_i915_private *i915,
+bool gen11_vdbox_has_sfc(struct intel_gt *gt,
unsigned int physical_vdbox,
unsigned int logical_vdbox, u16 vdbox_mask)
{
+ struct drm_i915_private *i915 = gt->i915;
+
/*
* In Gen11, only even numbered logical VDBOXes are hooked
* up to an SFC (Scaler & Format Converter) unit.
* In Gen12, Even numbered physical instance always are connected
* to an SFC. Odd numbered physical instances have SFC only if
* previous even instance is fused off.
+ *
+ * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
+ * in the fuse register that tells us whether a specific SFC is present.
*/
- if (GRAPHICS_VER(i915) == 12)
+ if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
+ return false;
+ else if (GRAPHICS_VER(i915) == 12)
return (physical_vdbox % 2 == 0) ||
!(BIT(physical_vdbox - 1) & vdbox_mask);
else if (GRAPHICS_VER(i915) == 11)
@@ -512,7 +521,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
unsigned int logical_vdbox = 0;
unsigned int i;
- u32 media_fuse;
+ u32 media_fuse, fuse1;
u16 vdbox_mask;
u16 vebox_mask;
@@ -534,6 +543,13 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
GEN11_GT_VEBOX_DISABLE_SHIFT;
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1);
+ gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
+ } else {
+ gt->info.sfc_mask = ~0;
+ }
+
for (i = 0; i < I915_MAX_VCS; i++) {
if (!HAS_ENGINE(gt, _VCS(i))) {
vdbox_mask &= ~BIT(i);
@@ -546,7 +562,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
continue;
}
- if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask))
+ if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
gt->info.vdbox_sfc_access |= BIT(i);
logical_vdbox++;
}
@@ -875,6 +891,8 @@ intel_engine_create_pinned_context(struct intel_engine_cs *engine,
return ERR_PTR(err);
}
+ list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
+
/*
* Give our perma-pinned kernel timelines a separate lockdep class,
* so that we can use them from within the normal user timelines
@@ -897,6 +915,7 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce)
list_del(&ce->timeline->engine_link);
mutex_unlock(&hwsp->vm->mutex);
+ list_del(&ce->pinned_contexts_link);
intel_context_unpin(ce);
intel_context_put(ce);
}
@@ -1163,16 +1182,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
u32 mmio_base = engine->mmio_base;
int slice;
int subslice;
+ int iter;
memset(instdone, 0, sizeof(*instdone));
- switch (GRAPHICS_VER(i915)) {
- default:
+ if (GRAPHICS_VER(i915) >= 8) {
instdone->instdone =
intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
if (engine->id != RCS0)
- break;
+ return;
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
@@ -1182,21 +1201,39 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
instdone->slice_common_extra[1] =
intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
}
- for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
- instdone->sampler[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_SAMPLER_INSTDONE);
- instdone->row[slice][subslice] =
- read_subslice_reg(engine, slice, subslice,
- GEN7_ROW_INSTDONE);
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
+ instdone->sampler[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_SAMPLER_INSTDONE);
+ instdone->row[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_ROW_INSTDONE);
+ }
+ } else {
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
+ instdone->sampler[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_SAMPLER_INSTDONE);
+ instdone->row[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ GEN7_ROW_INSTDONE);
+ }
+ }
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
+ for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice)
+ instdone->geom_svg[slice][subslice] =
+ read_subslice_reg(engine, slice, subslice,
+ XEHPG_INSTDONE_GEOM_SVG);
}
- break;
- case 7:
+ } else if (GRAPHICS_VER(i915) >= 7) {
instdone->instdone =
intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
if (engine->id != RCS0)
- break;
+ return;
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
@@ -1204,22 +1241,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
instdone->row[0][0] =
intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
-
- break;
- case 6:
- case 5:
- case 4:
+ } else if (GRAPHICS_VER(i915) >= 4) {
instdone->instdone =
intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
if (engine->id == RCS0)
/* HACK: Using the wrong struct member */
instdone->slice_common =
intel_uncore_read(uncore, GEN4_INSTDONE1);
- break;
- case 3:
- case 2:
+ } else {
instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
- break;
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 74775ae961b2..a3698f611f45 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -207,7 +207,7 @@ out:
void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return;
next_heartbeat(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 1f07ac4e0672..dacd62773735 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -298,6 +298,29 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
intel_engine_init_heartbeat(engine);
}
+/**
+ * intel_engine_reset_pinned_contexts - Reset the pinned contexts of
+ * an engine.
+ * @engine: The engine whose pinned contexts we want to reset.
+ *
+ * Typically the pinned context LMEM images lose or get their content
+ * corrupted on suspend. This function resets their images.
+ */
+void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+
+ list_for_each_entry(ce, &engine->pinned_contexts_list,
+ pinned_contexts_link) {
+ /* kernel context gets reset at __engine_unpark() */
+ if (ce == engine->kernel_context)
+ continue;
+
+ dbg_poison_ce(ce);
+ ce->ops->reset(ce);
+ }
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_pm.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 70ea46d6cfb0..8520c595f5e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -69,4 +69,6 @@ intel_engine_create_kernel_request(struct intel_engine_cs *engine)
void intel_engine_init__pm(struct intel_engine_cs *engine);
+void intel_engine_reset_pinned_contexts(struct intel_engine_cs *engine);
+
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index ed91bcff20eb..9167ce52487c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -67,8 +67,11 @@ struct intel_instdone {
/* The following exist only in the RCS engine */
u32 slice_common;
u32 slice_common_extra[2];
- u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
- u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
+ u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
+ u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
+
+ /* Added in XeHPG */
+ u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES];
};
/*
@@ -304,6 +307,13 @@ struct intel_engine_cs {
struct intel_context *kernel_context; /* pinned */
+ /**
+ * pinned_contexts_list: List of pinned contexts. This list is only
+ * assumed to be manipulated during driver load- or unload time and
+ * does therefore not have any additional protection.
+ */
+ struct list_head pinned_contexts_list;
+
intel_engine_mask_t saturated; /* submitting semaphores too late? */
struct {
@@ -546,7 +556,7 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
static inline bool
intel_engine_has_timeslices(const struct intel_engine_cs *engine)
{
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return false;
return engine->flags & I915_ENGINE_HAS_TIMESLICES;
@@ -578,4 +588,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
(instdone_has_subslice(dev_priv_, sseu_, slice_, \
subslice_)))
+
+#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
+ for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
+ (iter_) < GEN_MAX_SUBSLICES; \
+ (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
+ (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
+ for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
+
#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a4..73a79c2acd3a 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2140,10 +2140,6 @@ static void __execlists_unhold(struct i915_request *rq)
if (p->flags & I915_DEPENDENCY_WEAK)
continue;
- /* Propagate any change in error status */
- if (rq->fence.error)
- i915_request_set_error_once(w, rq->fence.error);
-
if (w->engine != rq->engine)
continue;
@@ -2565,7 +2561,7 @@ __execlists_context_pre_pin(struct intel_context *ce,
if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) {
lrc_init_state(ce, engine, *vaddr);
- __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size);
+ __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size);
}
return 0;
@@ -2791,6 +2787,8 @@ static void execlists_sanitize(struct intel_engine_cs *engine)
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+
+ intel_engine_reset_pinned_contexts(engine);
}
static void enable_error_interrupt(struct intel_engine_cs *engine)
@@ -3341,7 +3339,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
if (can_preempt(engine)) {
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
- if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (CONFIG_DRM_I915_TIMESLICE_DURATION)
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index cbd0e1010a46..f17383e76eb7 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -644,7 +644,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
struct i915_ppgtt *ppgtt;
int err;
- ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+ ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
atomic_set(&ggtt->vm.open, 0);
- rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
flush_workqueue(ggtt->vm.i915->wq);
mutex_lock(&ggtt->vm.mutex);
@@ -814,6 +813,21 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
return 0;
}
+static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
+{
+ /*
+ * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
+ * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
+ */
+ GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
+ return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
+}
+
+static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
+{
+ return gen6_gttmmadr_size(i915) / 2;
+}
+
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
@@ -822,8 +836,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
u32 pte_flags;
int ret;
- /* For Modern GENs the PTEs and register space are split in the BAR */
- phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
+ GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
+ phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
/*
* On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
@@ -910,6 +924,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
size = gen8_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 1c3af0fc0456..f8253012d166 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -28,10 +28,13 @@
#define INSTR_26_TO_24_MASK 0x7000000
#define INSTR_26_TO_24_SHIFT 24
+#define __INSTR(client) ((client) << INSTR_CLIENT_SHIFT)
+
/*
* Memory interface instructions used by the kernel
*/
-#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+#define MI_INSTR(opcode, flags) \
+ (__INSTR(INSTR_MI_CLIENT) | (opcode) << 23 | (flags))
/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
#define MI_GLOBAL_GTT (1<<22)
@@ -57,6 +60,7 @@
#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0)
#define MI_SUSPEND_FLUSH_EN (1<<0)
#define MI_SET_APPID MI_INSTR(0x0e, 0)
+#define MI_SET_APPID_SESSION_ID(x) ((x) << 0)
#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0)
#define MI_OVERLAY_CONTINUE (0x0<<21)
#define MI_OVERLAY_ON (0x1<<21)
@@ -146,6 +150,7 @@
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
+#define MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22)
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
@@ -273,6 +278,19 @@
#define MI_MATH_REG_CF 0x33
/*
+ * Media instructions used by the kernel
+ */
+#define MEDIA_INSTR(pipe, op, sub_op, flags) \
+ (__INSTR(INSTR_RC_CLIENT) | (pipe) << INSTR_SUBCLIENT_SHIFT | \
+ (op) << INSTR_26_TO_24_SHIFT | (sub_op) << 16 | (flags))
+
+#define MFX_WAIT MEDIA_INSTR(1, 0, 0, 0)
+#define MFX_WAIT_DW0_MFX_SYNC_CONTROL_FLAG REG_BIT(8)
+#define MFX_WAIT_DW0_PXP_SYNC_CONTROL_FLAG REG_BIT(9)
+
+#define CRYPTO_KEY_EXCHANGE MEDIA_INSTR(2, 6, 9, 0)
+
+/*
* Commands used only by the command parser
*/
#define MI_SET_PREDICATE MI_INSTR(0x01, 0)
@@ -328,8 +346,6 @@
#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
-#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16))
-
#define COLOR_BLT ((0x2<<29)|(0x40<<22))
#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 62d40c986642..1cb1948ac959 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,7 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
-#include "debugfs_gt.h"
+#include "intel_gt_debugfs.h"
#include "gem/i915_gem_lmem.h"
#include "i915_drv.h"
@@ -15,12 +15,13 @@
#include "intel_gt_requests.h"
#include "intel_migrate.h"
#include "intel_mocs.h"
+#include "intel_pm.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_uncore.h"
-#include "intel_pm.h"
#include "shmem_utils.h"
+#include "pxp/intel_pxp.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -434,7 +435,7 @@ void intel_gt_driver_register(struct intel_gt *gt)
{
intel_rps_driver_register(&gt->rps);
- debugfs_gt_register(gt);
+ intel_gt_debugfs_register(gt);
}
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -481,7 +482,7 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
- return &i915_ppgtt_create(gt)->vm;
+ return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
else
return i915_vm_get(&gt->ggtt->vm);
}
@@ -660,6 +661,8 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
return err;
+ intel_gt_init_workarounds(gt);
+
/*
* This is just a security blanket to placate dragons.
* On some systems, we very sporadically observe that the first TLBs
@@ -682,6 +685,8 @@ int intel_gt_init(struct intel_gt *gt)
goto err_pm;
}
+ intel_set_mocs_index(gt);
+
err = intel_engines_init(gt);
if (err)
goto err_engines;
@@ -710,6 +715,8 @@ int intel_gt_init(struct intel_gt *gt)
intel_migrate_init(&gt->migrate, gt);
+ intel_pxp_init(&gt->pxp);
+
goto out_fw;
err_gt:
__intel_gt_disable(gt);
@@ -737,6 +744,8 @@ void intel_gt_driver_remove(struct intel_gt *gt)
intel_uc_driver_remove(&gt->uc);
intel_engines_release(gt);
+
+ intel_gt_flush_buffer_pool(gt);
}
void intel_gt_driver_unregister(struct intel_gt *gt)
@@ -745,12 +754,14 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
intel_rps_driver_unregister(&gt->rps);
+ intel_pxp_fini(&gt->pxp);
+
/*
* Upon unregistering the device to prevent any new users, cancel
* all in-flight requests so that we can quickly unbind the active
* resources.
*/
- intel_gt_set_wedged(gt);
+ intel_gt_set_wedged_on_fini(gt);
/* Scrub all HW state upon release */
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
@@ -765,6 +776,7 @@ void intel_gt_driver_release(struct intel_gt *gt)
if (vm) /* FIXME being called twice on error paths :( */
i915_vm_put(vm);
+ intel_wa_list_free(&gt->wa_list);
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
intel_gt_fini_buffer_pool(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index aa0a59c5b614..acc49c56a9f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -245,8 +245,6 @@ void intel_gt_fini_buffer_pool(struct intel_gt *gt)
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
int n;
- intel_gt_flush_buffer_pool(gt);
-
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index 591eb60785db..1fe19ccd2794 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -5,14 +5,15 @@
#include <linux/debugfs.h>
-#include "debugfs_engines.h"
-#include "debugfs_gt.h"
-#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+#include "intel_gt_debugfs.h"
+#include "intel_gt_engines_debugfs.h"
+#include "intel_gt_pm_debugfs.h"
#include "intel_sseu_debugfs.h"
+#include "pxp/intel_pxp_debugfs.h"
#include "uc/intel_uc_debugfs.h"
-#include "i915_drv.h"
-void debugfs_gt_register(struct intel_gt *gt)
+void intel_gt_debugfs_register(struct intel_gt *gt)
{
struct dentry *root;
@@ -23,15 +24,16 @@ void debugfs_gt_register(struct intel_gt *gt)
if (IS_ERR(root))
return;
- debugfs_engines_register(gt, root);
- debugfs_gt_pm_register(gt, root);
+ intel_gt_engines_debugfs_register(gt, root);
+ intel_gt_pm_debugfs_register(gt, root);
intel_sseu_debugfs_register(gt, root);
intel_uc_debugfs_register(&gt->uc, root);
+ intel_pxp_debugfs_register(&gt->pxp, root);
}
void intel_gt_debugfs_register_files(struct dentry *root,
- const struct debugfs_gt_file *files,
+ const struct intel_gt_debugfs_file *files,
unsigned long count, void *data)
{
while (count--) {
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
index f77540f727e9..8b6fca09897c 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
@@ -3,14 +3,14 @@
* Copyright © 2019 Intel Corporation
*/
-#ifndef DEBUGFS_GT_H
-#define DEBUGFS_GT_H
+#ifndef INTEL_GT_DEBUGFS_H
+#define INTEL_GT_DEBUGFS_H
#include <linux/file.h>
struct intel_gt;
-#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \
+#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \
static int __name ## _open(struct inode *inode, struct file *file) \
{ \
return single_open(file, __name ## _show, inode->i_private); \
@@ -23,16 +23,16 @@ static const struct file_operations __name ## _fops = { \
.release = single_release, \
}
-void debugfs_gt_register(struct intel_gt *gt);
+void intel_gt_debugfs_register(struct intel_gt *gt);
-struct debugfs_gt_file {
+struct intel_gt_debugfs_file {
const char *name;
const struct file_operations *fops;
bool (*eval)(void *data);
};
void intel_gt_debugfs_register_files(struct dentry *root,
- const struct debugfs_gt_file *files,
+ const struct intel_gt_debugfs_file *files,
unsigned long count, void *data);
-#endif /* DEBUGFS_GT_H */
+#endif /* INTEL_GT_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
index 5e3725e62241..8f9b874fdc9c 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
@@ -6,10 +6,10 @@
#include <drm/drm_print.h>
-#include "debugfs_engines.h"
-#include "debugfs_gt.h"
#include "i915_drv.h" /* for_each_engine! */
#include "intel_engine.h"
+#include "intel_gt_debugfs.h"
+#include "intel_gt_engines_debugfs.h"
static int engines_show(struct seq_file *m, void *data)
{
@@ -24,11 +24,11 @@ static int engines_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines);
-void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
+void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "engines", &engines_fops },
};
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h
new file mode 100644
index 000000000000..dda113452da9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_ENGINES_DEBUGFS_H
+#define INTEL_GT_ENGINES_DEBUGFS_H
+
+struct intel_gt;
+struct dentry;
+
+void intel_gt_engines_debugfs_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* INTEL_GT_ENGINES_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index b2de83be4d97..699a74582d32 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -13,6 +13,7 @@
#include "intel_lrc_reg.h"
#include "intel_uncore.h"
#include "intel_rps.h"
+#include "pxp/intel_pxp_irq.h"
static void guc_irq_handler(struct intel_guc *guc, u16 iir)
{
@@ -64,6 +65,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
if (instance == OTHER_GTPM_INSTANCE)
return gen11_rps_irq_handler(&gt->rps, iir);
+ if (instance == OTHER_KCR_INSTANCE)
+ return intel_pxp_irq_handler(&gt->pxp, iir);
+
WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
instance, iir);
}
@@ -196,6 +200,9 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+
+ intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_CRYPTO_RSVD_INTR_MASK, ~0);
}
void gen11_gt_irq_postinstall(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index dea8e2479897..524eaf678790 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -18,6 +18,9 @@
#include "intel_rc6.h"
#include "intel_rps.h"
#include "intel_wakeref.h"
+#include "pxp/intel_pxp_pm.h"
+
+#define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2)
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
@@ -262,6 +265,8 @@ int intel_gt_resume(struct intel_gt *gt)
intel_uc_resume(&gt->uc);
+ intel_pxp_resume(&gt->pxp);
+
user_forcewake(gt, false);
out_fw:
@@ -279,7 +284,7 @@ static void wait_for_suspend(struct intel_gt *gt)
if (!intel_gt_pm_is_awake(gt))
return;
- if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ if (intel_gt_wait_for_idle(gt, I915_GT_SUSPEND_IDLE_TIMEOUT) == -ETIME) {
/*
* Forcibly cancel outstanding work and leave
* the gpu quiet.
@@ -296,7 +301,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
user_forcewake(gt, true);
wait_for_suspend(gt);
- intel_uc_suspend(&gt->uc);
+ intel_pxp_suspend(&gt->pxp, false);
}
static suspend_state_t pm_suspend_target(void)
@@ -320,6 +325,8 @@ void intel_gt_suspend_late(struct intel_gt *gt)
GEM_BUG_ON(gt->awake);
+ intel_uc_suspend(&gt->uc);
+
/*
* On disabling the device, we want to turn off HW access to memory
* that we no longer own.
@@ -346,6 +353,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
+ intel_pxp_suspend(&gt->pxp, true);
intel_uc_runtime_suspend(&gt->uc);
GT_TRACE(gt, "\n");
@@ -353,11 +361,19 @@ void intel_gt_runtime_suspend(struct intel_gt *gt)
int intel_gt_runtime_resume(struct intel_gt *gt)
{
+ int ret;
+
GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
intel_ggtt_restore_fences(gt->ggtt);
- return intel_uc_runtime_resume(&gt->uc);
+ ret = intel_uc_runtime_resume(&gt->uc);
+ if (ret)
+ return ret;
+
+ intel_pxp_resume(&gt->pxp);
+
+ return 0;
}
static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index d6f5836396f8..5f84ad602642 100644
--- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -6,12 +6,12 @@
#include <linux/seq_file.h>
-#include "debugfs_gt.h"
-#include "debugfs_gt_pm.h"
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
+#include "intel_gt_debugfs.h"
#include "intel_gt_pm.h"
+#include "intel_gt_pm_debugfs.h"
#include "intel_llc.h"
#include "intel_rc6.h"
#include "intel_rps.h"
@@ -36,7 +36,7 @@ static int fw_domains_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
static void print_rc6_res(struct seq_file *m,
const char *title,
@@ -238,11 +238,10 @@ static int drpc_show(struct seq_file *m, void *unused)
return err;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc);
-static int frequency_show(struct seq_file *m, void *unused)
+void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
{
- struct intel_gt *gt = m->private;
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_rps *rps = &gt->rps;
@@ -254,21 +253,21 @@ static int frequency_show(struct seq_file *m, void *unused)
u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
- seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
- seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
- seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
+ drm_printf(p, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
+ drm_printf(p, "Requested VID: %d\n", rgvswctl & 0x3f);
+ drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
MEMSTAT_VID_SHIFT);
- seq_printf(m, "Current P-state: %d\n",
+ drm_printf(p, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
u32 rpmodectl, freq_sts;
rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
- seq_printf(m, "Video Turbo Mode: %s\n",
+ drm_printf(p, "Video Turbo Mode: %s\n",
yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
- seq_printf(m, "HW control enabled: %s\n",
+ drm_printf(p, "HW control enabled: %s\n",
yesno(rpmodectl & GEN6_RP_ENABLE));
- seq_printf(m, "SW control enabled: %s\n",
+ drm_printf(p, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
@@ -276,25 +275,25 @@ static int frequency_show(struct seq_file *m, void *unused)
freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
- seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
- seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq);
+ drm_printf(p, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+ drm_printf(p, "DDR freq: %d MHz\n", i915->mem_freq);
- seq_printf(m, "actual GPU freq: %d MHz\n",
+ drm_printf(p, "actual GPU freq: %d MHz\n",
intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
- seq_printf(m, "current GPU freq: %d MHz\n",
+ drm_printf(p, "current GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->cur_freq));
- seq_printf(m, "max GPU freq: %d MHz\n",
+ drm_printf(p, "max GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->max_freq));
- seq_printf(m, "min GPU freq: %d MHz\n",
+ drm_printf(p, "min GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->min_freq));
- seq_printf(m, "idle GPU freq: %d MHz\n",
+ drm_printf(p, "idle GPU freq: %d MHz\n",
intel_gpu_freq(rps, rps->idle_freq));
- seq_printf(m, "efficient (RPe) frequency: %d MHz\n",
+ drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
} else if (GRAPHICS_VER(i915) >= 6) {
u32 rp_state_limits;
@@ -309,13 +308,11 @@ static int frequency_show(struct seq_file *m, void *unused)
int max_freq;
rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
- if (IS_GEN9_LP(i915)) {
- rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ rp_state_cap = intel_rps_read_state_cap(rps);
+ if (IS_GEN9_LP(i915))
gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
- } else {
- rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+ else
gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
- }
/* RPSTAT1 is in the GT power well */
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
@@ -376,113 +373,121 @@ static int frequency_show(struct seq_file *m, void *unused)
}
pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
- seq_printf(m, "Video Turbo Mode: %s\n",
+ drm_printf(p, "Video Turbo Mode: %s\n",
yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
- seq_printf(m, "HW control enabled: %s\n",
+ drm_printf(p, "HW control enabled: %s\n",
yesno(rpmodectl & GEN6_RP_ENABLE));
- seq_printf(m, "SW control enabled: %s\n",
+ drm_printf(p, "SW control enabled: %s\n",
yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
- seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_mask);
if (GRAPHICS_VER(i915) <= 10)
- seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
+ drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
pm_isr, pm_iir);
- seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
+ drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
rps->pm_intrmsk_mbz);
- seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
- seq_printf(m, "Render p-state ratio: %d\n",
+ drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ drm_printf(p, "Render p-state ratio: %d\n",
(gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
- seq_printf(m, "Render p-state VID: %d\n",
+ drm_printf(p, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
- seq_printf(m, "Render p-state limit: %d\n",
+ drm_printf(p, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
- seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
- seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
- seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
- seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
- seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
- seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %d (%lldns)\n",
+ drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
+ drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
+ drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
+ drm_printf(p, "CAGF: %dMHz\n", cagf);
+ drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
rpcurupei,
intel_gt_pm_interval_to_ns(gt, rpcurupei));
- seq_printf(m, "RP CUR UP: %d (%lldns)\n",
+ drm_printf(p, "RP CUR UP: %d (%lldns)\n",
rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
- seq_printf(m, "RP PREV UP: %d (%lldns)\n",
+ drm_printf(p, "RP PREV UP: %d (%lldns)\n",
rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
- seq_printf(m, "Up threshold: %d%%\n",
+ drm_printf(p, "Up threshold: %d%%\n",
rps->power.up_threshold);
- seq_printf(m, "RP UP EI: %d (%lldns)\n",
+ drm_printf(p, "RP UP EI: %d (%lldns)\n",
rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
- seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n",
+ drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
- seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n",
+ drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
rpcurdownei,
intel_gt_pm_interval_to_ns(gt, rpcurdownei));
- seq_printf(m, "RP CUR DOWN: %d (%lldns)\n",
+ drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
rpcurdown,
intel_gt_pm_interval_to_ns(gt, rpcurdown));
- seq_printf(m, "RP PREV DOWN: %d (%lldns)\n",
+ drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
rpprevdown,
intel_gt_pm_interval_to_ns(gt, rpprevdown));
- seq_printf(m, "Down threshold: %d%%\n",
+ drm_printf(p, "Down threshold: %d%%\n",
rps->power.down_threshold);
- seq_printf(m, "RP DOWN EI: %d (%lldns)\n",
+ drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
- seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n",
+ drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
max_freq *= (IS_GEN9_BC(i915) ||
GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
- seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+ drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
intel_gpu_freq(rps, max_freq));
max_freq = (rp_state_cap & 0xff00) >> 8;
max_freq *= (IS_GEN9_BC(i915) ||
GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
- seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+ drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
intel_gpu_freq(rps, max_freq));
max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
rp_state_cap >> 0) & 0xff;
max_freq *= (IS_GEN9_BC(i915) ||
GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1);
- seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
intel_gpu_freq(rps, max_freq));
- seq_printf(m, "Max overclocked frequency: %dMHz\n",
+ drm_printf(p, "Max overclocked frequency: %dMHz\n",
intel_gpu_freq(rps, rps->max_freq));
- seq_printf(m, "Current freq: %d MHz\n",
+ drm_printf(p, "Current freq: %d MHz\n",
intel_gpu_freq(rps, rps->cur_freq));
- seq_printf(m, "Actual freq: %d MHz\n", cagf);
- seq_printf(m, "Idle freq: %d MHz\n",
+ drm_printf(p, "Actual freq: %d MHz\n", cagf);
+ drm_printf(p, "Idle freq: %d MHz\n",
intel_gpu_freq(rps, rps->idle_freq));
- seq_printf(m, "Min freq: %d MHz\n",
+ drm_printf(p, "Min freq: %d MHz\n",
intel_gpu_freq(rps, rps->min_freq));
- seq_printf(m, "Boost freq: %d MHz\n",
+ drm_printf(p, "Boost freq: %d MHz\n",
intel_gpu_freq(rps, rps->boost_freq));
- seq_printf(m, "Max freq: %d MHz\n",
+ drm_printf(p, "Max freq: %d MHz\n",
intel_gpu_freq(rps, rps->max_freq));
- seq_printf(m,
+ drm_printf(p,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
} else {
- seq_puts(m, "no P-state info available\n");
+ drm_puts(p, "no P-state info available\n");
}
- seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
- seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
- seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
+ drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
+ drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
+ drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
intel_runtime_pm_put(uncore->rpm, wakeref);
+}
+
+static int frequency_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ intel_gt_pm_frequency_dump(gt, &p);
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency);
static int llc_show(struct seq_file *m, void *data)
{
@@ -535,7 +540,7 @@ static bool llc_eval(void *data)
return HAS_LLC(gt->i915);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc);
static const char *rps_power_to_str(unsigned int power)
{
@@ -614,11 +619,11 @@ static bool rps_eval(void *data)
return HAS_RPS(gt->i915);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
-void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
+void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "drpc", &drpc_fops, NULL },
{ "frequency", &frequency_fops, NULL },
{ "forcewake", &fw_domains_fops, NULL },
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
new file mode 100644
index 000000000000..2b824289582b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_PM_DEBUGFS_H
+#define INTEL_GT_PM_DEBUGFS_H
+
+struct intel_gt;
+struct dentry;
+struct drm_printer;
+
+void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
+void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
+
+#endif /* INTEL_GT_PM_DEBUGFS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1..14216cc471b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -26,6 +26,7 @@
#include "intel_rps_types.h"
#include "intel_migrate_types.h"
#include "intel_wakeref.h"
+#include "pxp/intel_pxp_types.h"
struct drm_i915_private;
struct i915_ggtt;
@@ -72,6 +73,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct i915_wa_list wa_list;
+
struct intel_gt_timelines {
spinlock_t lock; /* protects active_list */
struct list_head active_list;
@@ -184,6 +187,9 @@ struct intel_gt {
u8 num_engines;
+ /* General presence of SFC units */
+ u8 sfc_mask;
+
/* Media engine access to SFC per instance */
u8 vdbox_sfc_access;
@@ -192,6 +198,12 @@ struct intel_gt {
unsigned long mslice_mask;
} info;
+
+ struct {
+ u8 uc_index;
+ } mocs;
+
+ struct intel_pxp pxp;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e137dd32b5b8..67d14afa6623 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -28,7 +28,8 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
* used the passed in size for the page size, which should ensure it
* also has the same alignment.
*/
- obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0);
+ obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
+ vm->lmem_pt_obj_flags);
/*
* Ensure all paging structures for this vm share the same dma-resv
* object underneath, with the idea that one object_lock() will lock
@@ -155,7 +156,7 @@ void i915_vm_resv_release(struct kref *kref)
static void __i915_vm_release(struct work_struct *work)
{
struct i915_address_space *vm =
- container_of(work, struct i915_address_space, rcu.work);
+ container_of(work, struct i915_address_space, release_work);
vm->cleanup(vm);
i915_address_space_fini(vm);
@@ -171,7 +172,7 @@ void i915_vm_release(struct kref *kref)
GEM_BUG_ON(i915_is_ggtt(vm));
trace_i915_ppgtt_release(vm);
- queue_rcu_work(vm->i915->wq, &vm->rcu);
+ queue_work(vm->i915->wq, &vm->release_work);
}
void i915_address_space_init(struct i915_address_space *vm, int subclass)
@@ -185,7 +186,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
if (!kref_read(&vm->resv_ref))
kref_init(&vm->resv_ref);
- INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+ INIT_WORK(&vm->release_work, __i915_vm_release);
atomic_set(&vm->open, 1);
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index bc7153018ebd..bc6750263359 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -213,7 +213,7 @@ struct i915_vma_ops {
struct i915_address_space {
struct kref ref;
- struct rcu_work rcu;
+ struct work_struct release_work;
struct drm_mm mm;
struct intel_gt *gt;
@@ -260,6 +260,9 @@ struct i915_address_space {
u8 pd_shift;
u8 scratch_order;
+ /* Flags used when creating page-table objects for this vm */
+ unsigned long lmem_pt_obj_flags;
+
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
@@ -519,7 +522,8 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
}
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
int i915_ggtt_probe_hw(struct drm_i915_private *i915);
int i915_ggtt_init_hw(struct drm_i915_private *i915);
@@ -537,7 +541,8 @@ static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
int i915_ppgtt_init_hw(struct intel_gt *gt);
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index bb4af4977920..3ef9eaf8c50e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -226,6 +226,40 @@ static const u8 gen12_xcs_offsets[] = {
END
};
+static const u8 dg2_xcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END
+};
+
static const u8 gen8_rcs_offsets[] = {
NOP(1),
LRI(14, POSTED),
@@ -525,6 +559,49 @@ static const u8 xehp_rcs_offsets[] = {
END
};
+static const u8 dg2_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END
+};
+
#undef END
#undef REG16
#undef REG
@@ -543,7 +620,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
!intel_engine_has_relative_mmio(engine));
if (engine->class == RENDER_CLASS) {
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ return dg2_rcs_offsets;
+ else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
return xehp_rcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_rcs_offsets;
@@ -554,7 +633,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
else
return gen8_rcs_offsets;
} else {
- if (GRAPHICS_VER(engine->i915) >= 12)
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ return dg2_xcs_offsets;
+ else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_xcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 9)
return gen9_xcs_offsets;
@@ -861,7 +942,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
- obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
+ obj = i915_gem_object_create_lmem(engine->i915, context_size,
+ I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 1dac21aa7e5c..afb1cce9a352 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -78,7 +78,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
* TODO: Add support for huge LMEM PTEs
*/
- vm = i915_ppgtt_create(gt);
+ vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(vm))
return ERR_CAST(vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d..15f9ada28a7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,8 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+ u8 uc_index;
+ u8 unused_entries_index;
};
/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -40,6 +42,8 @@ struct drm_i915_mocs_table {
#define L3_ESC(value) ((value) << 0)
#define L3_SCC(value) ((value) << 1)
#define _L3_CACHEABILITY(value) ((value) << 4)
+#define L3_GLBGO(value) ((value) << 6)
+#define L3_LKUP(value) ((value) << 7)
/* Helper defines */
#define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
@@ -88,18 +92,25 @@ struct drm_i915_mocs_table {
*
* Entries not part of the following tables are undefined as far as
* userspace is concerned and shouldn't be relied upon. For Gen < 12
- * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * they will be initialized to PTE. Gen >= 12 don't have a setting for
+ * PTE and those platforms except TGL/RKL will be initialized L3 WB to
+ * catch accidental use of reserved and unused mocs indexes.
*
* The last few entries are reserved by the hardware. For ICL+ they
* should be initialized according to bspec and never used, for older
* platforms they should never be written to.
*
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
* interface for ICL+. For older platforms, they are part of kernel
* ABI. It is expected that, for specific hardware platform, existing
* entries will remain constant and the table will only be updated by
* adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ * indices have been set to L3 WB. These reserved entries should never
+ * be used, they may be changed to low performant variants with better
+ * coherency in the future if more entries are needed.
+ * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
*/
#define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -282,17 +293,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
};
static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
- /* Error */
- MOCS_ENTRY(0, 0, L3_0_DIRECT),
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
- /* Reserved */
- MOCS_ENTRY(2, 0, L3_0_DIRECT),
- MOCS_ENTRY(3, 0, L3_0_DIRECT),
- MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -314,6 +317,83 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
MOCS_ENTRY(63, 0, L3_1_UC),
};
+static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
+ GEN11_MOCS_ENTRIES,
+ /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+ MOCS_ENTRY(48,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + L3 */
+ MOCS_ENTRY(49,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + LLC */
+ MOCS_ENTRY(50,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* Implicitly enable L1 - HDC:L1 */
+ MOCS_ENTRY(51,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_1_UC),
+ /* HW Special Case (CCS) */
+ MOCS_ENTRY(60,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* HW Special Case (Displayable) */
+ MOCS_ENTRY(61,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+};
+
+static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = {
+ /* wa_1608975824 */
+ MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)),
+
+ /* UC - Coherent; GO:L3 */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)),
+ /* UC - Non-Coherent; GO:L3 */
+ MOCS_ENTRY(4, 0, L3_1_UC),
+
+ /* WB */
+ MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)),
+
+ /* HW Reserved - SW program but never use. */
+ MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)),
+ MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)),
+ MOCS_ENTRY(60, 0, L3_1_UC),
+ MOCS_ENTRY(61, 0, L3_1_UC),
+ MOCS_ENTRY(62, 0, L3_1_UC),
+ MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
+static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
+ /* UC - Coherent; GO:L3 */
+ MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+ /* WB - LC */
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
+static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
+ /* Wa_14011441408: Set Go to Memory for MOCS#0 */
+ MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Coherent; GO:Memory */
+ MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+ /* UC - Non-Coherent; GO:Memory */
+ MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
+
+ /* WB - LC */
+ MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
+};
+
enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -340,14 +420,45 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
{
unsigned int flags;
- if (IS_DG1(i915)) {
+ memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
+ table->unused_entries_index = I915_MOCS_PTE;
+ if (IS_DG2(i915)) {
+ if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) {
+ table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
+ table->table = dg2_mocs_table_g10_ax;
+ } else {
+ table->size = ARRAY_SIZE(dg2_mocs_table);
+ table->table = dg2_mocs_table;
+ }
+ table->uc_index = 1;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->unused_entries_index = 3;
+ } else if (IS_XEHPSDV(i915)) {
+ table->size = ARRAY_SIZE(xehpsdv_mocs_table);
+ table->table = xehpsdv_mocs_table;
+ table->uc_index = 2;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->unused_entries_index = 5;
+ } else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
+ table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
- } else if (GRAPHICS_VER(i915) >= 12) {
+ table->uc_index = 1;
+ table->unused_entries_index = 5;
+ } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
+ /* For TGL/RKL, Can't be changed now for ABI reasons */
table->size = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->uc_index = 3;
+ } else if (GRAPHICS_VER(i915) >= 12) {
+ table->size = ARRAY_SIZE(gen12_mocs_table);
+ table->table = gen12_mocs_table;
+ table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+ table->uc_index = 3;
+ table->unused_entries_index = 2;
} else if (GRAPHICS_VER(i915) == 11) {
table->size = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -393,16 +504,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
}
/*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is non-zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
*/
static u32 get_entry_control(const struct drm_i915_mocs_table *table,
unsigned int index)
{
if (index < table->size && table->table[index].used)
return table->table[index].control_value;
-
- return table->table[I915_MOCS_PTE].control_value;
+ return table->table[table->unused_entries_index].control_value;
}
#define for_each_mocs(mocs, t, i) \
@@ -417,6 +528,8 @@ static void __init_mocs_table(struct intel_uncore *uncore,
unsigned int i;
u32 mocs;
+ drm_WARN_ONCE(&uncore->i915->drm, !table->unused_entries_index,
+ "Unused entries index should have been defined\n");
for_each_mocs(mocs, table, i)
intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
}
@@ -443,16 +556,16 @@ static void init_mocs_table(struct intel_engine_cs *engine,
}
/*
- * Get l3cc_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get l3cc_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is not zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
*/
static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
unsigned int index)
{
if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
-
- return table->table[I915_MOCS_PTE].l3cc_value;
+ return table->table[table->unused_entries_index].l3cc_value;
}
static u32 l3cc_combine(u16 low, u16 high)
@@ -468,10 +581,9 @@ static u32 l3cc_combine(u16 low, u16 high)
0; \
i++)
-static void init_l3cc_table(struct intel_engine_cs *engine,
+static void init_l3cc_table(struct intel_uncore *uncore,
const struct drm_i915_mocs_table *table)
{
- struct intel_uncore *uncore = engine->uncore;
unsigned int i;
u32 l3cc;
@@ -496,7 +608,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, &table);
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
- init_l3cc_table(engine, &table);
+ init_l3cc_table(engine->uncore, &table);
}
static u32 global_mocs_offset(void)
@@ -504,6 +616,14 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
}
+void intel_set_mocs_index(struct intel_gt *gt)
+{
+ struct drm_i915_mocs_table table;
+
+ get_mocs_settings(gt->i915, &table);
+ gt->mocs.uc_index = table.uc_index;
+}
+
void intel_mocs_init(struct intel_gt *gt)
{
struct drm_i915_mocs_table table;
@@ -515,6 +635,14 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, &table);
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+
+ /*
+ * Initialize the L3CC table as part of mocs initalization to make
+ * sure the LNCFCMOCSx registers are programmed for the subsequent
+ * memory transactions including guc transactions
+ */
+ if (flags & HAS_RENDER_L3CC)
+ init_l3cc_table(gt->uncore, &table);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index d83274f5163b..76db827210c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -36,5 +36,6 @@ struct intel_gt;
void intel_mocs_init(struct intel_gt *gt);
void intel_mocs_init_engine(struct intel_engine_cs *engine);
+void intel_set_mocs_index(struct intel_gt *gt);
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 886060f7e6fc..4396bfd630d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -155,19 +155,20 @@ int i915_ppgtt_init_hw(struct intel_gt *gt)
}
static struct i915_ppgtt *
-__ppgtt_create(struct intel_gt *gt)
+__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags)
{
if (GRAPHICS_VER(gt->i915) < 8)
return gen6_ppgtt_create(gt);
else
- return gen8_ppgtt_create(gt);
+ return gen8_ppgtt_create(gt, lmem_pt_obj_flags);
}
-struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct i915_ppgtt *ppgtt;
- ppgtt = __ppgtt_create(gt);
+ ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags);
if (IS_ERR(ppgtt))
return ppgtt;
@@ -298,7 +299,8 @@ int ppgtt_set_pages(struct i915_vma *vma)
return 0;
}
-void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
+ unsigned long lmem_pt_obj_flags)
{
struct drm_i915_private *i915 = gt->i915;
@@ -306,6 +308,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
ppgtt->vm.i915 = i915;
ppgtt->vm.dma = i915->drm.dev;
ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+ ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags;
dma_resv_init(&ppgtt->vm._resv);
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index a74b72f50cc9..afb35d2e5c73 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -32,7 +32,7 @@ static int init_fake_lmem_bar(struct intel_memory_region *mem)
mem->remap_addr = dma_map_resource(i915->drm.dev,
mem->region.start,
mem->fake_mappable.size,
- PCI_DMA_BIDIRECTIONAL,
+ DMA_BIDIRECTIONAL,
DMA_ATTR_FORCE_CONTIGUOUS);
if (dma_mapping_error(i915->drm.dev, mem->remap_addr)) {
drm_mm_remove_node(&mem->fake_mappable);
@@ -62,7 +62,7 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem)
dma_unmap_resource(mem->i915->drm.dev,
mem->remap_addr,
mem->fake_mappable.size,
- PCI_DMA_BIDIRECTIONAL,
+ DMA_BIDIRECTIONAL,
DMA_ATTR_FORCE_CONTIGUOUS);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 7c4d5158e03b..2fdd52b62092 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -112,7 +112,8 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
- obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
+ obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt))
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 2958e2fae380..593524195707 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -17,6 +17,7 @@
#include "intel_ring.h"
#include "shmem_utils.h"
#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
/* Rough estimate of the typical request size, performing a flush,
* set-context and then emitting the batch.
@@ -292,6 +293,8 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+
+ intel_engine_reset_pinned_contexts(engine);
}
static void reset_prepare(struct intel_engine_cs *engine)
@@ -1265,7 +1268,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine)
int size, err;
if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS)
- return 0;
+ return NULL;
err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
if (err < 0)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index d812b27835f8..172de6c9f949 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -882,8 +882,6 @@ void intel_rps_park(struct intel_rps *rps)
if (!intel_rps_is_enabled(rps))
return;
- GEM_BUG_ON(atomic_read(&rps->num_waiters));
-
if (!intel_rps_clear_active(rps))
return;
@@ -996,20 +994,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
static void gen6_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 rp_state_cap = intel_rps_read_state_cap(rps);
/* All of these values are in units of 50MHz */
/* static values from HW: RP0 > RP1 > RPn (min_freq) */
if (IS_GEN9_LP(i915)) {
- u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
-
rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
rps->min_freq = (rp_state_cap >> 0) & 0xff;
} else {
- u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
-
rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
rps->min_freq = (rp_state_cap >> 16) & 0xff;
@@ -1973,8 +1967,14 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
u32 intel_rps_read_punit_req(struct intel_rps *rps)
{
struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+ intel_wakeref_t wakeref;
+ u32 freq = 0;
+
+ with_intel_runtime_pm_if_in_use(rpm, wakeref)
+ freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
- return intel_uncore_read(uncore, GEN6_RPNSWREQ);
+ return freq;
}
static u32 intel_rps_get_req(u32 pureq)
@@ -2140,6 +2140,19 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
return set_min_freq(rps, val);
}
+u32 intel_rps_read_state_cap(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ if (IS_XEHPSDV(i915))
+ return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
+ else if (IS_GEN9_LP(i915))
+ return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ else
+ return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+}
+
/* External interface for intel_ips.ko */
static struct drm_i915_private __rcu *ips_mchdev;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index 4213bcce1667..11960d64ca82 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
+u32 intel_rps_read_state_cap(struct intel_rps *rps);
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index bbd272943c3f..bdf09051b8a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
}
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
- u32 ss_mask)
+ u8 *subslice_mask, u32 ss_mask)
{
int offset = slice * sseu->ss_stride;
- memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
+ memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}
unsigned int
@@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
return total;
}
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
- u8 s_en, u32 ss_en, u16 eu_en)
+static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
+{
+ u32 ss_mask;
+
+ ss_mask = ss_en >> (s * sseu->max_subslices);
+ ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
+
+ return ss_mask;
+}
+
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
+ u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
int s, ss;
- /* ss_en represents entire subslice mask across all slices */
+ /* g_ss_en/c_ss_en represent entire subslice mask across all slices */
GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
- sizeof(ss_en) * BITS_PER_BYTE);
+ sizeof(g_ss_en) * BITS_PER_BYTE);
for (s = 0; s < sseu->max_slices; s++) {
if ((s_en & BIT(s)) == 0)
@@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
sseu->slice_mask |= BIT(s);
- intel_sseu_set_subslices(sseu, s, ss_en);
+ /*
+ * XeHP introduces the concept of compute vs geometry DSS. To
+ * reduce variation between GENs around subslice usage, store a
+ * mask for both the geometry and compute enabled masks since
+ * userspace will need to be able to query these masks
+ * independently. Also compute a total enabled subslice count
+ * for the purposes of selecting subslices to use in a
+ * particular GEM context.
+ */
+ intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
+ get_ss_stride_mask(sseu, s, c_ss_en));
+ intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
+ get_ss_stride_mask(sseu, s, g_ss_en));
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ get_ss_stride_mask(sseu, s,
+ g_ss_en | c_ss_en));
for (ss = 0; ss < sseu->max_subslices; ss++)
if (intel_sseu_has_subslice(sseu, s, ss))
@@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
{
struct sseu_dev_info *sseu = &gt->info.sseu;
struct intel_uncore *uncore = gt->uncore;
- u32 dss_en;
+ u32 g_dss_en, c_dss_en = 0;
u16 eu_en = 0;
u8 eu_en_fuse;
u8 s_en;
@@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
GEN11_GT_S_ENA_MASK;
- dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE);
+ g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+ c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
/* one bit per pair of EUs */
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
@@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
if (eu_en_fuse & BIT(eu))
eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
- gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
+ gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
/* TGL only supports slice-level power gating */
sseu->has_slice_pg = 1;
@@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
GEN11_EU_DIS_MASK);
- gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
+ gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);
/* ICL has no power gating restrictions. */
sseu->has_slice_pg = 1;
@@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
sseu_set_eus(sseu, 0, 1, ~disabled_mask);
}
- intel_sseu_set_subslices(sseu, 0, subslice_mask);
+ intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
sseu->eu_total = compute_eu_total(sseu);
@@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
/* skip disabled slice */
continue;
- intel_sseu_set_subslices(sseu, s, subslice_mask);
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ subslice_mask);
eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
/* skip disabled slice */
continue;
- intel_sseu_set_subslices(sseu, s, subslice_mask);
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
u8 eu_disabled_mask;
@@ -485,10 +514,9 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
}
fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
- switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
+ switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
default:
- MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
- HSW_F1_EU_DIS_SHIFT);
+ MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
fallthrough;
case HSW_F1_EU_DIS_10EUS:
sseu->eu_per_subslice = 10;
@@ -506,7 +534,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
sseu->eu_per_subslice);
for (s = 0; s < sseu->max_slices; s++) {
- intel_sseu_set_subslices(sseu, s, subslice_mask);
+ intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
+ subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
sseu_set_eus(sseu, s, ss,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 22fef98887c0..60882a74741e 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -26,9 +26,14 @@ struct drm_printer;
#define GEN_DSS_PER_CSLICE 8
#define GEN_DSS_PER_MSLICE 8
+#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
+#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
+
struct sseu_dev_info {
u8 slice_mask;
u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
@@ -78,6 +83,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
u8 mask;
int ss_idx = subslice / BITS_PER_BYTE;
+ if (slice >= sseu->max_slices ||
+ subslice >= sseu->max_subslices)
+ return false;
+
GEM_BUG_ON(ss_idx >= sseu->ss_stride);
mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
@@ -97,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
- u32 ss_mask);
+ u8 *subslice_mask, u32 ss_mask);
void intel_sseu_info_init(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index 1ba8b7da9d37..8bb3a91dad82 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -4,9 +4,9 @@
* Copyright © 2020 Intel Corporation
*/
-#include "debugfs_gt.h"
-#include "intel_sseu_debugfs.h"
#include "i915_drv.h"
+#include "intel_gt_debugfs.h"
+#include "intel_sseu_debugfs.h"
static void sseu_copy_subslices(const struct sseu_dev_info *sseu,
int slice, u8 *to_mask)
@@ -282,7 +282,7 @@ static int sseu_status_show(struct seq_file *m, void *unused)
return intel_sseu_status(m, gt);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(sseu_status);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status);
static int rcs_topology_show(struct seq_file *m, void *unused)
{
@@ -293,11 +293,11 @@ static int rcs_topology_show(struct seq_file *m, void *unused)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology);
void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "sseu_status", &sseu_status_fops, NULL },
{ "rcs_topology", &rcs_topology_fops, NULL },
};
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index aae609d7d85d..e1f362530889 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -644,6 +644,72 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}
+static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ /*
+ * This is a "fake" workaround defined by software to ensure we
+ * maintain reliable, backward-compatible behavior for userspace with
+ * regards to how nested MI_BATCH_BUFFER_START commands are handled.
+ *
+ * The per-context setting of MI_MODE[12] determines whether the bits
+ * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
+ * in the traditional manner or whether they should instead use a new
+ * tgl+ meaning that breaks backward compatibility, but allows nesting
+ * into 3rd-level batchbuffers. When this new capability was first
+ * added in TGL, it remained off by default unless a context
+ * intentionally opted in to the new behavior. However Xe_HPG now
+ * flips this on by default and requires that we explicitly opt out if
+ * we don't want the new behavior.
+ *
+ * From a SW perspective, we want to maintain the backward-compatible
+ * behavior for userspace, so we'll apply a fake workaround to set it
+ * back to the legacy behavior on platforms where the hardware default
+ * is to break compatibility. At the moment there is no Linux
+ * userspace that utilizes third-level batchbuffers, so this will avoid
+ * userspace from needing to make any changes. using the legacy
+ * meaning is the correct thing to do. If/when we have userspace
+ * consumers that want to utilize third-level batch nesting, we can
+ * provide a context parameter to allow them to opt-in.
+ */
+ wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
+}
+
+static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ u8 mocs;
+
+ /*
+ * Some blitter commands do not have a field for MOCS, those
+ * commands will use MOCS index pointed by BLIT_CCTL.
+ * BLIT_CCTL registers are needed to be programmed to un-cached.
+ */
+ if (engine->class == COPY_ENGINE_CLASS) {
+ mocs = engine->gt->mocs.uc_index;
+ wa_write_clr_set(wal,
+ BLIT_CCTL(engine->mmio_base),
+ BLIT_CCTL_MASK,
+ BLIT_CCTL_MOCS(mocs, mocs));
+ }
+}
+
+/*
+ * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround
+ * defined by the hardware team, but it programming general context registers.
+ * Adding those context register programming in context workaround
+ * allow us to use the wa framework for proper application and validation.
+ */
+static void
+gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ fakewa_disable_nestedbb_mode(engine, wal);
+
+ gen12_ctx_gt_mocs_init(engine, wal);
+}
+
static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
struct i915_wa_list *wal,
@@ -651,11 +717,19 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
{
struct drm_i915_private *i915 = engine->i915;
- if (engine->class != RENDER_CLASS)
- return;
-
wa_init_start(wal, name, engine->name);
+ /* Applies to all engines */
+ /*
+ * Fake workarounds are not the actual workaround but
+ * programming of context registers using workaround framework.
+ */
+ if (GRAPHICS_VER(i915) >= 12)
+ gen12_ctx_gt_fake_wa_init(engine, wal);
+
+ if (engine->class != RENDER_CLASS)
+ goto done;
+
if (IS_DG1(i915))
dg1_ctx_workarounds_init(engine, wal);
else if (GRAPHICS_VER(i915) == 12)
@@ -685,6 +759,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
else
MISSING_CASE(GRAPHICS_VER(i915));
+done:
wa_init_finish(wal);
}
@@ -729,7 +804,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
static void
-gen4_gt_workarounds_init(struct drm_i915_private *i915,
+gen4_gt_workarounds_init(struct intel_gt *gt,
struct i915_wa_list *wal)
{
/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
@@ -737,29 +812,29 @@ gen4_gt_workarounds_init(struct drm_i915_private *i915,
}
static void
-g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen4_gt_workarounds_init(i915, wal);
+ gen4_gt_workarounds_init(gt, wal);
/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}
static void
-ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- g4x_gt_workarounds_init(i915, wal);
+ g4x_gt_workarounds_init(gt, wal);
wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}
static void
-snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
}
static void
-ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
wa_masked_dis(wal,
@@ -775,7 +850,7 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* WaForceL3Serialization:vlv */
wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
@@ -788,7 +863,7 @@ vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* L3 caching of data atomics doesn't work -- disable it. */
wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
@@ -803,8 +878,10 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = gt->i915;
+
/* WaDisableKillLogic:bxt,skl,kbl */
if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,
@@ -829,9 +906,9 @@ gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal
}
static void
-skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
/* WaDisableGafsUnitClkGating:skl */
wa_write_or(wal,
@@ -839,19 +916,19 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
/* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0))
+ if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0))
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
static void
-kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
/* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_GT_STEP(i915, 0, STEP_C0))
+ if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0))
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -868,15 +945,15 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
}
static void
-cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
/* WaDisableGafsUnitClkGating:cfl */
wa_write_or(wal,
@@ -901,21 +978,21 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
}
-static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal,
+static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
unsigned int slice, unsigned int subslice)
{
- drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice);
+ drm_dbg(&gt->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice);
__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
}
static void
-icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
- const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
+ const struct sseu_dev_info *sseu = &gt->info.sseu;
unsigned int slice, subslice;
- GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
+ GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
slice = 0;
@@ -935,16 +1012,15 @@ icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
* then we can just rely on the default steering and won't need to
* worry about explicitly re-steering L3BANK reads later.
*/
- if (i915->gt.info.l3bank_mask & BIT(subslice))
- i915->gt.steering_table[L3BANK] = NULL;
+ if (gt->info.l3bank_mask & BIT(subslice))
+ gt->steering_table[L3BANK] = NULL;
- __add_mcr_wa(i915, wal, slice, subslice);
+ __add_mcr_wa(gt, wal, slice, subslice);
}
static void
xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = gt->i915;
const struct sseu_dev_info *sseu = &gt->info.sseu;
unsigned long slice, subslice = 0, slice_mask = 0;
u64 dss_mask = 0;
@@ -1008,7 +1084,7 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
WARN_ON(subslice > GEN_DSS_PER_GSLICE);
WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0);
- __add_mcr_wa(i915, wal, slice, subslice);
+ __add_mcr_wa(gt, wal, slice, subslice);
/*
* SQIDI ranges are special because they use different steering
@@ -1024,9 +1100,11 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
-icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- icl_wa_init_mcr(i915, wal);
+ struct drm_i915_private *i915 = gt->i915;
+
+ icl_wa_init_mcr(gt, wal);
/* WaModifyGamTlbPartitioning:icl */
wa_write_clr_set(wal,
@@ -1077,10 +1155,9 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
* the engine-specific workaround list.
*/
static void
-wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
+wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct intel_engine_cs *engine;
- struct intel_gt *gt = &i915->gt;
int id;
for_each_engine(engine, gt, id) {
@@ -1094,22 +1171,23 @@ wa_14011060649(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-gen12_gt_workarounds_init(struct drm_i915_private *i915,
- struct i915_wa_list *wal)
+gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- icl_wa_init_mcr(i915, wal);
+ icl_wa_init_mcr(gt, wal);
/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
- wa_14011060649(i915, wal);
+ wa_14011060649(gt, wal);
/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}
static void
-tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen12_gt_workarounds_init(i915, wal);
+ struct drm_i915_private *i915 = gt->i915;
+
+ gen12_gt_workarounds_init(gt, wal);
/* Wa_1409420604:tgl */
if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
@@ -1130,9 +1208,11 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- gen12_gt_workarounds_init(i915, wal);
+ struct drm_i915_private *i915 = gt->i915;
+
+ gen12_gt_workarounds_init(gt, wal);
/* Wa_1607087056:dg1 */
if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
@@ -1154,60 +1234,62 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
static void
-xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- xehp_init_mcr(&i915->gt, wal);
+ xehp_init_mcr(gt, wal);
}
static void
-gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
+gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = gt->i915;
+
if (IS_XEHPSDV(i915))
- xehpsdv_gt_workarounds_init(i915, wal);
+ xehpsdv_gt_workarounds_init(gt, wal);
else if (IS_DG1(i915))
- dg1_gt_workarounds_init(i915, wal);
+ dg1_gt_workarounds_init(gt, wal);
else if (IS_TIGERLAKE(i915))
- tgl_gt_workarounds_init(i915, wal);
+ tgl_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 12)
- gen12_gt_workarounds_init(i915, wal);
+ gen12_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 11)
- icl_gt_workarounds_init(i915, wal);
+ icl_gt_workarounds_init(gt, wal);
else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
- cfl_gt_workarounds_init(i915, wal);
+ cfl_gt_workarounds_init(gt, wal);
else if (IS_GEMINILAKE(i915))
- glk_gt_workarounds_init(i915, wal);
+ glk_gt_workarounds_init(gt, wal);
else if (IS_KABYLAKE(i915))
- kbl_gt_workarounds_init(i915, wal);
+ kbl_gt_workarounds_init(gt, wal);
else if (IS_BROXTON(i915))
- gen9_gt_workarounds_init(i915, wal);
+ gen9_gt_workarounds_init(gt, wal);
else if (IS_SKYLAKE(i915))
- skl_gt_workarounds_init(i915, wal);
+ skl_gt_workarounds_init(gt, wal);
else if (IS_HASWELL(i915))
- hsw_gt_workarounds_init(i915, wal);
+ hsw_gt_workarounds_init(gt, wal);
else if (IS_VALLEYVIEW(i915))
- vlv_gt_workarounds_init(i915, wal);
+ vlv_gt_workarounds_init(gt, wal);
else if (IS_IVYBRIDGE(i915))
- ivb_gt_workarounds_init(i915, wal);
+ ivb_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 6)
- snb_gt_workarounds_init(i915, wal);
+ snb_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 5)
- ilk_gt_workarounds_init(i915, wal);
+ ilk_gt_workarounds_init(gt, wal);
else if (IS_G4X(i915))
- g4x_gt_workarounds_init(i915, wal);
+ g4x_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) == 4)
- gen4_gt_workarounds_init(i915, wal);
+ gen4_gt_workarounds_init(gt, wal);
else if (GRAPHICS_VER(i915) <= 8)
;
else
MISSING_CASE(GRAPHICS_VER(i915));
}
-void intel_gt_init_workarounds(struct drm_i915_private *i915)
+void intel_gt_init_workarounds(struct intel_gt *gt)
{
- struct i915_wa_list *wal = &i915->gt_wa_list;
+ struct i915_wa_list *wal = &gt->wa_list;
wa_init_start(wal, "GT", "global");
- gt_init_workarounds(i915, wal);
+ gt_init_workarounds(gt, wal);
wa_init_finish(wal);
}
@@ -1278,7 +1360,7 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
void intel_gt_apply_workarounds(struct intel_gt *gt)
{
- wa_list_apply(gt, &gt->i915->gt_wa_list);
+ wa_list_apply(gt, &gt->wa_list);
}
static bool wa_list_verify(struct intel_gt *gt,
@@ -1310,7 +1392,7 @@ static bool wa_list_verify(struct intel_gt *gt,
bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
- return wa_list_verify(gt, &gt->i915->gt_wa_list, from);
+ return wa_list_verify(gt, &gt->wa_list, from);
}
__maybe_unused
@@ -1604,6 +1686,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
i915_mmio_reg_offset(RING_NOPID(base)));
}
+/*
+ * engine_fake_wa_init(), a place holder to program the registers
+ * which are not part of an official workaround defined by the
+ * hardware team.
+ * Adding programming of those register inside workaround will
+ * allow utilizing wa framework to proper application and verification.
+ */
+static void
+engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ u8 mocs;
+
+ /*
+ * RING_CMD_CCTL are need to be programed to un-cached
+ * for memory writes and reads outputted by Command
+ * Streamers on Gen12 onward platforms.
+ */
+ if (GRAPHICS_VER(engine->i915) >= 12) {
+ mocs = engine->gt->mocs.uc_index;
+ wa_masked_field_set(wal,
+ RING_CMD_CCTL(engine->mmio_base),
+ CMD_CCTL_MOCS_MASK,
+ CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
+ }
+}
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
@@ -2044,6 +2151,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
return;
+ engine_fake_wa_init(engine, wal);
+
if (engine->class == RENDER_CLASS)
rcs_engine_wa_init(engine, wal);
else
@@ -2067,12 +2176,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
wa_list_apply(engine->gt, &engine->wa_list);
}
-struct mcr_range {
- u32 start;
- u32 end;
-};
-
-static const struct mcr_range mcr_ranges_gen8[] = {
+static const struct i915_range mcr_ranges_gen8[] = {
{ .start = 0x5500, .end = 0x55ff },
{ .start = 0x7000, .end = 0x7fff },
{ .start = 0x9400, .end = 0x97ff },
@@ -2081,7 +2185,7 @@ static const struct mcr_range mcr_ranges_gen8[] = {
{},
};
-static const struct mcr_range mcr_ranges_gen12[] = {
+static const struct i915_range mcr_ranges_gen12[] = {
{ .start = 0x8150, .end = 0x815f },
{ .start = 0x9520, .end = 0x955f },
{ .start = 0xb100, .end = 0xb3ff },
@@ -2090,7 +2194,7 @@ static const struct mcr_range mcr_ranges_gen12[] = {
{},
};
-static const struct mcr_range mcr_ranges_xehp[] = {
+static const struct i915_range mcr_ranges_xehp[] = {
{ .start = 0x4000, .end = 0x4aff },
{ .start = 0x5200, .end = 0x52ff },
{ .start = 0x5400, .end = 0x7fff },
@@ -2109,7 +2213,7 @@ static const struct mcr_range mcr_ranges_xehp[] = {
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
- const struct mcr_range *mcr_ranges;
+ const struct i915_range *mcr_ranges;
int i;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
index 15abb68b6c00..9beaab77c7f0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
@@ -24,7 +24,7 @@ static inline void intel_wa_list_free(struct i915_wa_list *wal)
void intel_engine_init_ctx_wa(struct intel_engine_cs *engine);
int intel_engine_emit_ctx_wa(struct i915_request *rq);
-void intel_gt_init_workarounds(struct drm_i915_private *i915);
+void intel_gt_init_workarounds(struct intel_gt *gt);
void intel_gt_apply_workarounds(struct intel_gt *gt);
bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 2c1af030310c..8b89215afe46 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -376,6 +376,8 @@ int mock_engine_init(struct intel_engine_cs *engine)
{
struct intel_context *ce;
+ INIT_LIST_HEAD(&engine->pinned_contexts_list);
+
engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK);
if (!engine->sched_engine)
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 317eebf086c3..6e6e4d747cca 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -290,7 +290,7 @@ static int live_heartbeat_fast(void *arg)
int err = 0;
/* Check that the heartbeat ticks at the desired rate. */
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return 0;
for_each_engine(engine, gt, id) {
@@ -352,7 +352,7 @@ static int live_heartbeat_off(void *arg)
int err = 0;
/* Check that we can turn off heartbeat and not interrupt VIP */
- if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
return 0;
for_each_engine(engine, gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index f12ffe797639..25a8c4f62b0d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -992,7 +992,7 @@ static int live_timeslice_preempt(void *arg)
* need to preempt the current task and replace it with another
* ready task.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
@@ -1122,7 +1122,7 @@ static int live_timeslice_rewind(void *arg)
* but only a few of those requests, forcing us to rewind the
* RING_TAIL of the original request.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
for_each_engine(engine, gt, id) {
@@ -1299,7 +1299,7 @@ static int live_timeslice_queue(void *arg)
* ELSP[1] is already occupied, so must rely on timeslicing to
* eject ELSP[0] in favour of the queue.)
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
@@ -1420,7 +1420,7 @@ static int live_timeslice_nopreempt(void *arg)
* We should not timeslice into a request that is marked with
* I915_REQUEST_NOPREEMPT.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
if (igt_spinner_init(&spin, gt))
@@ -2260,7 +2260,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
int err;
/* Preempt cancel non-preemptible spinner in ELSP0 */
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(arg->engine->gt))
@@ -2316,7 +2316,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
struct i915_request *rq;
int err;
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(engine->gt))
@@ -3375,7 +3375,7 @@ static int live_preempt_timeout(void *arg)
* Check that we force preemption to occur by cancelling the previous
* context if it refuses to yield the GPU.
*/
- if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(gt))
@@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
if (batch) {
struct i915_address_space *vm;
- vm = i915_gem_context_get_vm_rcu(ctx);
+ vm = i915_gem_context_get_eb_vm(ctx);
vma = i915_vma_instance(batch, vm, NULL);
i915_vm_put(vm);
if (IS_ERR(vma))
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 2c1ed32ca5ac..7e2d99dd012d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -117,7 +117,7 @@ static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
struct intel_gt *gt = h->gt;
- struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
+ struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx);
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct i915_vma *hws, *vma;
@@ -789,7 +789,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (err)
pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n",
engine->name, rq->fence.context,
- rq->fence.seqno, rq->context->guc_id, err);
+ rq->fence.seqno, rq->context->guc_id.id, err);
}
skip:
@@ -1098,7 +1098,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
if (err)
pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n",
engine->name, rq->fence.context,
- rq->fence.seqno, rq->context->guc_id, err);
+ rq->fence.seqno, rq->context->guc_id.id, err);
}
count++;
@@ -1108,7 +1108,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n",
engine->name, test_name,
rq->fence.context,
- rq->fence.seqno, rq->context->guc_id);
+ rq->fence.seqno, rq->context->guc_id.id);
i915_request_put(rq);
GEM_TRACE_DUMP();
@@ -1596,7 +1596,7 @@ static int igt_reset_evict_ppgtt(void *arg)
if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
return 0;
- ppgtt = i915_ppgtt_create(gt);
+ ppgtt = i915_ppgtt_create(gt, 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index e623ac45f4aa..962e91ba3be4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -66,7 +66,7 @@ reference_lists_init(struct intel_gt *gt, struct wa_lists *lists)
memset(lists, 0, sizeof(*lists));
wa_init_start(&lists->gt_wa_list, "GT_REF", "global");
- gt_init_workarounds(gt->i915, &lists->gt_wa_list);
+ gt_init_workarounds(gt, &lists->gt_wa_list);
wa_init_finish(&lists->gt_wa_list);
for_each_engine(engine, gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index 99e1fad5ca20..c9086a600bce 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -102,11 +102,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
* | +-------+--------------------------------------------------------------+
* | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message |
* +---+-------+--------------------------------------------------------------+
- * | 1 | 31:0 | +--------------------------------------------------------+ |
- * +---+-------+ | | |
- * |...| | | Embedded `HXG Message`_ | |
- * +---+-------+ | | |
- * | n | 31:0 | +--------------------------------------------------------+ |
+ * | 1 | 31:0 | |
+ * +---+-------+ |
+ * |...| | [Embedded `HXG Message`_] |
+ * +---+-------+ |
+ * | n | 31:0 | |
* +---+-------+--------------------------------------------------------------+
*/
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h
index bbf1ddb77434..9baa3cb07d13 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h
@@ -38,11 +38,11 @@
* +---+-------+--------------------------------------------------------------+
* | | Bits | Description |
* +===+=======+==============================================================+
- * | 0 | 31:0 | +--------------------------------------------------------+ |
- * +---+-------+ | | |
- * |...| | | Embedded `HXG Message`_ | |
- * +---+-------+ | | |
- * | n | 31:0 | +--------------------------------------------------------+ |
+ * | 0 | 31:0 | |
+ * +---+-------+ |
+ * |...| | [Embedded `HXG Message`_] |
+ * +---+-------+ |
+ * | n | 31:0 | |
* +---+-------+--------------------------------------------------------------+
*/
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index fbfcae727d7f..8f8182bf7c11 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -3,6 +3,7 @@
* Copyright © 2014-2019 Intel Corporation
*/
+#include "gem/i915_gem_lmem.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"
@@ -647,7 +648,14 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
u64 flags;
int ret;
- obj = i915_gem_object_create_shmem(gt->i915, size);
+ if (HAS_LMEM(gt->i915))
+ obj = i915_gem_object_create_lmem(gt->i915, size,
+ I915_BO_ALLOC_CPU_CLEAR |
+ I915_BO_ALLOC_CONTIGUOUS |
+ I915_BO_ALLOC_PM_EARLY);
+ else
+ obj = i915_gem_object_create_shmem(gt->i915, size);
+
if (IS_ERR(obj))
return ERR_CAST(obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2e27fe59786b..5dd174babf7a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -22,74 +22,121 @@
struct __guc_ads_blob;
-/*
- * Top level structure of GuC. It handles firmware loading and manages client
- * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList
- * submission.
+/**
+ * struct intel_guc - Top level structure of GuC.
+ *
+ * It handles firmware loading and manages client pool. intel_guc owns an
+ * i915_sched_engine for submission.
*/
struct intel_guc {
+ /** @fw: the GuC firmware */
struct intel_uc_fw fw;
+ /** @log: sub-structure containing GuC log related data and objects */
struct intel_guc_log log;
+ /** @ct: the command transport communication channel */
struct intel_guc_ct ct;
+ /** @slpc: sub-structure containing SLPC related data and objects */
struct intel_guc_slpc slpc;
- /* Global engine used to submit requests to GuC */
+ /** @sched_engine: Global engine used to submit requests to GuC */
struct i915_sched_engine *sched_engine;
+ /**
+ * @stalled_request: if GuC can't process a request for any reason, we
+ * save it until GuC restarts processing. No other request can be
+ * submitted until the stalled request is processed.
+ */
struct i915_request *stalled_request;
/* intel_guc_recv interrupt related state */
+ /** @irq_lock: protects GuC irq state */
spinlock_t irq_lock;
+ /**
+ * @msg_enabled_mask: mask of events that are processed when receiving
+ * an INTEL_GUC_ACTION_DEFAULT G2H message.
+ */
unsigned int msg_enabled_mask;
+ /**
+ * @outstanding_submission_g2h: number of outstanding GuC to Host
+ * responses related to GuC submission, used to determine if the GT is
+ * idle
+ */
atomic_t outstanding_submission_g2h;
+ /** @interrupts: pointers to GuC interrupt-managing functions. */
struct {
void (*reset)(struct intel_guc *guc);
void (*enable)(struct intel_guc *guc);
void (*disable)(struct intel_guc *guc);
} interrupts;
- /*
- * contexts_lock protects the pool of free guc ids and a linked list of
- * guc ids available to be stolen
+ /**
+ * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
+ * ce->guc_id.ref when transitioning in and out of zero
*/
spinlock_t contexts_lock;
+ /** @guc_ids: used to allocate unique ce->guc_id.id values */
struct ida guc_ids;
+ /**
+ * @guc_id_list: list of intel_context with valid guc_ids but no refs
+ */
struct list_head guc_id_list;
+ /**
+ * @submission_supported: tracks whether we support GuC submission on
+ * the current platform
+ */
bool submission_supported;
+ /** @submission_selected: tracks whether the user enabled GuC submission */
bool submission_selected;
+ /**
+ * @rc_supported: tracks whether we support GuC rc on the current platform
+ */
bool rc_supported;
+ /** @rc_selected: tracks whether the user enabled GuC rc */
bool rc_selected;
+ /** @ads_vma: object allocated to hold the GuC ADS */
struct i915_vma *ads_vma;
+ /** @ads_blob: contents of the GuC ADS */
struct __guc_ads_blob *ads_blob;
+ /** @ads_regset_size: size of the save/restore regsets in the ADS */
u32 ads_regset_size;
+ /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool;
+ /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */
void *lrc_desc_pool_vaddr;
- /* guc_id to intel_context lookup */
+ /**
+ * @context_lookup: used to resolve intel_context from guc_id, if a
+ * context is present in this structure it is registered with the GuC
+ */
struct xarray context_lookup;
- /* Control params for fw initialization */
+ /** @params: Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS];
- /* GuC's FW specific registers used in MMIO send */
+ /** @send_regs: GuC's FW specific registers used for sending MMIO H2G */
struct {
u32 base;
unsigned int count;
enum forcewake_domains fw_domains;
} send_regs;
- /* register used to send interrupts to the GuC FW */
+ /** @notify_reg: register used to send interrupts to the GuC FW */
i915_reg_t notify_reg;
- /* Store msg (e.g. log flush) that we see while CTBs are disabled */
+ /**
+ * @mmio_msg: notification bitmask that the GuC writes in one of its
+ * registers when the CT channel is disabled, to be processed when the
+ * channel is back up.
+ */
u32 mmio_msg;
- /* To serialize the intel_guc_send actions */
+ /** @send_mutex: used to serialize the intel_guc_send actions */
struct mutex send_mutex;
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 6926919bcac6..2c6ea64af7ec 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -349,6 +349,8 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt);
}
+#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
+#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
static int guc_prep_golden_context(struct intel_guc *guc,
struct __guc_ads_blob *blob)
{
@@ -396,7 +398,18 @@ static int guc_prep_golden_context(struct intel_guc *guc,
if (!blob)
continue;
- blob->ads.eng_state_size[guc_class] = real_size;
+ /*
+ * This interface is slightly confusing. We need to pass the
+ * base address of the full golden context and the size of just
+ * the engine state, which is the section of the context image
+ * that starts after the execlists context. This is required to
+ * allow the GuC to restore just the engine state when a
+ * watchdog reset occurs.
+ * We calculate the engine state size by removing the size of
+ * what comes before it in the context image (which is identical
+ * on all engines).
+ */
+ blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE;
blob->ads.golden_context_lrca[guc_class] = addr_ggtt;
addr_ggtt += alloc_size;
}
@@ -436,11 +449,6 @@ static void guc_init_golden_context(struct intel_guc *guc)
u8 engine_class, guc_class;
u8 *ptr;
- /* Skip execlist and PPGTT registers + HWSP */
- const u32 lr_hw_context_size = 80 * sizeof(u32);
- const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE +
- lr_hw_context_size;
-
if (!intel_uc_uses_guc_submission(&gt->uc))
return;
@@ -476,12 +484,12 @@ static void guc_init_golden_context(struct intel_guc *guc)
continue;
}
- GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size);
+ GEM_BUG_ON(blob->ads.eng_state_size[guc_class] !=
+ real_size - LRC_SKIP_SIZE);
GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt);
addr_ggtt += alloc_size;
- shmem_read(engine->default_state, skip_size, ptr + skip_size,
- real_size - skip_size);
+ shmem_read(engine->default_state, 0, ptr, real_size);
ptr += alloc_size;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 22b4733b55e2..0a3504bc0b61 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -168,12 +168,15 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type,
FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr),
FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr),
};
+ int ret;
GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
GEM_BUG_ON(size % SZ_4K);
/* CT registration must go over MMIO */
- return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+
+ return ret > 0 ? -EPROTO : ret;
}
static int ct_register_buffer(struct intel_guc_ct *ct, u32 type,
@@ -188,8 +191,8 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 type,
err = guc_action_register_ct_buffer(ct_to_guc(ct), type,
desc_addr, buff_addr, size);
if (unlikely(err))
- CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n",
- guc_ct_buffer_type_to_str(type), err);
+ CT_ERROR(ct, "Failed to register %s buffer (%pe)\n",
+ guc_ct_buffer_type_to_str(type), ERR_PTR(err));
return err;
}
@@ -201,11 +204,14 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type)
FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB),
FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type),
};
+ int ret;
GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
/* CT deregistration must go over MMIO */
- return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+ ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+
+ return ret > 0 ? -EPROTO : ret;
}
static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
@@ -213,8 +219,8 @@ static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type);
if (unlikely(err))
- CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n",
- guc_ct_buffer_type_to_str(type), err);
+ CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n",
+ guc_ct_buffer_type_to_str(type), ERR_PTR(err));
return err;
}
@@ -522,9 +528,6 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
#undef done
- if (unlikely(err))
- DRM_ERROR("CT: fence %u err %d\n", req->fence, err);
-
*status = req->status;
return err;
}
@@ -722,8 +725,11 @@ retry:
err = wait_for_ct_request_update(&request, status);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
- if (unlikely(err))
+ if (unlikely(err)) {
+ CT_ERROR(ct, "No response for request %#x (fence %u)\n",
+ action[0], request.fence);
goto unlink;
+ }
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) {
err = -EIO;
@@ -775,8 +781,8 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
if (unlikely(ret < 0)) {
- CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n",
- action[0], ret, status);
+ CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
+ action[0], ERR_PTR(ret), status);
} else if (unlikely(ret)) {
CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
action[0], ret, ret);
@@ -1042,9 +1048,9 @@ static void ct_incoming_request_worker_func(struct work_struct *w)
container_of(w, struct intel_guc_ct, requests.worker);
bool done;
- done = ct_process_incoming_requests(ct);
- if (!done)
- queue_work(system_unbound_wq, &ct->requests.worker);
+ do {
+ done = ct_process_incoming_requests(ct);
+ } while (!done);
}
static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index 887c8c8f35db..25f09a420561 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -5,14 +5,14 @@
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
+#include "gt/uc/intel_guc_ads.h"
+#include "gt/uc/intel_guc_ct.h"
+#include "gt/uc/intel_guc_slpc.h"
+#include "gt/uc/intel_guc_submission.h"
#include "intel_guc.h"
#include "intel_guc_debugfs.h"
#include "intel_guc_log_debugfs.h"
-#include "gt/uc/intel_guc_ct.h"
-#include "gt/uc/intel_guc_ads.h"
-#include "gt/uc/intel_guc_submission.h"
-#include "gt/uc/intel_guc_slpc.h"
static int guc_info_show(struct seq_file *m, void *data)
{
@@ -35,7 +35,7 @@ static int guc_info_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_info);
static int guc_registered_contexts_show(struct seq_file *m, void *data)
{
@@ -49,7 +49,7 @@ static int guc_registered_contexts_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts);
static int guc_slpc_info_show(struct seq_file *m, void *unused)
{
@@ -62,7 +62,7 @@ static int guc_slpc_info_show(struct seq_file *m, void *unused)
return intel_guc_slpc_print_info(slpc, &p);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info);
static bool intel_eval_slpc_support(void *data)
{
@@ -73,7 +73,7 @@ static bool intel_eval_slpc_support(void *data)
void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "guc_info", &guc_info_fops, NULL },
{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 76fe766ad1bc..196424be0998 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -41,18 +41,21 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
}
/* Copy RSA signature from the fw image to HW for verification */
-static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
- struct intel_uncore *uncore)
+static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
{
u32 rsa[UOS_RSA_SCRATCH_COUNT];
size_t copied;
int i;
copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
- GEM_BUG_ON(copied < sizeof(rsa));
+ if (copied < sizeof(rsa))
+ return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
+
+ return 0;
}
/*
@@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc)
* by the DMA engine in one operation, whereas the RSA signature is
* loaded via MMIO.
*/
- guc_xfer_rsa(&guc->fw, uncore);
+ ret = guc_xfer_rsa(&guc->fw, uncore);
+ if (ret)
+ goto out;
/*
* Current uCode expects the code to be loaded at 8k; locations below
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
index 64e0b86bf258..46026c2c1722 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -6,7 +6,7 @@
#include <linux/fs.h>
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
#include "intel_guc.h"
#include "intel_guc_log.h"
#include "intel_guc_log_debugfs.h"
@@ -17,7 +17,7 @@ static int guc_log_dump_show(struct seq_file *m, void *data)
return intel_guc_log_dump(m->private, &p, false);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
{
@@ -25,7 +25,7 @@ static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
return intel_guc_log_dump(m->private, &p, true);
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
static int guc_log_level_get(void *data, u64 *val)
{
@@ -109,7 +109,7 @@ static const struct file_operations guc_log_relay_fops = {
void intel_guc_log_debugfs_register(struct intel_guc_log *log,
struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "guc_log_dump", &guc_log_dump_fops, NULL },
{ "guc_load_err_log_dump", &guc_load_err_log_dump_fops, NULL },
{ "guc_log_level", &guc_log_level_fops, NULL },
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 87d8dc8f51b9..ba0de35f6323 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -28,21 +28,6 @@
/**
* DOC: GuC-based command submission
*
- * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
- * firmware is moving to an updated submission interface and we plan to
- * turn submission back on when that lands. The below documentation (and related
- * code) matches the old submission model and will be updated as part of the
- * upgrade to the new flow.
- *
- * GuC stage descriptor:
- * During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC. Currently, we only use one
- * descriptor. This stage descriptor lets the GuC know about the workqueue and
- * process descriptor. Theoretically, it also lets the GuC know about our HW
- * contexts (context ID, etc...), but we actually employ a kind of submission
- * where the GuC uses the LRCA sent via the work item instead. This is called
- * a "proxy" submission.
- *
* The Scratch registers:
* There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
* a value to the action register (SOFT_SCRATCH_0) along with any data. It then
@@ -51,14 +36,85 @@
* processes the request. The kernel driver polls waiting for this update and
* then proceeds.
*
- * Work Items:
- * There are several types of work items that the host may place into a
- * workqueue, each with its own requirements and limitations. Currently only
- * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
- * represents in-order queue. The kernel driver packs ring tail pointer and an
- * ELSP context descriptor dword into Work Item.
- * See guc_add_request()
+ * Command Transport buffers (CTBs):
+ * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
+ * - G2H) are a message interface between the i915 and GuC.
+ *
+ * Context registration:
+ * Before a context can be submitted it must be registered with the GuC via a
+ * H2G. A unique guc_id is associated with each context. The context is either
+ * registered at request creation time (normal operation) or at submission time
+ * (abnormal operation, e.g. after a reset).
+ *
+ * Context submission:
+ * The i915 updates the LRC tail value in memory. The i915 must enable the
+ * scheduling of the context within the GuC for the GuC to actually consider it.
+ * Therefore, the first time a disabled context is submitted we use a schedule
+ * enable H2G, while follow up submissions are done via the context submit H2G,
+ * which informs the GuC that a previously enabled context has new work
+ * available.
+ *
+ * Context unpin:
+ * To unpin a context a H2G is used to disable scheduling. When the
+ * corresponding G2H returns indicating the scheduling disable operation has
+ * completed it is safe to unpin the context. While a disable is in flight it
+ * isn't safe to resubmit the context so a fence is used to stall all future
+ * requests of that context until the G2H is returned.
+ *
+ * Context deregistration:
+ * Before a context can be destroyed or if we steal its guc_id we must
+ * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
+ * safe to submit anything to this guc_id until the deregister completes so a
+ * fence is used to stall all requests associated with this guc_id until the
+ * corresponding G2H returns indicating the guc_id has been deregistered.
+ *
+ * guc_ids:
+ * Unique number associated with private GuC context data passed in during
+ * context registration / submission / deregistration. 64k available. Simple ida
+ * is used for allocation.
+ *
+ * Stealing guc_ids:
+ * If no guc_ids are available they can be stolen from another context at
+ * request creation time if that context is unpinned. If a guc_id can't be found
+ * we punt this problem to the user as we believe this is near impossible to hit
+ * during normal use cases.
+ *
+ * Locking:
+ * In the GuC submission code we have 3 basic spin locks which protect
+ * everything. Details about each below.
+ *
+ * sched_engine->lock
+ * This is the submission lock for all contexts that share an i915 schedule
+ * engine (sched_engine), thus only one of the contexts which share a
+ * sched_engine can be submitting at a time. Currently only one sched_engine is
+ * used for all of GuC submission but that could change in the future.
+ *
+ * guc->contexts_lock
+ * Protects guc_id allocation for the given GuC, i.e. only one context can be
+ * doing guc_id allocation operations at a time for each GuC in the system.
*
+ * ce->guc_state.lock
+ * Protects everything under ce->guc_state. Ensures that a context is in the
+ * correct state before issuing a H2G. e.g. We don't issue a schedule disable
+ * on a disabled context (bad idea), we don't issue a schedule enable when a
+ * schedule disable is in flight, etc... Also protects list of inflight requests
+ * on the context and the priority management state. Lock is individual to each
+ * context.
+ *
+ * Lock ordering rules:
+ * sched_engine->lock -> ce->guc_state.lock
+ * guc->contexts_lock -> ce->guc_state.lock
+ *
+ * Reset races:
+ * When a full GT reset is triggered it is assumed that some G2H responses to
+ * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
+ * fatal as we do certain operations upon receiving a G2H (e.g. destroy
+ * contexts, release guc_ids, etc...). When this occurs we can scrub the
+ * context state and cleanup appropriately, however this is quite racey.
+ * To avoid races, the reset code must disable submission before scrubbing for
+ * the missing G2H, while the submission code must check for submission being
+ * disabled and skip sending H2Gs and updating context states when it is. Both
+ * sides must also make sure to hold the relevant locks.
*/
/* GuC Virtual Engine */
@@ -73,86 +129,35 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
#define GUC_REQUEST_SIZE 64 /* bytes */
/*
- * Below is a set of functions which control the GuC scheduling state which do
- * not require a lock as all state transitions are mutually exclusive. i.e. It
- * is not possible for the context pinning code and submission, for the same
- * context, to be executing simultaneously. We still need an atomic as it is
- * possible for some of the bits to changing at the same time though.
- */
-#define SCHED_STATE_NO_LOCK_ENABLED BIT(0)
-#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1)
-#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2)
-static inline bool context_enabled(struct intel_context *ce)
-{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
- SCHED_STATE_NO_LOCK_ENABLED);
-}
-
-static inline void set_context_enabled(struct intel_context *ce)
-{
- atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_enabled(struct intel_context *ce)
-{
- atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_pending_enable(struct intel_context *ce)
-{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
- SCHED_STATE_NO_LOCK_PENDING_ENABLE);
-}
-
-static inline void set_context_pending_enable(struct intel_context *ce)
-{
- atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_pending_enable(struct intel_context *ce)
-{
- atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_registered(struct intel_context *ce)
-{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
- SCHED_STATE_NO_LOCK_REGISTERED);
-}
-
-static inline void set_context_registered(struct intel_context *ce)
-{
- atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
- &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_registered(struct intel_context *ce)
-{
- atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
- &ce->guc_sched_state_no_lock);
-}
-
-/*
* Below is a set of functions which control the GuC scheduling state which
- * require a lock, aside from the special case where the functions are called
- * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
- * path to be executing on the context.
+ * require a lock.
*/
#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
#define SCHED_STATE_DESTROYED BIT(1)
#define SCHED_STATE_PENDING_DISABLE BIT(2)
#define SCHED_STATE_BANNED BIT(3)
-#define SCHED_STATE_BLOCKED_SHIFT 4
+#define SCHED_STATE_ENABLED BIT(4)
+#define SCHED_STATE_PENDING_ENABLE BIT(5)
+#define SCHED_STATE_REGISTERED BIT(6)
+#define SCHED_STATE_BLOCKED_SHIFT 7
#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
+
static inline void init_sched_state(struct intel_context *ce)
{
- /* Only should be called from guc_lrc_desc_pin() */
- atomic_set(&ce->guc_sched_state_no_lock, 0);
- ce->guc_state.sched_state = 0;
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
+}
+
+__maybe_unused
+static bool sched_state_is_init(struct intel_context *ce)
+{
+ /*
+ * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
+ * suspend.
+ */
+ return !(ce->guc_state.sched_state &=
+ ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
}
static inline bool
@@ -165,7 +170,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce)
static inline void
set_context_wait_for_deregister_to_register(struct intel_context *ce)
{
- /* Only should be called from guc_lrc_desc_pin() without lock */
+ lockdep_assert_held(&ce->guc_state.lock);
ce->guc_state.sched_state |=
SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
}
@@ -225,6 +230,57 @@ static inline void clr_context_banned(struct intel_context *ce)
ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
}
+static inline bool context_enabled(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
+}
+
+static inline bool context_pending_enable(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void set_context_pending_enable(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void clr_context_pending_enable(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline bool context_registered(struct intel_context *ce)
+{
+ return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
+}
+
+static inline void set_context_registered(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
+}
+
+static inline void clr_context_registered(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
+}
+
static inline u32 context_blocked(struct intel_context *ce)
{
return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -233,7 +289,6 @@ static inline u32 context_blocked(struct intel_context *ce)
static inline void incr_context_blocked(struct intel_context *ce)
{
- lockdep_assert_held(&ce->engine->sched_engine->lock);
lockdep_assert_held(&ce->guc_state.lock);
ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
@@ -243,7 +298,6 @@ static inline void incr_context_blocked(struct intel_context *ce)
static inline void decr_context_blocked(struct intel_context *ce)
{
- lockdep_assert_held(&ce->engine->sched_engine->lock);
lockdep_assert_held(&ce->guc_state.lock);
GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */
@@ -251,14 +305,33 @@ static inline void decr_context_blocked(struct intel_context *ce)
ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
+static inline bool context_has_committed_requests(struct intel_context *ce)
+{
+ return !!ce->guc_state.number_committed_requests;
+}
+
+static inline void incr_context_committed_requests(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ++ce->guc_state.number_committed_requests;
+ GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
+}
+
+static inline void decr_context_committed_requests(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ --ce->guc_state.number_committed_requests;
+ GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
+}
+
static inline bool context_guc_id_invalid(struct intel_context *ce)
{
- return ce->guc_id == GUC_INVALID_LRC_ID;
+ return ce->guc_id.id == GUC_INVALID_LRC_ID;
}
static inline void set_context_guc_id_invalid(struct intel_context *ce)
{
- ce->guc_id = GUC_INVALID_LRC_ID;
+ ce->guc_id.id = GUC_INVALID_LRC_ID;
}
static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
@@ -352,20 +425,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
xa_unlock_irqrestore(&guc->context_lookup, flags);
}
+static void decr_outstanding_submission_g2h(struct intel_guc *guc)
+{
+ if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
+ wake_up_all(&guc->ct.wq);
+}
+
static int guc_submission_send_busy_loop(struct intel_guc *guc,
const u32 *action,
u32 len,
u32 g2h_len_dw,
bool loop)
{
- int err;
-
- err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ /*
+ * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
+ * so we don't handle the case where we don't get a reply because we
+ * aborted the send due to the channel being busy.
+ */
+ GEM_BUG_ON(g2h_len_dw && !loop);
- if (!err && g2h_len_dw)
+ if (g2h_len_dw)
atomic_inc(&guc->outstanding_submission_g2h);
- return err;
+ return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
}
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -430,6 +512,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
u32 g2h_len_dw = 0;
bool enabled;
+ lockdep_assert_held(&rq->engine->sched_engine->lock);
+
/*
* Corner case where requests were sitting in the priority list or a
* request resubmitted after the context was banned.
@@ -437,22 +521,24 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
if (unlikely(intel_context_is_banned(ce))) {
i915_request_put(i915_request_mark_eio(rq));
intel_engine_signal_breadcrumbs(ce->engine);
- goto out;
+ return 0;
}
- GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
+ GEM_BUG_ON(!atomic_read(&ce->guc_id.ref));
GEM_BUG_ON(context_guc_id_invalid(ce));
/*
* Corner case where the GuC firmware was blown away and reloaded while
* this context was pinned.
*/
- if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) {
+ if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id))) {
err = guc_lrc_desc_pin(ce, false);
if (unlikely(err))
- goto out;
+ return err;
}
+ spin_lock(&ce->guc_state.lock);
+
/*
* The request / context will be run on the hardware when scheduling
* gets enabled in the unblock.
@@ -464,14 +550,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
if (!enabled) {
action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
- action[len++] = ce->guc_id;
+ action[len++] = ce->guc_id.id;
action[len++] = GUC_CONTEXT_ENABLE;
set_context_pending_enable(ce);
intel_context_get(ce);
g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
} else {
action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
- action[len++] = ce->guc_id;
+ action[len++] = ce->guc_id.id;
}
err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
@@ -487,6 +573,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
trace_i915_request_guc_submit(rq);
out:
+ spin_unlock(&ce->guc_state.lock);
return err;
}
@@ -596,10 +683,18 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
unsigned long index, flags;
bool pending_disable, pending_enable, deregister, destroyed, banned;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- /* Flush context */
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ /*
+ * Corner case where the ref count on the object is zero but and
+ * deregister G2H was lost. In this case we don't touch the ref
+ * count and finish the destroy of the context.
+ */
+ bool do_put = kref_get_unless_zero(&ce->ref);
+
+ xa_unlock(&guc->context_lookup);
+
+ spin_lock(&ce->guc_state.lock);
/*
* Once we are at this point submission_disabled() is guaranteed
@@ -615,8 +710,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
banned = context_banned(ce);
init_sched_state(ce);
+ spin_unlock(&ce->guc_state.lock);
+
+ GEM_BUG_ON(!do_put && !destroyed);
+
if (pending_enable || destroyed || deregister) {
- atomic_dec(&guc->outstanding_submission_g2h);
+ decr_outstanding_submission_g2h(guc);
if (deregister)
guc_signal_context_fence(ce);
if (destroyed) {
@@ -635,14 +734,20 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
intel_engine_signal_breadcrumbs(ce->engine);
}
intel_context_sched_disable_unpin(ce);
- atomic_dec(&guc->outstanding_submission_g2h);
- spin_lock_irqsave(&ce->guc_state.lock, flags);
+ decr_outstanding_submission_g2h(guc);
+
+ spin_lock(&ce->guc_state.lock);
guc_blocked_fence_complete(ce);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ spin_unlock(&ce->guc_state.lock);
intel_context_put(ce);
}
+
+ if (do_put)
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
}
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
static inline bool
@@ -725,6 +830,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
wait_for_reset(guc, &guc->outstanding_submission_g2h);
} while (!list_empty(&guc->ct.requests.incoming));
}
+
scrub_guc_desc_for_outstanding_g2h(guc);
}
@@ -796,16 +902,14 @@ __unwind_incomplete_requests(struct intel_context *ce)
unsigned long flags;
spin_lock_irqsave(&sched_engine->lock, flags);
- spin_lock(&ce->guc_active.lock);
- list_for_each_entry_safe(rq, rn,
- &ce->guc_active.requests,
- sched.link) {
+ spin_lock(&ce->guc_state.lock);
+ list_for_each_entry_safe_reverse(rq, rn,
+ &ce->guc_state.requests,
+ sched.link) {
if (i915_request_completed(rq))
continue;
list_del_init(&rq->sched.link);
- spin_unlock(&ce->guc_active.lock);
-
__i915_request_unsubmit(rq);
/* Push the request back into the queue for later resubmission. */
@@ -816,29 +920,43 @@ __unwind_incomplete_requests(struct intel_context *ce)
}
GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
- list_add_tail(&rq->sched.link, pl);
+ list_add(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-
- spin_lock(&ce->guc_active.lock);
}
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
static void __guc_reset_context(struct intel_context *ce, bool stalled)
{
struct i915_request *rq;
+ unsigned long flags;
u32 head;
+ bool skip = false;
intel_context_get(ce);
/*
- * GuC will implicitly mark the context as non-schedulable
- * when it sends the reset notification. Make sure our state
- * reflects this change. The context will be marked enabled
- * on resubmission.
+ * GuC will implicitly mark the context as non-schedulable when it sends
+ * the reset notification. Make sure our state reflects this change. The
+ * context will be marked enabled on resubmission.
+ *
+ * XXX: If the context is reset as a result of the request cancellation
+ * this G2H is received after the schedule disable complete G2H which is
+ * wrong as this creates a race between the request cancellation code
+ * re-submitting the context and this G2H handler. This is a bug in the
+ * GuC but can be worked around in the meantime but converting this to a
+ * NOP if a pending enable is in flight as this indicates that a request
+ * cancellation has occurred.
*/
- clr_context_enabled(ce);
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ if (likely(!context_pending_enable(ce)))
+ clr_context_enabled(ce);
+ else
+ skip = true;
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ if (unlikely(skip))
+ goto out_put;
rq = intel_context_find_active_request(ce);
if (!rq) {
@@ -857,6 +975,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
out_replay:
guc_reset_state(ce, head, stalled);
__unwind_incomplete_requests(ce);
+out_put:
intel_context_put(ce);
}
@@ -864,16 +983,29 @@ void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
{
struct intel_context *ce;
unsigned long index;
+ unsigned long flags;
if (unlikely(!guc_submission_initialized(guc))) {
/* Reset called during driver load? GuC not yet initialised! */
return;
}
- xa_for_each(&guc->context_lookup, index, ce)
+ xa_lock_irqsave(&guc->context_lookup, flags);
+ xa_for_each(&guc->context_lookup, index, ce) {
+ if (!kref_get_unless_zero(&ce->ref))
+ continue;
+
+ xa_unlock(&guc->context_lookup);
+
if (intel_context_is_pinned(ce))
__guc_reset_context(ce, stalled);
+ intel_context_put(ce);
+
+ xa_lock(&guc->context_lookup);
+ }
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+
/* GuC is blown away, drop all references to contexts */
xa_destroy(&guc->context_lookup);
}
@@ -886,10 +1018,10 @@ static void guc_cancel_context_requests(struct intel_context *ce)
/* Mark all executing requests as skipped. */
spin_lock_irqsave(&sched_engine->lock, flags);
- spin_lock(&ce->guc_active.lock);
- list_for_each_entry(rq, &ce->guc_active.requests, sched.link)
+ spin_lock(&ce->guc_state.lock);
+ list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
i915_request_put(i915_request_mark_eio(rq));
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
@@ -948,11 +1080,24 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
{
struct intel_context *ce;
unsigned long index;
+ unsigned long flags;
+
+ xa_lock_irqsave(&guc->context_lookup, flags);
+ xa_for_each(&guc->context_lookup, index, ce) {
+ if (!kref_get_unless_zero(&ce->ref))
+ continue;
+
+ xa_unlock(&guc->context_lookup);
- xa_for_each(&guc->context_lookup, index, ce)
if (intel_context_is_pinned(ce))
guc_cancel_context_requests(ce);
+ intel_context_put(ce);
+
+ xa_lock(&guc->context_lookup);
+ }
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+
guc_cancel_sched_engine_requests(guc->sched_engine);
/* GuC is blown away, drop all references to contexts */
@@ -1027,6 +1172,7 @@ static inline void queue_request(struct i915_sched_engine *sched_engine,
list_add_tail(&rq->sched.link,
i915_sched_lookup_priolist(sched_engine, prio));
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ tasklet_hi_schedule(&sched_engine->tasklet);
}
static int guc_bypass_tasklet_submit(struct intel_guc *guc,
@@ -1077,12 +1223,12 @@ static int new_guc_id(struct intel_guc *guc)
static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
if (!context_guc_id_invalid(ce)) {
- ida_simple_remove(&guc->guc_ids, ce->guc_id);
- reset_lrc_desc(guc, ce->guc_id);
+ ida_simple_remove(&guc->guc_ids, ce->guc_id.id);
+ reset_lrc_desc(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
- if (!list_empty(&ce->guc_id_link))
- list_del_init(&ce->guc_id_link);
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
}
static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
@@ -1104,14 +1250,18 @@ static int steal_guc_id(struct intel_guc *guc)
if (!list_empty(&guc->guc_id_list)) {
ce = list_first_entry(&guc->guc_id_list,
struct intel_context,
- guc_id_link);
+ guc_id.link);
- GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+ GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
GEM_BUG_ON(context_guc_id_invalid(ce));
- list_del_init(&ce->guc_id_link);
- guc_id = ce->guc_id;
+ list_del_init(&ce->guc_id.link);
+ guc_id = ce->guc_id.id;
+
+ spin_lock(&ce->guc_state.lock);
clr_context_registered(ce);
+ spin_unlock(&ce->guc_state.lock);
+
set_context_guc_id_invalid(ce);
return guc_id;
} else {
@@ -1142,26 +1292,28 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
int ret = 0;
unsigned long flags, tries = PIN_GUC_ID_TRIES;
- GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
+ GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
try_again:
spin_lock_irqsave(&guc->contexts_lock, flags);
+ might_lock(&ce->guc_state.lock);
+
if (context_guc_id_invalid(ce)) {
- ret = assign_guc_id(guc, &ce->guc_id);
+ ret = assign_guc_id(guc, &ce->guc_id.id);
if (ret)
goto out_unlock;
ret = 1; /* Indidcates newly assigned guc_id */
}
- if (!list_empty(&ce->guc_id_link))
- list_del_init(&ce->guc_id_link);
- atomic_inc(&ce->guc_id_ref);
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
+ atomic_inc(&ce->guc_id.ref);
out_unlock:
spin_unlock_irqrestore(&guc->contexts_lock, flags);
/*
- * -EAGAIN indicates no guc_ids are available, let's retire any
+ * -EAGAIN indicates no guc_id are available, let's retire any
* outstanding requests to see if that frees up a guc_id. If the first
* retire didn't help, insert a sleep with the timeslice duration before
* attempting to retire more requests. Double the sleep period each
@@ -1189,15 +1341,15 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
unsigned long flags;
- GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
+ GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
if (unlikely(context_guc_id_invalid(ce)))
return;
spin_lock_irqsave(&guc->contexts_lock, flags);
- if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
- !atomic_read(&ce->guc_id_ref))
- list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
+ if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
+ !atomic_read(&ce->guc_id.ref))
+ list_add_tail(&ce->guc_id.link, &guc->guc_id_list);
spin_unlock_irqrestore(&guc->contexts_lock, flags);
}
@@ -1220,21 +1372,25 @@ static int register_context(struct intel_context *ce, bool loop)
{
struct intel_guc *guc = ce_to_guc(ce);
u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
- ce->guc_id * sizeof(struct guc_lrc_desc);
+ ce->guc_id.id * sizeof(struct guc_lrc_desc);
int ret;
trace_intel_context_register(ce);
- ret = __guc_action_register_context(guc, ce->guc_id, offset, loop);
- if (likely(!ret))
+ ret = __guc_action_register_context(guc, ce->guc_id.id, offset, loop);
+ if (likely(!ret)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
set_context_registered(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ }
return ret;
}
static int __guc_action_deregister_context(struct intel_guc *guc,
- u32 guc_id,
- bool loop)
+ u32 guc_id)
{
u32 action[] = {
INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
@@ -1243,16 +1399,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc,
return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
G2H_LEN_DW_DEREGISTER_CONTEXT,
- loop);
+ true);
}
-static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
+static int deregister_context(struct intel_context *ce, u32 guc_id)
{
struct intel_guc *guc = ce_to_guc(ce);
trace_intel_context_deregister(ce);
- return __guc_action_deregister_context(guc, guc_id, loop);
+ return __guc_action_deregister_context(guc, guc_id);
}
static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
@@ -1285,22 +1441,19 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}
-static inline u8 map_i915_prio_to_guc_prio(int prio);
-
static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
struct intel_guc *guc = &engine->gt->uc.guc;
- u32 desc_idx = ce->guc_id;
+ u32 desc_idx = ce->guc_id.id;
struct guc_lrc_desc *desc;
- const struct i915_gem_context *ctx;
- int prio = I915_CONTEXT_DEFAULT_PRIORITY;
bool context_registered;
intel_wakeref_t wakeref;
int ret = 0;
GEM_BUG_ON(!engine->mask);
+ GEM_BUG_ON(!sched_state_is_init(ce));
/*
* Ensure LRC + CT vmas are is same region as write barrier is done
@@ -1311,12 +1464,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
context_registered = lrc_desc_registered(guc, desc_idx);
- rcu_read_lock();
- ctx = rcu_dereference(ce->gem_context);
- if (ctx)
- prio = ctx->sched.priority;
- rcu_read_unlock();
-
reset_lrc_desc(guc, desc_idx);
set_lrc_desc_registered(guc, desc_idx, ce);
@@ -1325,11 +1472,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
desc->engine_submit_mask = adjust_engine_mask(engine->class,
engine->mask);
desc->hw_context_desc = ce->lrc.lrca;
- ce->guc_prio = map_i915_prio_to_guc_prio(prio);
- desc->priority = ce->guc_prio;
+ desc->priority = ce->guc_state.prio;
desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
guc_context_policy_init(engine, desc);
- init_sched_state(ce);
/*
* The context_lookup xarray is used to determine if the hardware
@@ -1340,26 +1485,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
* registering this context.
*/
if (context_registered) {
+ bool disabled;
+ unsigned long flags;
+
trace_intel_context_steal_guc_id(ce);
- if (!loop) {
+ GEM_BUG_ON(!loop);
+
+ /* Seal race with Reset */
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ disabled = submission_disabled(guc);
+ if (likely(!disabled)) {
set_context_wait_for_deregister_to_register(ce);
intel_context_get(ce);
- } else {
- bool disabled;
- unsigned long flags;
-
- /* Seal race with Reset */
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- disabled = submission_disabled(guc);
- if (likely(!disabled)) {
- set_context_wait_for_deregister_to_register(ce);
- intel_context_get(ce);
- }
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- if (unlikely(disabled)) {
- reset_lrc_desc(guc, desc_idx);
- return 0; /* Will get registered later */
- }
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ if (unlikely(disabled)) {
+ reset_lrc_desc(guc, desc_idx);
+ return 0; /* Will get registered later */
}
/*
@@ -1367,20 +1509,18 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
* context whose guc_id was stolen.
*/
with_intel_runtime_pm(runtime_pm, wakeref)
- ret = deregister_context(ce, ce->guc_id, loop);
- if (unlikely(ret == -EBUSY)) {
- clr_context_wait_for_deregister_to_register(ce);
- intel_context_put(ce);
- } else if (unlikely(ret == -ENODEV)) {
+ ret = deregister_context(ce, ce->guc_id.id);
+ if (unlikely(ret == -ENODEV))
ret = 0; /* Will get registered later */
- }
} else {
with_intel_runtime_pm(runtime_pm, wakeref)
ret = register_context(ce, loop);
- if (unlikely(ret == -EBUSY))
+ if (unlikely(ret == -EBUSY)) {
+ reset_lrc_desc(guc, desc_idx);
+ } else if (unlikely(ret == -ENODEV)) {
reset_lrc_desc(guc, desc_idx);
- else if (unlikely(ret == -ENODEV))
ret = 0; /* Will get registered later */
+ }
}
return ret;
@@ -1440,7 +1580,7 @@ static void __guc_context_sched_enable(struct intel_guc *guc,
{
u32 action[] = {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
- ce->guc_id,
+ ce->guc_id.id,
GUC_CONTEXT_ENABLE
};
@@ -1456,7 +1596,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
{
u32 action[] = {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
- guc_id, /* ce->guc_id not stable */
+ guc_id, /* ce->guc_id.id not stable */
GUC_CONTEXT_DISABLE
};
@@ -1472,24 +1612,24 @@ static void guc_blocked_fence_complete(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
- if (!i915_sw_fence_done(&ce->guc_blocked))
- i915_sw_fence_complete(&ce->guc_blocked);
+ if (!i915_sw_fence_done(&ce->guc_state.blocked))
+ i915_sw_fence_complete(&ce->guc_state.blocked);
}
static void guc_blocked_fence_reinit(struct intel_context *ce)
{
lockdep_assert_held(&ce->guc_state.lock);
- GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked));
+ GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));
/*
* This fence is always complete unless a pending schedule disable is
* outstanding. We arm the fence here and complete it when we receive
* the pending schedule disable complete message.
*/
- i915_sw_fence_fini(&ce->guc_blocked);
- i915_sw_fence_reinit(&ce->guc_blocked);
- i915_sw_fence_await(&ce->guc_blocked);
- i915_sw_fence_commit(&ce->guc_blocked);
+ i915_sw_fence_fini(&ce->guc_state.blocked);
+ i915_sw_fence_reinit(&ce->guc_state.blocked);
+ i915_sw_fence_await(&ce->guc_state.blocked);
+ i915_sw_fence_commit(&ce->guc_state.blocked);
}
static u16 prep_context_pending_disable(struct intel_context *ce)
@@ -1501,13 +1641,12 @@ static u16 prep_context_pending_disable(struct intel_context *ce)
guc_blocked_fence_reinit(ce);
intel_context_get(ce);
- return ce->guc_id;
+ return ce->guc_id.id;
}
static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
- struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
unsigned long flags;
struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
intel_wakeref_t wakeref;
@@ -1516,20 +1655,14 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
spin_lock_irqsave(&ce->guc_state.lock, flags);
- /*
- * Sync with submission path, increment before below changes to context
- * state.
- */
- spin_lock(&sched_engine->lock);
incr_context_blocked(ce);
- spin_unlock(&sched_engine->lock);
enabled = context_enabled(ce);
if (unlikely(!enabled || submission_disabled(guc))) {
if (enabled)
clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- return &ce->guc_blocked;
+ return &ce->guc_state.blocked;
}
/*
@@ -1545,13 +1678,29 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_sched_disable(guc, ce, guc_id);
- return &ce->guc_blocked;
+ return &ce->guc_state.blocked;
+}
+
+#define SCHED_STATE_MULTI_BLOCKED_MASK \
+ (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)
+#define SCHED_STATE_NO_UNBLOCK \
+ (SCHED_STATE_MULTI_BLOCKED_MASK | \
+ SCHED_STATE_PENDING_DISABLE | \
+ SCHED_STATE_BANNED)
+
+static bool context_cant_unblock(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+
+ return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) ||
+ context_guc_id_invalid(ce) ||
+ !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) ||
+ !intel_context_is_pinned(ce);
}
static void guc_context_unblock(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
- struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
unsigned long flags;
struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
intel_wakeref_t wakeref;
@@ -1562,9 +1711,7 @@ static void guc_context_unblock(struct intel_context *ce)
spin_lock_irqsave(&ce->guc_state.lock, flags);
if (unlikely(submission_disabled(guc) ||
- !intel_context_is_pinned(ce) ||
- context_pending_disable(ce) ||
- context_blocked(ce) > 1)) {
+ context_cant_unblock(ce))) {
enable = false;
} else {
enable = true;
@@ -1573,13 +1720,7 @@ static void guc_context_unblock(struct intel_context *ce)
intel_context_get(ce);
}
- /*
- * Sync with submission path, decrement after above changes to context
- * state.
- */
- spin_lock(&sched_engine->lock);
decr_context_blocked(ce);
- spin_unlock(&sched_engine->lock);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
@@ -1593,15 +1734,25 @@ static void guc_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
{
if (i915_sw_fence_signaled(&rq->submit)) {
- struct i915_sw_fence *fence = guc_context_block(ce);
+ struct i915_sw_fence *fence;
+ intel_context_get(ce);
+ fence = guc_context_block(ce);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
true);
}
+
+ /*
+ * XXX: Racey if context is reset, see comment in
+ * __guc_reset_context().
+ */
+ flush_work(&ce_to_guc(ce)->ct.requests.worker);
+
guc_context_unblock(ce);
+ intel_context_put(ce);
}
}
@@ -1662,7 +1813,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
if (!context_guc_id_invalid(ce))
with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_set_preemption_timeout(guc,
- ce->guc_id,
+ ce->guc_id.id,
1);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
@@ -1675,40 +1826,22 @@ static void guc_context_sched_disable(struct intel_context *ce)
struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
intel_wakeref_t wakeref;
u16 guc_id;
- bool enabled;
-
- if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
- !lrc_desc_registered(guc, ce->guc_id)) {
- clr_context_enabled(ce);
- goto unpin;
- }
-
- if (!context_enabled(ce))
- goto unpin;
spin_lock_irqsave(&ce->guc_state.lock, flags);
/*
- * We have to check if the context has been disabled by another thread.
- * We also have to check if the context has been pinned again as another
- * pin operation is allowed to pass this function. Checking the pin
- * count, within ce->guc_state.lock, synchronizes this function with
- * guc_request_alloc ensuring a request doesn't slip through the
- * 'context_pending_disable' fence. Checking within the spin lock (can't
- * sleep) ensures another process doesn't pin this context and generate
- * a request before we set the 'context_pending_disable' flag here.
+ * We have to check if the context has been disabled by another thread,
+ * check if submssion has been disabled to seal a race with reset and
+ * finally check if any more requests have been committed to the
+ * context ensursing that a request doesn't slip through the
+ * 'context_pending_disable' fence.
*/
- enabled = context_enabled(ce);
- if (unlikely(!enabled || submission_disabled(guc))) {
- if (enabled)
- clr_context_enabled(ce);
+ if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
+ context_has_committed_requests(ce))) {
+ clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
goto unpin;
}
- if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) {
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- return;
- }
guc_id = prep_context_pending_disable(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
@@ -1725,20 +1858,20 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
- GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
- GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
+ GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
+ GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
GEM_BUG_ON(context_enabled(ce));
- clr_context_registered(ce);
- deregister_context(ce, ce->guc_id, true);
+ deregister_context(ce, ce->guc_id.id);
}
static void __guc_context_destroy(struct intel_context *ce)
{
- GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
- ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
- ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
- ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
+ GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
+ ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
+ ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
+ ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
+ GEM_BUG_ON(ce->guc_state.number_committed_requests);
lrc_fini(ce);
intel_context_fini(ce);
@@ -1774,7 +1907,7 @@ static void guc_context_destroy(struct kref *kref)
__guc_context_destroy(ce);
return;
} else if (submission_disabled(guc) ||
- !lrc_desc_registered(guc, ce->guc_id)) {
+ !lrc_desc_registered(guc, ce->guc_id.id)) {
release_guc_id(guc, ce);
__guc_context_destroy(ce);
return;
@@ -1783,10 +1916,10 @@ static void guc_context_destroy(struct kref *kref)
/*
* We have to acquire the context spinlock and check guc_id again, if it
* is valid it hasn't been stolen and needs to be deregistered. We
- * delete this context from the list of unpinned guc_ids available to
+ * delete this context from the list of unpinned guc_id available to
* steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
* returns indicating this context has been deregistered the guc_id is
- * returned to the pool of available guc_ids.
+ * returned to the pool of available guc_id.
*/
spin_lock_irqsave(&guc->contexts_lock, flags);
if (context_guc_id_invalid(ce)) {
@@ -1795,15 +1928,17 @@ static void guc_context_destroy(struct kref *kref)
return;
}
- if (!list_empty(&ce->guc_id_link))
- list_del_init(&ce->guc_id_link);
+ if (!list_empty(&ce->guc_id.link))
+ list_del_init(&ce->guc_id.link);
spin_unlock_irqrestore(&guc->contexts_lock, flags);
/* Seal race with Reset */
spin_lock_irqsave(&ce->guc_state.lock, flags);
disabled = submission_disabled(guc);
- if (likely(!disabled))
+ if (likely(!disabled)) {
set_context_destroyed(ce);
+ clr_context_registered(ce);
+ }
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (unlikely(disabled)) {
release_guc_id(guc, ce);
@@ -1839,20 +1974,23 @@ static void guc_context_set_prio(struct intel_guc *guc,
{
u32 action[] = {
INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
- ce->guc_id,
+ ce->guc_id.id,
prio,
};
GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
prio > GUC_CLIENT_PRIORITY_NORMAL);
+ lockdep_assert_held(&ce->guc_state.lock);
- if (ce->guc_prio == prio || submission_disabled(guc) ||
- !context_registered(ce))
+ if (ce->guc_state.prio == prio || submission_disabled(guc) ||
+ !context_registered(ce)) {
+ ce->guc_state.prio = prio;
return;
+ }
guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
- ce->guc_prio = prio;
+ ce->guc_state.prio = prio;
trace_intel_context_set_prio(ce);
}
@@ -1871,25 +2009,25 @@ static inline u8 map_i915_prio_to_guc_prio(int prio)
static inline void add_context_inflight_prio(struct intel_context *ce,
u8 guc_prio)
{
- lockdep_assert_held(&ce->guc_active.lock);
- GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));
+ lockdep_assert_held(&ce->guc_state.lock);
+ GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
- ++ce->guc_prio_count[guc_prio];
+ ++ce->guc_state.prio_count[guc_prio];
/* Overflow protection */
- GEM_WARN_ON(!ce->guc_prio_count[guc_prio]);
+ GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
}
static inline void sub_context_inflight_prio(struct intel_context *ce,
u8 guc_prio)
{
- lockdep_assert_held(&ce->guc_active.lock);
- GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));
+ lockdep_assert_held(&ce->guc_state.lock);
+ GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count));
/* Underflow protection */
- GEM_WARN_ON(!ce->guc_prio_count[guc_prio]);
+ GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]);
- --ce->guc_prio_count[guc_prio];
+ --ce->guc_state.prio_count[guc_prio];
}
static inline void update_context_prio(struct intel_context *ce)
@@ -1900,10 +2038,10 @@ static inline void update_context_prio(struct intel_context *ce)
BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);
- lockdep_assert_held(&ce->guc_active.lock);
+ lockdep_assert_held(&ce->guc_state.lock);
- for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) {
- if (ce->guc_prio_count[i]) {
+ for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) {
+ if (ce->guc_state.prio_count[i]) {
guc_context_set_prio(guc, ce, i);
break;
}
@@ -1923,8 +2061,8 @@ static void add_to_context(struct i915_request *rq)
GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
- spin_lock(&ce->guc_active.lock);
- list_move_tail(&rq->sched.link, &ce->guc_active.requests);
+ spin_lock(&ce->guc_state.lock);
+ list_move_tail(&rq->sched.link, &ce->guc_state.requests);
if (rq->guc_prio == GUC_PRIO_INIT) {
rq->guc_prio = new_guc_prio;
@@ -1936,12 +2074,12 @@ static void add_to_context(struct i915_request *rq)
}
update_context_prio(ce);
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
}
static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
{
- lockdep_assert_held(&ce->guc_active.lock);
+ lockdep_assert_held(&ce->guc_state.lock);
if (rq->guc_prio != GUC_PRIO_INIT &&
rq->guc_prio != GUC_PRIO_FINI) {
@@ -1955,7 +2093,7 @@ static void remove_from_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- spin_lock_irq(&ce->guc_active.lock);
+ spin_lock_irq(&ce->guc_state.lock);
list_del_init(&rq->sched.link);
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
@@ -1965,9 +2103,11 @@ static void remove_from_context(struct i915_request *rq)
guc_prio_fini(rq, ce);
- spin_unlock_irq(&ce->guc_active.lock);
+ decr_context_committed_requests(ce);
- atomic_dec(&ce->guc_id_ref);
+ spin_unlock_irq(&ce->guc_state.lock);
+
+ atomic_dec(&ce->guc_id.ref);
i915_request_notify_execute_cb_imm(rq);
}
@@ -1994,17 +2134,32 @@ static const struct intel_context_ops guc_context_ops = {
.create_virtual = guc_create_virtual,
};
+static void submit_work_cb(struct irq_work *wrk)
+{
+ struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work);
+
+ might_lock(&rq->engine->sched_engine->lock);
+ i915_sw_fence_complete(&rq->submit);
+}
+
static void __guc_signal_context_fence(struct intel_context *ce)
{
- struct i915_request *rq;
+ struct i915_request *rq, *rn;
lockdep_assert_held(&ce->guc_state.lock);
if (!list_empty(&ce->guc_state.fences))
trace_intel_context_fence_release(ce);
- list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link)
- i915_sw_fence_complete(&rq->submit);
+ /*
+ * Use an IRQ to ensure locking order of sched_engine->lock ->
+ * ce->guc_state.lock is preserved.
+ */
+ list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,
+ guc_fence_link) {
+ list_del(&rq->guc_fence_link);
+ irq_work_queue(&rq->submit_work);
+ }
INIT_LIST_HEAD(&ce->guc_state.fences);
}
@@ -2022,10 +2177,25 @@ static void guc_signal_context_fence(struct intel_context *ce)
static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
{
return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
- !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) &&
+ !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) &&
!submission_disabled(ce_to_guc(ce));
}
+static void guc_context_init(struct intel_context *ce)
+{
+ const struct i915_gem_context *ctx;
+ int prio = I915_CONTEXT_DEFAULT_PRIORITY;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(ce->gem_context);
+ if (ctx)
+ prio = ctx->sched.priority;
+ rcu_read_unlock();
+
+ ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
+ set_bit(CONTEXT_GUC_INIT, &ce->flags);
+}
+
static int guc_request_alloc(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
@@ -2057,14 +2227,17 @@ static int guc_request_alloc(struct i915_request *rq)
rq->reserved_space -= GUC_REQUEST_SIZE;
+ if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
+ guc_context_init(ce);
+
/*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
- * guc_ids and creating horrible race conditions. This is especially bad
- * when guc_ids are being stolen due to over subscription. By the time
+ * guc_id and creating horrible race conditions. This is especially bad
+ * when guc_id are being stolen due to over subscription. By the time
* this function is reached, it is guaranteed that the guc_id will be
* persistent until the generated request is retired. Thus, sealing these
- * race conditions. It is still safe to fail here if guc_ids are
+ * race conditions. It is still safe to fail here if guc_id are
* exhausted and return -EAGAIN to the user indicating that they can try
* again in the future.
*
@@ -2074,7 +2247,7 @@ static int guc_request_alloc(struct i915_request *rq)
* decremented on each retire. When it is zero, a lock around the
* increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
*/
- if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
+ if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
goto out;
ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
@@ -2087,7 +2260,7 @@ static int guc_request_alloc(struct i915_request *rq)
disable_submission(guc);
goto out; /* GPU will be reset */
}
- atomic_dec(&ce->guc_id_ref);
+ atomic_dec(&ce->guc_id.ref);
unpin_guc_id(guc, ce);
return ret;
}
@@ -2102,22 +2275,16 @@ out:
* schedule enable or context registration if either G2H is pending
* respectfully. Once a G2H returns, the fence is released that is
* blocking these requests (see guc_signal_context_fence).
- *
- * We can safely check the below fields outside of the lock as it isn't
- * possible for these fields to transition from being clear to set but
- * converse is possible, hence the need for the check within the lock.
*/
- if (likely(!context_wait_for_deregister_to_register(ce) &&
- !context_pending_disable(ce)))
- return 0;
-
spin_lock_irqsave(&ce->guc_state.lock, flags);
if (context_wait_for_deregister_to_register(ce) ||
context_pending_disable(ce)) {
+ init_irq_work(&rq->submit_work, submit_work_cb);
i915_sw_fence_await(&rq->submit);
list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
}
+ incr_context_committed_requests(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
return 0;
@@ -2259,7 +2426,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
!new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
return;
- spin_lock(&ce->guc_active.lock);
+ spin_lock(&ce->guc_state.lock);
if (rq->guc_prio != GUC_PRIO_FINI) {
if (rq->guc_prio != GUC_PRIO_INIT)
sub_context_inflight_prio(ce, rq->guc_prio);
@@ -2267,16 +2434,16 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
add_context_inflight_prio(ce, rq->guc_prio);
update_context_prio(ce);
}
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
}
static void guc_retire_inflight_request_prio(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
- spin_lock(&ce->guc_active.lock);
+ spin_lock(&ce->guc_state.lock);
guc_prio_fini(rq, ce);
- spin_unlock(&ce->guc_active.lock);
+ spin_unlock(&ce->guc_state.lock);
}
static void sanitize_hwsp(struct intel_engine_cs *engine)
@@ -2310,6 +2477,8 @@ static void guc_sanitize(struct intel_engine_cs *engine)
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+
+ intel_engine_reset_pinned_contexts(engine);
}
static void setup_hwsp(struct intel_engine_cs *engine)
@@ -2385,9 +2554,13 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
* and even it did this code would be run again.
*/
- for_each_engine(engine, gt, id)
- if (engine->kernel_context)
- guc_kernel_context_pin(guc, engine->kernel_context);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ list_for_each_entry(ce, &engine->pinned_contexts_list,
+ pinned_contexts_link)
+ guc_kernel_context_pin(guc, ce);
+ }
}
static void guc_release(struct intel_engine_cs *engine)
@@ -2583,12 +2756,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
return ce;
}
-static void decr_outstanding_submission_g2h(struct intel_guc *guc)
-{
- if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
- wake_up_all(&guc->ct.wq);
-}
-
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
const u32 *msg,
u32 len)
@@ -2607,6 +2774,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
trace_intel_context_deregister_done(ce);
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (unlikely(ce->drop_deregister)) {
+ ce->drop_deregister = false;
+ return 0;
+ }
+#endif
+
if (context_wait_for_deregister_to_register(ce)) {
struct intel_runtime_pm *runtime_pm =
&ce->engine->gt->i915->runtime_pm;
@@ -2652,8 +2826,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
(!context_pending_enable(ce) &&
!context_pending_disable(ce)))) {
drm_err(&guc_to_gt(guc)->i915->drm,
- "Bad context sched_state 0x%x, 0x%x, desc_idx %u",
- atomic_read(&ce->guc_sched_state_no_lock),
+ "Bad context sched_state 0x%x, desc_idx %u",
ce->guc_state.sched_state, desc_idx);
return -EPROTO;
}
@@ -2661,10 +2834,26 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
trace_intel_context_sched_done(ce);
if (context_pending_enable(ce)) {
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (unlikely(ce->drop_schedule_enable)) {
+ ce->drop_schedule_enable = false;
+ return 0;
+ }
+#endif
+
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
clr_context_pending_enable(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
} else if (context_pending_disable(ce)) {
bool banned;
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (unlikely(ce->drop_schedule_disable)) {
+ ce->drop_schedule_disable = false;
+ return 0;
+ }
+#endif
+
/*
* Unpin must be done before __guc_signal_context_fence,
* otherwise a race exists between the requests getting
@@ -2721,7 +2910,12 @@ static void guc_handle_context_reset(struct intel_guc *guc,
{
trace_intel_context_reset(ce);
- if (likely(!intel_context_is_banned(ce))) {
+ /*
+ * XXX: Racey if request cancellation has occurred, see comment in
+ * __guc_reset_context().
+ */
+ if (likely(!intel_context_is_banned(ce) &&
+ !context_blocked(ce))) {
capture_error_state(guc, ce);
guc_context_replay(ce);
}
@@ -2797,33 +2991,47 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
struct intel_context *ce;
struct i915_request *rq;
unsigned long index;
+ unsigned long flags;
/* Reset called during driver load? GuC not yet initialised! */
if (unlikely(!guc_submission_initialized(guc)))
return;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- if (!intel_context_is_pinned(ce))
+ if (!kref_get_unless_zero(&ce->ref))
continue;
+ xa_unlock(&guc->context_lookup);
+
+ if (!intel_context_is_pinned(ce))
+ goto next;
+
if (intel_engine_is_virtual(ce->engine)) {
if (!(ce->engine->mask & engine->mask))
- continue;
+ goto next;
} else {
if (ce->engine != engine)
- continue;
+ goto next;
}
- list_for_each_entry(rq, &ce->guc_active.requests, sched.link) {
+ list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
continue;
intel_engine_set_hung_context(engine, ce);
/* Can only cope with one hang at a time... */
- return;
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
+ goto done;
}
+next:
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
}
+done:
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
@@ -2839,23 +3047,34 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
if (unlikely(!guc_submission_initialized(guc)))
return;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- if (!intel_context_is_pinned(ce))
+ if (!kref_get_unless_zero(&ce->ref))
continue;
+ xa_unlock(&guc->context_lookup);
+
+ if (!intel_context_is_pinned(ce))
+ goto next;
+
if (intel_engine_is_virtual(ce->engine)) {
if (!(ce->engine->mask & engine->mask))
- continue;
+ goto next;
} else {
if (ce->engine != engine)
- continue;
+ goto next;
}
- spin_lock_irqsave(&ce->guc_active.lock, flags);
- intel_engine_dump_active_requests(&ce->guc_active.requests,
+ spin_lock(&ce->guc_state.lock);
+ intel_engine_dump_active_requests(&ce->guc_state.requests,
hung_rq, m);
- spin_unlock_irqrestore(&ce->guc_active.lock, flags);
+ spin_unlock(&ce->guc_state.lock);
+
+next:
+ intel_context_put(ce);
+ xa_lock(&guc->context_lookup);
}
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
void intel_guc_submission_print_info(struct intel_guc *guc,
@@ -2881,7 +3100,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc,
priolist_for_each_request(rq, pl)
drm_printf(p, "guc_id=%u, seqno=%llu\n",
- rq->context->guc_id,
+ rq->context->guc_id.id,
rq->fence.seqno);
}
spin_unlock_irqrestore(&sched_engine->lock, flags);
@@ -2893,13 +3112,12 @@ static inline void guc_log_context_priority(struct drm_printer *p,
{
int i;
- drm_printf(p, "\t\tPriority: %d\n",
- ce->guc_prio);
+ drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
i < GUC_CLIENT_PRIORITY_NUM; ++i) {
drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
- i, ce->guc_prio_count[i]);
+ i, ce->guc_state.prio_count[i]);
}
drm_printf(p, "\n");
}
@@ -2909,9 +3127,11 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
{
struct intel_context *ce;
unsigned long index;
+ unsigned long flags;
+ xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
- drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id);
+ drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
ce->ring->head,
@@ -2922,13 +3142,13 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
drm_printf(p, "\t\tContext Pin Count: %u\n",
atomic_read(&ce->pin_count));
drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
- atomic_read(&ce->guc_id_ref));
- drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n",
- ce->guc_state.sched_state,
- atomic_read(&ce->guc_sched_state_no_lock));
+ atomic_read(&ce->guc_id.ref));
+ drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
+ ce->guc_state.sched_state);
guc_log_context_priority(p, ce);
}
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
}
static struct intel_context *
@@ -3036,3 +3256,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
return false;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_guc.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index fc5387b410a2..ff4b6869b80b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc)
vma->obj, true));
if (IS_ERR(vaddr)) {
i915_vma_unpin_and_release(&vma, 0);
- return PTR_ERR(vaddr);
+ err = PTR_ERR(vaddr);
+ goto unpin_out;
}
copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
- GEM_BUG_ON(copied < huc->fw.rsa_size);
-
i915_gem_object_unpin_map(vma->obj);
+ if (copied < huc->fw.rsa_size) {
+ err = -ENOMEM;
+ goto unpin_out;
+ }
+
huc->rsa_data = vma;
return 0;
+
+unpin_out:
+ i915_vma_unpin_and_release(&vma, 0);
+ return err;
}
static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
index 5733c15fd123..15998963b863 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c
@@ -5,7 +5,7 @@
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
#include "intel_huc.h"
#include "intel_huc_debugfs.h"
@@ -21,11 +21,11 @@ static int huc_info_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(huc_info);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(huc_info);
void intel_huc_debugfs_register(struct intel_huc *huc, struct dentry *root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "huc_info", &huc_info_fops, NULL },
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index b104fb7607eb..2fef3b0bbe95 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -35,7 +35,7 @@ static void uc_expand_default_options(struct intel_uc *uc)
}
/* Intermediate platforms are HuC authentication only */
- if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) {
+ if (IS_ALDERLAKE_S(i915)) {
i915->params.enable_guc = ENABLE_GUC_LOAD_HUC;
return;
}
@@ -172,11 +172,6 @@ void intel_uc_driver_remove(struct intel_uc *uc)
__uc_free_load_err_log(uc);
}
-static inline bool guc_communication_enabled(struct intel_guc *guc)
-{
- return intel_guc_ct_enabled(&guc->ct);
-}
-
/*
* Events triggered while CT buffers are disabled are logged in the SCRATCH_15
* register using the same bits used in the CT message payload. Since our
@@ -210,7 +205,7 @@ static void guc_get_mmio_msg(struct intel_guc *guc)
static void guc_handle_mmio_msg(struct intel_guc *guc)
{
/* we need communication to be enabled to reply to GuC */
- GEM_BUG_ON(!guc_communication_enabled(guc));
+ GEM_BUG_ON(!intel_guc_ct_enabled(&guc->ct));
spin_lock_irq(&guc->irq_lock);
if (guc->mmio_msg) {
@@ -226,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc)
struct drm_i915_private *i915 = gt->i915;
int ret;
- GEM_BUG_ON(guc_communication_enabled(guc));
+ GEM_BUG_ON(intel_guc_ct_enabled(&guc->ct));
ret = i915_inject_probe_error(i915, -ENXIO);
if (ret)
@@ -662,7 +657,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
return 0;
/* Make sure we enable communication if and only if it's disabled */
- GEM_BUG_ON(enable_communication == guc_communication_enabled(guc));
+ GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct));
if (enable_communication)
guc_enable_communication(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
index 089d98662f46..c2f7924295e7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c
@@ -6,7 +6,7 @@
#include <linux/debugfs.h>
#include <drm/drm_print.h>
-#include "gt/debugfs_gt.h"
+#include "gt/intel_gt_debugfs.h"
#include "intel_guc_debugfs.h"
#include "intel_huc_debugfs.h"
#include "intel_uc.h"
@@ -32,11 +32,11 @@ static int uc_usage_show(struct seq_file *m, void *data)
return 0;
}
-DEFINE_GT_DEBUGFS_ATTRIBUTE(uc_usage);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(uc_usage);
void intel_uc_debugfs_register(struct intel_uc *uc, struct dentry *gt_root)
{
- static const struct debugfs_gt_file files[] = {
+ static const struct intel_gt_debugfs_file files[] = {
{ "usage", &uc_usage_fops, NULL },
};
struct dentry *root;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 3a16d08608a5..3aa87be4f2e4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -7,6 +7,7 @@
#include <linux/firmware.h>
#include <drm/drm_print.h>
+#include "gem/i915_gem_lmem.h"
#include "intel_uc_fw.h"
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
@@ -50,6 +51,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
+ fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \
fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
@@ -370,7 +372,14 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
uc_fw->private_data_size = css->private_data_size;
- obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ if (HAS_LMEM(i915)) {
+ obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
+ if (!IS_ERR(obj))
+ obj->flags |= I915_BO_ALLOC_PM_EARLY;
+ } else {
+ obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ }
+
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto fail;
@@ -413,20 +422,25 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- struct i915_vma dummy = {
- .node.start = uc_fw_ggtt_offset(uc_fw),
- .node.size = obj->base.size,
- .pages = obj->mm.pages,
- .vm = &ggtt->vm,
- };
+ struct i915_vma *dummy = &uc_fw->dummy;
+ u32 pte_flags = 0;
+
+ dummy->node.start = uc_fw_ggtt_offset(uc_fw);
+ dummy->node.size = obj->base.size;
+ dummy->pages = obj->mm.pages;
+ dummy->vm = &ggtt->vm;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size);
+ GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */
- drm_clflush_sg(dummy.pages);
+ if (i915_gem_object_has_struct_page(obj))
+ drm_clflush_sg(dummy->pages);
- ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
+ if (i915_gem_object_is_lmem(obj))
+ pte_flags |= PTE_LM;
+
+ ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
}
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
@@ -585,13 +599,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw)
*/
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
{
- struct sg_table *pages = uc_fw->obj->mm.pages;
+ struct intel_memory_region *mr = uc_fw->obj->mm.region;
u32 size = min_t(u32, uc_fw->rsa_size, max_len);
u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
+ struct sgt_iter iter;
+ size_t count = 0;
+ int idx;
+ /* Called during reset handling, must be atomic [no fs_reclaim] */
GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
- return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset);
+ idx = offset >> PAGE_SHIFT;
+ offset = offset_in_page(offset);
+ if (i915_gem_object_has_struct_page(uc_fw->obj)) {
+ struct page *page;
+
+ for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
+ u32 len = min_t(u32, size, PAGE_SIZE - offset);
+ void *vaddr;
+
+ if (idx > 0) {
+ idx--;
+ continue;
+ }
+
+ vaddr = kmap_atomic(page);
+ memcpy(dst, vaddr + offset, len);
+ kunmap_atomic(vaddr);
+
+ offset = 0;
+ dst += len;
+ size -= len;
+ count += len;
+ if (!size)
+ break;
+ }
+ } else {
+ dma_addr_t addr;
+
+ for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) {
+ u32 len = min_t(u32, size, PAGE_SIZE - offset);
+ void __iomem *vaddr;
+
+ if (idx > 0) {
+ idx--;
+ continue;
+ }
+
+ vaddr = io_mapping_map_atomic_wc(&mr->iomap,
+ addr - mr->region.start);
+ memcpy_fromio(dst, vaddr + offset, len);
+ io_mapping_unmap_atomic(vaddr);
+
+ offset = 0;
+ dst += len;
+ size -= len;
+ count += len;
+ if (!size)
+ break;
+ }
+ }
+
+ return count;
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 99bb1fe1af66..1e00bf65639e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -10,6 +10,7 @@
#include "intel_uc_fw_abi.h"
#include "intel_device_info.h"
#include "i915_gem.h"
+#include "i915_vma.h"
struct drm_printer;
struct drm_i915_private;
@@ -75,6 +76,14 @@ struct intel_uc_fw {
bool user_overridden;
size_t size;
struct drm_i915_gem_object *obj;
+ /**
+ * @dummy: A vma used in binding the uc fw to ggtt. We can't define this
+ * vma on the stack as it can lead to a stack overflow, so we define it
+ * here. Safe to have 1 copy per uc fw because the binding is single
+ * threaded as it done during driver load (inherently single threaded)
+ * or during a GT reset (mutex guarantees single threaded).
+ */
+ struct i915_vma dummy;
/*
* The firmware build process will generate a version header file with major and
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
new file mode 100644
index 000000000000..fb0e4a7bd8ca
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright �� 2021 Intel Corporation
+ */
+
+#include "selftests/intel_scheduler_helpers.h"
+
+static struct i915_request *nop_user_request(struct intel_context *ce,
+ struct i915_request *from)
+{
+ struct i915_request *rq;
+ int ret;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ if (from) {
+ ret = i915_sw_fence_await_dma_fence(&rq->submit,
+ &from->fence, 0,
+ I915_FENCE_GFP);
+ if (ret < 0) {
+ i915_request_put(rq);
+ return ERR_PTR(ret);
+ }
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static int intel_guc_scrub_ctbs(void *arg)
+{
+ struct intel_gt *gt = arg;
+ int ret = 0;
+ int i;
+ struct i915_request *last[3] = {NULL, NULL, NULL}, *rq;
+ intel_wakeref_t wakeref;
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ engine = intel_selftest_find_any_engine(gt);
+
+ /* Submit requests and inject errors forcing G2H to be dropped */
+ for (i = 0; i < 3; ++i) {
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ pr_err("Failed to create context, %d: %d\n", i, ret);
+ goto err;
+ }
+
+ switch (i) {
+ case 0:
+ ce->drop_schedule_enable = true;
+ break;
+ case 1:
+ ce->drop_schedule_disable = true;
+ break;
+ case 2:
+ ce->drop_deregister = true;
+ break;
+ }
+
+ rq = nop_user_request(ce, NULL);
+ intel_context_put(ce);
+
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ pr_err("Failed to create request, %d: %d\n", i, ret);
+ goto err;
+ }
+
+ last[i] = rq;
+ }
+
+ for (i = 0; i < 3; ++i) {
+ ret = i915_request_wait(last[i], 0, HZ);
+ if (ret < 0) {
+ pr_err("Last request failed to complete: %d\n", ret);
+ goto err;
+ }
+ i915_request_put(last[i]);
+ last[i] = NULL;
+ }
+
+ /* Force all H2G / G2H to be submitted / processed */
+ intel_gt_retire_requests(gt);
+ msleep(500);
+
+ /* Scrub missing G2H */
+ intel_gt_handle_error(engine->gt, -1, 0, "selftest reset");
+
+ /* GT will not idle if G2H are lost */
+ ret = intel_gt_wait_for_idle(gt, HZ);
+ if (ret < 0) {
+ pr_err("GT failed to idle: %d\n", ret);
+ goto err;
+ }
+
+err:
+ for (i = 0; i < 3; ++i)
+ if (last[i])
+ i915_request_put(last[i]);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+
+ return ret;
+}
+
+int intel_guc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(intel_guc_scrub_ctbs),
+ };
+ struct intel_gt *gt = &i915->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return 0;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return 0;
+
+ return intel_gt_live_subtests(tests, gt);
+}