From 869e54d4d54b1b0ee16c98c2149f5785eea08c02 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 22 Apr 2024 16:14:54 -0400 Subject: drm/xe: make xe_pm_runtime_lockdep_map a static struct Fix the new sparse warning: >> drivers/gpu/drm/xe/xe_pm.c:72:20: sparse: sparse: symbol 'xe_pm_runtime_lockdep_map' was not declared. Should it be static? Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404191329.EZzOTzwK-lkp@intel.com/ Reviewed-by: Gustavo Sousa Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240422201454.699089-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 37fbeda12d3b..c1831106ea4b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -69,7 +69,7 @@ */ #ifdef CONFIG_LOCKDEP -struct lockdep_map xe_pm_runtime_lockdep_map = { +static struct lockdep_map xe_pm_runtime_lockdep_map = { .name = "xe_pm_runtime_lockdep_map" }; #endif -- cgit From 06e7139a034f26804904368fe4af2ceb70724756 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 23 Apr 2024 14:11:14 +0200 Subject: drm/xe: Fix unexpected backmerge results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent backmerge from drm-next to drm-xe-next brought with it some silent unexpected results. One code snippet was added twice and a partial revert had merge errors. Fix that up to reinstate the affected code as it was before the backmerge. v2: - Commit log message rewording (Lucas DeMarchi) Fixes: 79790b6818e9 ("Merge drm/drm-next into drm-xe-next") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240423121114.39325-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 13 +++++++------ drivers/gpu/drm/xe/xe_vm_types.h | 4 ++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 85d6f359142d..7ae2b0300db6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -863,11 +863,6 @@ static void xe_vma_destroy_late(struct xe_vma *vma) vma->ufence = NULL; } - if (vma->ufence) { - xe_sync_ufence_put(vma->ufence); - vma->ufence = NULL; - } - if (xe_vma_is_userptr(vma)) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); struct xe_userptr *userptr = &uvma->userptr; @@ -2100,6 +2095,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2294,6 +2293,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ? 
@@ -2438,7 +2439,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - !xe_vm_in_fault_mode(vm), + op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7570c2c6c463..72a100671e5d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -269,6 +269,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + /** @immediate: Immediate bind */ + bool immediate; + /** @read_only: Read only */ + bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ -- cgit From 8f21f82d8b7652e11e6800612e34547bffdc7fd2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:32 +0200 Subject: drm/xe/guc: Add GuC Relay ABI version 1.0 definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This initial GuC Relay ABI specification includes messages for ABI version negotiation and to query values of runtime/fuse registers. We will start handling those messages on the PF driver soon. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h | 170 ++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h index 747e428de421..6c2834613081 100644 --- a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h @@ -1,11 +1,179 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2023 Intel Corporation + * Copyright © 2023-2024 Intel Corporation */ #ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_ #define _ABI_GUC_RELAY_ACTIONS_ABI_H_ +#include "abi/guc_relay_communication_abi.h" + +/** + * DOC: GuC Relay VF/PF ABI Version + * + * The _`GUC_RELAY_VERSION_BASE` defines minimum VF/PF ABI version that + * drivers must support. Currently this is version 1.0. + * + * The _`GUC_RELAY_VERSION_LATEST` defines latest VF/PF ABI version that + * drivers may use. Currently this is version 1.0. + * + * Some platforms may require different base VF/PF ABI version. + * No supported VF/PF ABI version can be 0.0. + */ + +#define GUC_RELAY_VERSION_BASE_MAJOR 1 +#define GUC_RELAY_VERSION_BASE_MINOR 0 + +#define GUC_RELAY_VERSION_LATEST_MAJOR 1 +#define GUC_RELAY_VERSION_LATEST_MINOR 0 + +/** + * DOC: GuC Relay Actions + * + * The following actions are supported from VF/PF ABI version 1.0: + * + * * `VF2PF_HANDSHAKE`_ + * * `VF2PF_QUERY_RUNTIME`_ + */ + +/** + * DOC: VF2PF_HANDSHAKE + * + * This `Relay Message`_ is used by the VF to establish ABI version with the PF. + * + * Prior to exchanging any other messages, both VF driver and PF driver must + * negotiate the VF/PF ABI version that will be used in their communication. + * + * The VF driver shall use @MAJOR and @MINOR fields to pass requested ABI version. + * The VF driver may use special version 0.0 (both @MAJOR and @MINOR set to 0) + * to request latest (or any) ABI version that is supported by the PF driver. + * + * This message definition shall be supported by all future ABI versions. 
+ * This message definition shall not be changed by future ABI versions. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_HANDSHAKE` = 0x0001 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **MAJOR** - requested major version of the VFPF interface | + * | | | (use MAJOR_ANY to request latest version supported by PF) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **MINOR** - requested minor version of the VFPF interface | + * | | | (use MINOR_ANY to request latest version supported by PF) | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **MAJOR** - agreed major version of the VFPF interface | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **MINOR** - agreed minor version of the VFPF interface | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VF2PF_HANDSHAKE 0x0001u + +#define VF2PF_HANDSHAKE_REQUEST_MSG_LEN 2u +#define VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR (0xffffu << 16) +#define VF2PF_HANDSHAKE_MAJOR_ANY 0 +#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR (0xffffu << 0) +#define VF2PF_HANDSHAKE_MINOR_ANY 0 + +#define VF2PF_HANDSHAKE_RESPONSE_MSG_LEN 2u +#define VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR (0xffffu << 16) +#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR (0xffffu << 0) + +/** + * DOC: VF2PF_QUERY_RUNTIME + * + * This `Relay Message`_ is used by the VF to query values of runtime registers. + * + * On some platforms, VF drivers may not have access to some fuse registers + * (referred to here as 'runtime registers') and therefore VF drivers need to ask + * the PF driver to obtain their values. + * + * However, the list of such registers, and their values, is fully owned and + * maintained by the PF driver and the VF driver may only initiate the query + * sequence and indicate in the @START field the starting index of the next + * requested register from this predefined list. + * + * In the response, the PF driver will return, for each entry, a tuple of the 32-bit + * register offset and the 32-bit value of that register (respectively @REG_OFFSET and @REG_VALUE).
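+ * + * For example (illustrative indices only): a query that starts at @START = 4 and is limited to two entries would return the (@REG_OFFSET, @REG_VALUE) tuples of registers 4 and 5 from the PF's predefined list, with @REMAINING reporting how many entries are left past index 5.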
+ * + * The VF driver can use @LIMIT field to limit number of returned register tuples. + * If @LIMIT is unset then PF decides about number of returned register tuples. + * + * This message definition is supported from ABI version 1.0. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = **LIMIT** - limit number of returned entries | + * | | | (use zero to not enforce any limits on the response) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME` = 0x0101 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **START** - index of the first requested entry | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **COUNT** - number of entries included in response | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **REMAINING** - number of remaining entries | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **REG_OFFSET** - offset of register[START] | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | DATA3 = **REG_VALUE** - value of register[START] | + * +---+-------+--------------------------------------------------------------+ + * | | | | + * +---+-------+--------------------------------------------------------------+ + * |n-1| 31:0 | REG_OFFSET - offset of register[START + x] | + * +---+-------+--------------------------------------------------------------+ + * | n | 31:0 | REG_VALUE - value of register[START + x] | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME 0x0101u + +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN 2u +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2PF_QUERY_RUNTIME_NO_LIMIT 0u +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START GUC_HXG_REQUEST_MSG_n_DATAn + +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN (GUC_HXG_MSG_MIN_LEN + 1u) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN \ + (VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + VF2PF_QUERY_RUNTIME_MAX_COUNT * 2) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2PF_QUERY_RUNTIME_MIN_COUNT 0 +#define VF2PF_QUERY_RUNTIME_MAX_COUNT \ + ((GUC_RELAY_MSG_MAX_LEN - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING GUC_HXG_RESPONSE_MSG_n_DATAn +#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_OFFSETx 
GUC_HXG_RESPONSE_MSG_n_DATAn +#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_VALUEx GUC_HXG_RESPONSE_MSG_n_DATAn + /** * DOC: GuC Relay Debug Actions * -- cgit From 1cb4db30cf685709584743d8bf8a0db2eac620c9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:33 +0200 Subject: drm/xe: Add helper to calculate adjusted register offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our MMIO accessing functions automatically adjust addresses for the media registers based on mmio.adj_limit and mmio.adj_offset logic. Move it to the separate helper to avoid code duplication and to allow using it by the upcoming changes to PF driver code. Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_mmio.c | 38 +++++++++++++++----------------------- drivers/gpu/drm/xe/xe_mmio.h | 7 +++++++ 2 files changed, 22 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 334637511e75..2b18e8149ec3 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -423,41 +423,33 @@ int xe_mmio_init(struct xe_device *xe) u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readl((reg.ext ? 
tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) @@ -486,10 +478,9 @@ bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg) { - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - return range && reg.addr >= range->start && reg.addr <= range->end; + return range && addr >= range->start && addr <= range->end; } /** @@ -519,10 +510,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; u32 ldw, udw, oldudw, retries; - if (reg.addr < gt->mmio.adj_limit) { - reg.addr += gt->mmio.adj_offset; - reg_udw.addr += gt->mmio.adj_offset; - } + reg.addr = xe_mmio_adjusted_addr(gt, reg.addr); + reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr); + + /* we shouldn't adjust just one register address */ + xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4); oldudw = xe_mmio_read32(gt, reg_udw); for (retries = 5; retries; --retries) { diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index a3cd7b3036c7..445ec6a0753e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -36,4 +36,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic); +static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr) +{ + if (addr < gt->mmio.adj_limit) + addr += gt->mmio.adj_offset; + return addr; +} + #endif -- cgit From dec793860d5137c58c633712554abfed71642a88 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:34 +0200 Subject: drm/xe: Add few more GT register definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we are not using these registers right now, they are part of some runtime register lists that the PF driver shares with VFs on some legacy platforms that we might want to support as SDV. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 94445810ccc9..6eea7a459c68 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -173,8 +173,11 @@ #define MAX_MSLICES 4 #define MEML3_EN_MASK REG_GENMASK(3, 0) +#define MIRROR_FUSE1 XE_REG(0x911c) + #define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_SLICE_ENABLE XE_REG(0x9138) #define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) #define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) -- cgit From 98e62805921cebcd2fcac3692037ca2ebef63b4a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:35 +0200 Subject: drm/xe/pf: Add SR-IOV GuC Relay PF services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a mechanism that allows a VF driver to communicate with the PF driver; now add the PF-side handlers for the VF2PF requests defined in version 1.0 of the VF/PF GuC Relay ABI specification.
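For illustration (outcomes per the negotiation rules implemented below): a VF that requests version 0.0, meaning "any", will be granted the PF's latest supported ABI version, currently 1.0, while a request for a version older than the supported base version will be rejected.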
The VF2PF_HANDSHAKE request must be used by the VF driver to negotiate the ABI version prior to sending any other request. We will reset any negotiated version later during FLR. The outcome of the VF2PF_QUERY_RUNTIME requests depends on the actual platform: for legacy platforms used as SDV it is provided as-is, while for the latest platforms it is preliminary and might change. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 546 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h | 36 ++ drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h | 52 +++ drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 5 + drivers/gpu/drm/xe/xe_guc_relay.c | 8 +- 6 files changed, 646 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 8bc62bfbc679..4fba50036539 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -164,6 +164,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ xe_gt_sriov_pf_policy.o \ + xe_gt_sriov_pf_service.o \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c new file mode 100644 index 000000000000..a253f196261d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_relay_actions_abi.h" + +#include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" +#include "regs/xe_regs.h" + +#include "xe_mmio.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_pf_service_types.h" +#include "xe_guc_ct.h" +#include "xe_guc_hxg_helpers.h" + +static void pf_init_versions(struct xe_gt *gt) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + /* base versions may differ between platforms */ + gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure.
*/ +static int pf_negotiate_version(struct xe_gt *gt, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; + struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; + + xe_gt_assert(gt, base.major); + xe_gt_assert(gt, base.major <= latest.major); + xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_gt_assert(gt, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + xe_gt_assert(gt, major || minor); + + gt->sriov.pf.vfs[vfid].version.major = major; + gt->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_gt *gt, u32 vfid) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + gt->sriov.pf.vfs[vfid].version.major = 0; + gt->sriov.pf.vfs[vfid].version.minor = 0; +} + +static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + return major == gt->sriov.pf.vfs[vfid].version.major && + minor <= gt->sriov.pf.vfs[vfid].version.minor; +} + +static const struct xe_reg tgl_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ats_m_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg pvc_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + CTC_MODE, /* _MMIO(0xA26C) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ver_1270_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ 
+ MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ver_2000_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) +{ + const struct xe_reg *regs; + + if (GRAPHICS_VERx100(xe) >= 2000) { + *count = ARRAY_SIZE(ver_2000_runtime_regs); + regs = ver_2000_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 1270) { + *count = ARRAY_SIZE(ver_1270_runtime_regs); + regs = ver_1270_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1260) { + *count = ARRAY_SIZE(pvc_runtime_regs); + regs = pvc_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1255) { + *count = ARRAY_SIZE(ats_m_runtime_regs); + regs = ats_m_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1200) { + *count = ARRAY_SIZE(tgl_runtime_regs); + regs = tgl_runtime_regs; + } else { + regs = ERR_PTR(-ENOPKG); + *count = 0; + } + + return regs; +} + +static int pf_alloc_runtime_info(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.size); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.regs); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.values); + + regs = pick_runtime_regs(xe, &size); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + if (unlikely(!size)) + return 0; + + values = drmm_kcalloc(&xe->drm, size, sizeof(u32), GFP_KERNEL); + if (!values) + return -ENOMEM; + + gt->sriov.pf.service.runtime.size = size; + gt->sriov.pf.service.runtime.regs = regs; + gt->sriov.pf.service.runtime.values = values; + + return 0; +} + +static void read_many(struct xe_gt *gt, unsigned int count, + const struct xe_reg *regs, u32 *values) +{ + while (count--) + *values++ = xe_mmio_read32(gt, *regs++); +} + +static void pf_prepare_runtime_info(struct xe_gt *gt) +{ + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + if (!gt->sriov.pf.service.runtime.size) + return; + + size = gt->sriov.pf.service.runtime.size; + regs = gt->sriov.pf.service.runtime.regs; + values = gt->sriov.pf.service.runtime.values; + + read_many(gt, size, regs, values); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_info_printer(gt); + + xe_gt_sriov_pf_service_print_runtime(gt, &p); + } +} + +/** + * xe_gt_sriov_pf_service_init - Early initialization of the GT SR-IOV PF services. 
+ * @gt: the &xe_gt to initialize + * + * Performs early initialization of the GT SR-IOV PF services, including preparation + * of the runtime info that will be shared with VFs. + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_service_init(struct xe_gt *gt) +{ + int err; + + pf_init_versions(gt); + + err = pf_alloc_runtime_info(gt); + if (unlikely(err)) + goto failed; + + return 0; +failed: + xe_gt_sriov_err(gt, "Failed to initialize service (%pe)\n", ERR_PTR(err)); + return err; +} + +/** + * xe_gt_sriov_pf_service_update - Update PF SR-IOV services. + * @gt: the &xe_gt to update + * + * Updates runtime data shared with VFs. + * + * This function can be called more than once. + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_service_update(struct xe_gt *gt) +{ + pf_prepare_runtime_info(gt); +} + +/** + * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) +{ + pf_disconnect(gt, vfid); +} + +/* Return: 0 on success or a negative error code on failure. */ +static int pf_process_handshake(struct xe_gt *gt, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(gt, vfid); + } else { + xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(gt, vfid, *major, *minor); + } + + return 0; +} + +/* Return: length of the response message or a negative error code on failure. */ +static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, + const u32 *request, u32 len, u32 *response, u32 size) +{ + u32 wanted_major, wanted_minor; + u32 major, minor; + u32 mbz; + int err; + + if (unlikely(len != VF2PF_HANDSHAKE_REQUEST_MSG_LEN)) + return -EMSGSIZE; + + mbz = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ, request[0]); + if (unlikely(mbz)) + return -EPFNOSUPPORT; + + wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); + wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); + + err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + if (err < 0) + return err; + + xe_gt_assert(gt, major || minor); + xe_gt_assert(gt, size >= VF2PF_HANDSHAKE_RESPONSE_MSG_LEN); + + response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, 0); + response[1] = FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, major) | + FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, minor); + + return VF2PF_HANDSHAKE_RESPONSE_MSG_LEN; +} + +struct reg_data { + u32 offset; + u32 value; +} __packed; +static_assert(hxg_sizeof(struct reg_data) == 2); + +/* Return: number of entries copied or negative error code on failure. 
*/ +static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit, + struct reg_data *data, u32 *remaining) +{ + struct xe_gt_sriov_pf_service_runtime_regs *runtime; + unsigned int count, i; + u32 addr; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + runtime = >->sriov.pf.service.runtime; + + if (start > runtime->size) + return -ERANGE; + + count = min_t(u32, runtime->size - start, limit); + + for (i = 0; i < count; ++i, ++data) { + addr = runtime->regs[start + i].addr; + data->offset = xe_mmio_adjusted_addr(gt, addr); + data->value = runtime->values[start + i]; + } + + *remaining = runtime->size - start - count; + return count; +} + +/* Return: length of the response message or a negative error code on failure. */ +static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, u32 *response, u32 resp_size) +{ + const u32 chunk_size = hxg_sizeof(struct reg_data); + struct reg_data *reg_data_buf; + u32 limit, start, max_chunks; + u32 remaining = 0; + int ret; + + if (!pf_is_negotiated(gt, origin, 1, 0)) + return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) + return -EMSGSIZE; + if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) + return -EPROTO; + if (unlikely(resp_size < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) + return -EINVAL; + + limit = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, msg[0]); + start = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, msg[1]); + + resp_size = min_t(u32, resp_size, VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN); + max_chunks = (resp_size - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / chunk_size; + limit = limit == VF2PF_QUERY_RUNTIME_NO_LIMIT ? max_chunks : min_t(u32, max_chunks, limit); + reg_data_buf = (void *)(response + VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN); + + ret = pf_service_runtime_query(gt, start, limit, reg_data_buf, &remaining); + if (ret < 0) + return ret; + + response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, ret); + response[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, remaining); + + return VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + ret * hxg_sizeof(struct reg_data); +} + +/** + * xe_gt_sriov_pf_service_process_request - Service GT level SR-IOV request message from the VF. + * @gt: the &xe_gt that provides the service + * @origin: VF number that is requesting the service + * @msg: request message + * @msg_len: length of the request message (in dwords) + * @response: placeholder for the response message + * @resp_size: length of the response message buffer (in dwords) + * + * This function processes `Relay Message`_ request from the VF. + * + * Return: length of the response message or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size) +{ + u32 action, data __maybe_unused; + int ret; + + xe_gt_assert(gt, msg_len >= GUC_HXG_MSG_MIN_LEN); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_REQUEST); + + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + data = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + xe_gt_sriov_dbg_verbose(gt, "service action %#x:%u from VF%u\n", + action, data, origin); + + switch (action) { + case GUC_RELAY_ACTION_VF2PF_HANDSHAKE: + ret = pf_process_handshake_msg(gt, origin, msg, msg_len, response, resp_size); + break; + case GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME: + ret = pf_process_runtime_query_msg(gt, origin, msg, msg_len, response, resp_size); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +/** + * xe_gt_sriov_pf_service_print_runtime - Print PF runtime data shared with VFs. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for PF use only. + */ +int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p) +{ + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + size = gt->sriov.pf.service.runtime.size; + regs = gt->sriov.pf.service.runtime.regs; + values = gt->sriov.pf.service.runtime.values; + + for (; size--; regs++, values++) { + drm_printf(p, "reg[%#x] = %#x\n", + xe_mmio_adjusted_addr(gt, regs->addr), *values); + } + + return 0; +} + +/** + * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for PF use only. + */ +int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_gt_sriov_pf_service_version *version; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + for (n = 1; n <= total_vfs; n++) { + version = >->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h new file mode 100644 index 000000000000..56aaadf0360d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_SERVICE_H_ +#define _XE_GT_SRIOV_PF_SERVICE_H_ + +#include +#include + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_pf_service_init(struct xe_gt *gt); +void xe_gt_sriov_pf_service_update(struct xe_gt *gt); +void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); + +int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size); +#else +static inline int +xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h new file mode 
100644 index 000000000000..ad6dd75f0056 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_GT_SRIOV_PF_SERVICE_TYPES_H_ + +#include + +struct xe_reg; + +/** + * struct xe_gt_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_gt_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_gt_sriov_pf_service_runtime_regs - Runtime data shared with VFs. + * @regs: pointer to static array with register offsets. + * @values: pointer to array with captured register values. + * @size: size of the regs and value arrays. + */ +struct xe_gt_sriov_pf_service_runtime_regs { + const struct xe_reg *regs; + u32 *values; + u32 size; +}; + +/** + * struct xe_gt_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + * @runtime: runtime data shared with VFs. + */ +struct xe_gt_sriov_pf_service { + struct { + struct xe_gt_sriov_pf_service_version base; + struct xe_gt_sriov_pf_service_version latest; + } version; + struct xe_gt_sriov_pf_service_runtime_regs runtime; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index faf9ee8266ce..880754f3e215 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -10,6 +10,7 @@ #include "xe_gt_sriov_pf_config_types.h" #include "xe_gt_sriov_pf_policy_types.h" +#include "xe_gt_sriov_pf_service_types.h" /** * struct xe_gt_sriov_metadata - GT level per-VF metadata. @@ -17,15 +18,19 @@ struct xe_gt_sriov_metadata { /** @config: per-VF provisioning data. */ struct xe_gt_sriov_config config; + /** @version: negotiated VF/PF ABI version */ + struct xe_gt_sriov_pf_service_version version; }; /** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @service: service data. * @policy: policy data. * @spare: PF-only provisioning configuration. * @vfs: metadata for all VFs. 
*/ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_policy policy; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index c0a2d8d5d3b3..c3bbaf474f9a 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -19,6 +19,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" @@ -664,6 +665,7 @@ static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin, static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, const u32 *msg, u32 len, u32 *response, u32 size) { + struct xe_gt *gt = relay_to_gt(relay); u32 type; int ret; @@ -674,8 +676,10 @@ static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); - /* XXX: PF services will be added later */ - ret = -EOPNOTSUPP; + if (IS_SRIOV_PF(relay_to_xe(relay))) + ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size); + else + ret = -EOPNOTSUPP; if (type == GUC_HXG_TYPE_EVENT) relay_assert(relay, ret <= 0); -- cgit From 11294bf38fa2f71619ebb5c7baa3bbe380cbcf0c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:36 +0200 Subject: drm/xe/kunit: Add PF service tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start with basic tests for VF/PF ABI version negotiation. As we treat all platforms in the same way, we can run the tests on one platform. More tests will likely come later. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-6-michal.wajdeczko@intel.com --- .../gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c | 232 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 4 + 2 files changed, 236 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c new file mode 100644 index 000000000000..b683585db852 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + struct xe_gt *gt; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + gt = xe_device_get_gt(xe, 0); + pf_init_versions(gt); + + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.latest.major); + if (gt->sriov.pf.service.version.base.major == 
gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, + gt->sriov.pf.service.version.latest.minor); + + test->priv = gt; + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); + if (major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (!gt->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + 
gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (!gt->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major - 1, + gt->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index a253f196261d..0e23b7ea4f3e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -544,3 +544,7 @@ int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p return 0; } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_gt_sriov_pf_service_test.c" +#endif -- cgit From e42a51fb9c0f386d3ebb115d081896d41eb844af Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 15:12:42 +0200 Subject: drm/xe/pf: Expose SR-IOV VFs configuration over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to configure VF resources and to print actual provisioning details. Expose this functionality in debugfs to allow experiment with different settings or inspect details in case of unexpected issues with the provisioning. 
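For example, provisioning VF1 from a shell could look like this (the GT number and quota value below are hypothetical):

  # echo 16 > /sys/kernel/debug/dri/0/gt0/vf1/contexts_quota
  # cat /sys/kernel/debug/dri/0/gt0/pf/contexts_provisioned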
As debugfs attributes are per-VF, we use parent d_inode->i_private to store VFID, similarly how we did for per-GT attributes. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423131244.2045-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 5 + drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 203 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h | 18 +++ 4 files changed, 227 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 4fba50036539..efd660553673 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -163,6 +163,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ + xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ xe_lmtt.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index ff7f4cf52fa9..599aed47f2ba 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -13,6 +13,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" +#include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -21,6 +22,7 @@ #include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" +#include "xe_sriov.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -288,4 +290,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) root, minor); xe_uc_debugfs_register(>->uc, root); + + if (IS_SRIOV_PF(xe)) + xe_gt_sriov_pf_debugfs_register(gt, root); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c new file mode 100644 index 000000000000..32ce98698690 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include +#include + +#include "xe_bo.h" +#include "xe_debugfs.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_debugfs.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_debugfs.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_pm.h" + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 # d_inode->i_private = gt + * │   ├── pf # d_inode->i_private = gt + * │   ├── vf1 # d_inode->i_private = VFID(1) + * :   : + * │   ├── vfN # d_inode->i_private = VFID(N) + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +static struct xe_gt *extract_gt(struct dentry *d) +{ + return extract_priv(d->d_parent); +} + +static unsigned int extract_vfid(struct dentry *d) +{ + return extract_priv(d) == extract_gt(d) ? 
PFID : (uintptr_t)extract_priv(d); +} + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── ggtt_available + * │   │   ├── ggtt_provisioned + * │   │   ├── contexts_provisioned + * │   │   ├── doorbells_provisioned + */ + +static const struct drm_info_list pf_info[] = { + { + "ggtt_available", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_available_ggtt, + }, + { + "ggtt_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ggtt, + }, + { + "contexts_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ctxs, + }, + { + "doorbells_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_dbs, + }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── ggtt_spare + * │   │   ├── lmem_spare + * │   │   ├── doorbells_spare + * │   │   ├── contexts_spare + * │   │   ├── exec_quantum_ms + * │   │   ├── preempt_timeout_us + * │   ├── vf1 + * │   │   ├── ggtt_quota + * │   │   ├── lmem_quota + * │   │   ├── doorbells_quota + * │   │   ├── contexts_quota + * │   │   ├── exec_quantum_ms + * │   │   ├── preempt_timeout_us + */ + +#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ + \ +static int CONFIG##_set(void *data, u64 val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_device *xe = gt_to_xe(gt); \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int CONFIG##_get(void *data, u64 *val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + unsigned int vfid = extract_vfid(data); \ + \ + *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT) + +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, u64, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); + +static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) +{ + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, vfid == extract_vfid(parent)); + + if (!xe_gt_is_media_type(gt)) { + debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", + 0644, parent, parent, &ggtt_fops); + if (IS_DGFX(gt_to_xe(gt))) + debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", + 0644, parent, parent, &lmem_fops); + } + debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare", + 0644, parent, parent, &dbs_fops); + debugfs_create_file_unsafe(vfid ? "contexts_quota" : "contexts_spare", + 0644, parent, parent, &ctxs_fops); + debugfs_create_file_unsafe("exec_quantum_ms", 0644, parent, parent, + &exec_quantum_fops); + debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, + &preempt_timeout_fops); +} + +/** + * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @root: the &dentry that represents the GT directory + * + * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs. 
+ */ +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = xe->drm.primary; + int n, totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct dentry *pfdentry; + struct dentry *vfdentry; + char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */ + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + xe_gt_assert(gt, root->d_inode->i_private == gt); + + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + */ + pfdentry = debugfs_create_dir("pf", root); + if (IS_ERR(pfdentry)) + return; + pfdentry->d_inode->i_private = gt; + + drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + pf_add_config_attrs(gt, pfdentry, PFID); + + for (n = 1; n <= totalvfs; n++) { + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf1 + * │   ├── vf2 + */ + snprintf(buf, sizeof(buf), "vf%u", n); + vfdentry = debugfs_create_dir(buf, root); + if (IS_ERR(vfdentry)) + break; + vfdentry->d_inode->i_private = (void *)(uintptr_t)n; + + pf_add_config_attrs(gt, vfdentry, VFID(n)); + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h new file mode 100644 index 000000000000..038cc8ddc244 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_DEBUGFS_H_ +#define _XE_GT_SRIOV_PF_DEBUGFS_H_ + +struct xe_gt; +struct dentry; + +#ifdef CONFIG_PCI_IOV +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root); +#else +static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { } +#endif + +#endif -- cgit From b00240b6a28a36986c4021daabaecc81c708c01c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 15:12:43 +0200 Subject: drm/xe/pf: Expose SR-IOV VF control commands over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to control the VF. Allow to control the VF using debugfs. 
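For illustration only (not part of this patch), driving the new node needs
nothing beyond plain file I/O. The debugfs path below assumes DRM card 0,
gt0 and VF1:

  /* Illustrative userspace sketch: ask the PF to pause VF1 through the
   * new debugfs node. The dri/0, gt0 and vf1 path components are
   * assumptions; adjust them for the actual topology.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          const char *path = "/sys/kernel/debug/dri/0/gt0/vf1/control";
          int fd = open(path, O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "pause", strlen("pause")) < 0)
                  perror("write");
          close(fd);
          return 0;
  }

Reading the same file back returns the accepted commands (stop, pause,
resume), matching control_read() below.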
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423131244.2045-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 79 +++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 32ce98698690..8909bb950a8b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -14,6 +14,7 @@ #include "xe_gt.h" #include "xe_gt_debugfs.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_pm.h" @@ -153,6 +154,83 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &preempt_timeout_fops); } +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf1 + * │   │   ├── control { stop, pause, resume } + */ + +static const struct { + const char *cmd; + int (*fn)(struct xe_gt *gt, unsigned int vfid); +} control_cmds[] = { + { "stop", xe_gt_sriov_pf_control_stop_vf }, + { "pause", xe_gt_sriov_pf_control_pause_vf }, + { "resume", xe_gt_sriov_pf_control_resume_vf }, +}; + +static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file); + struct dentry *parent = dent->d_parent; + struct xe_gt *gt = extract_gt(parent); + struct xe_device *xe = gt_to_xe(gt); + unsigned int vfid = extract_vfid(parent); + int ret = -EINVAL; + char cmd[32]; + size_t n; + + xe_gt_assert(gt, vfid); + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + if (*pos) + return -ESPIPE; + + if (count > sizeof(cmd) - 1) + return -EINVAL; + + ret = simple_write_to_buffer(cmd, sizeof(cmd) - 1, pos, buf, count); + if (ret < 0) + return ret; + cmd[ret] = '\0'; + + for (n = 0; n < ARRAY_SIZE(control_cmds); n++) { + xe_gt_assert(gt, sizeof(cmd) > strlen(control_cmds[n].cmd)); + + if (sysfs_streq(cmd, control_cmds[n].cmd)) { + xe_pm_runtime_get(xe); + ret = control_cmds[n].fn ? (*control_cmds[n].fn)(gt, vfid) : 0; + xe_pm_runtime_put(xe); + break; + } + } + + return (ret < 0) ? ret : count; +} + +static ssize_t control_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + char help[128]; + size_t n; + + help[0] = '\0'; + for (n = 0; n < ARRAY_SIZE(control_cmds); n++) { + strlcat(help, control_cmds[n].cmd, sizeof(help)); + strlcat(help, "\n", sizeof(help)); + } + + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); +} + +static const struct file_operations control_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = control_write, + .read = control_read, + .llseek = default_llseek, +}; + /** * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. * @gt: the &xe_gt to register @@ -199,5 +277,6 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) vfdentry->d_inode->i_private = (void *)(uintptr_t)n; pf_add_config_attrs(gt, vfdentry, VFID(n)); + debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops); } } -- cgit From 2cab6319b41023e4ad7b1c4604b9aa994fa2d4d0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 15:12:44 +0200 Subject: drm/xe/pf: Expose SR-IOV policy settings over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to configure SR-IOV policies. 
Allow to tweak those policy settings over debugfs. Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423131244.2045-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 8909bb950a8b..ab1a26fce3aa 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" #include "xe_pm.h" /* @@ -76,6 +77,57 @@ static const struct drm_info_list pf_info[] = { }, }; +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── reset_engine + * │   │   ├── sample_period + * │   │   ├── sched_if_idle + */ + +#define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \ + \ +static int POLICY##_set(void *data, u64 val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + struct xe_device *xe = gt_to_xe(gt); \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int POLICY##_get(void *data, u64 *val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + \ + *val = xe_gt_sriov_pf_policy_get_##POLICY(gt); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(POLICY##_fops, POLICY##_get, POLICY##_set, FORMAT) + +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(reset_engine, bool, "%llu\n"); +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sched_if_idle, bool, "%llu\n"); +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sample_period, u32, "%llu\n"); + +static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) +{ + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, PFID == extract_vfid(parent)); + + debugfs_create_file_unsafe("reset_engine", 0644, parent, parent, &reset_engine_fops); + debugfs_create_file_unsafe("sched_if_idle", 0644, parent, parent, &sched_if_idle_fops); + debugfs_create_file_unsafe("sample_period_ms", 0644, parent, parent, &sample_period_fops); +} + /* * /sys/kernel/debug/dri/0/ * ├── gt0 @@ -261,6 +313,7 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + pf_add_policy_attrs(gt, pfdentry); pf_add_config_attrs(gt, pfdentry, PFID); for (n = 1; n <= totalvfs; n++) { -- cgit From 5a8c292f74c2b0dc84653c7b59323368a849a3ad Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 14:35:42 +0200 Subject: drm/xe/guc: Update VF configuration KLVs definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC firmware specification says that maximum value for the execution quantum KLV is 100s and anything exceeding that will be clamped. The same limitation applies to the preemption timeout KLV. 
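To make the units concrete, here is the arithmetic implied by the new
*_MAX_VALUE definitions, as a standalone sketch (not driver code; only the
two macro values are taken from this patch):

  /* Standalone sketch: the exec quantum maximum is in milliseconds, the
   * preemption timeout maximum in microseconds, and both express the
   * same 100 s ceiling that the firmware clamps to.
   */
  #include <assert.h>
  #include <stdint.h>

  #define GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE    100000u    /* ms */
  #define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE 100000000u /* us */

  static uint32_t clamp_u32(uint32_t val, uint32_t max)
  {
          return val > max ? max : val;
  }

  int main(void)
  {
          /* Both maxima are the same 100 s limit in their own unit. */
          assert(GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE / 1000u == 100u);
          assert(GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE / 1000000u == 100u);

          /* A 250 s request would be silently reduced to 100 s. */
          assert(clamp_u32(250000u, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE) == 100000u);
          return 0;
  }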
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419123543.270-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 511cf974d585..e53ffaee2fcd 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -201,7 +201,11 @@ enum { * it to take effect. Such cases might typically happen on a 1PF+1VF * Virtualization config enabled for heavier workloads like AI/ML. * + * The max value for this KLV is 100 seconds, anything exceeding that + * will be clamped to the max. + * * :0: infinite exec quantum (default) + * :100000: maximum exec quantum (100000ms == 100s) * * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02 * This config sets the VF-preemption-timeout in microseconds. @@ -219,7 +223,11 @@ enum { * on a 1PF+1VF Virtualization config enabled for heavier workloads like * AI/ML. * + * The max value for this KLV is 100 seconds, anything exceeding that + * will be clamped to the max. + * * :0: no preemption timeout (default) + * :100000000: maximum preemption timeout (100000000us == 100s) * * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03 * This config sets threshold for CAT errors caused by the VF. @@ -291,9 +299,11 @@ enum { #define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01 #define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE 100000u -#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 -#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE 100000000u #define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03 #define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u -- cgit From 49f853c78e688780cacb9712be4136869f3e34fe Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 14:35:43 +0200 Subject: drm/xe/pf: Clamp maximum execution quantum to 100s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC is silently clamping values of the execution quantum and preemption timeout KLVs to 100s. Perform explicit clamping on the driver side as later there is no way to read back values used by the firmware and we shouldn't mislead the user about actual values being used when we print them in dmesg or debugfs. 
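Note the interface change this implies: the push helpers now take the value
by pointer, so the caller ends up caching what was actually sent rather than
what was requested. A minimal sketch of that pattern (names are illustrative,
not the driver's):

  /* Illustrative sketch: clamp through the pointer so the caller stores
   * exactly the value pushed to the firmware.
   */
  #include <stdint.h>
  #include <stdio.h>

  #define EXEC_QUANTUM_MAX_MS 100000u

  static int push_exec_quantum(uint32_t *ms)
  {
          if (*ms > EXEC_QUANTUM_MAX_MS)
                  *ms = EXEC_QUANTUM_MAX_MS; /* mirror the GuC clamp */
          /* ...send *ms to the firmware here... */
          return 0;
  }

  int main(void)
  {
          uint32_t quantum = 250000u; /* 250 s, over the 100 s limit */

          push_exec_quantum(&quantum);
          printf("stored exec quantum: %u ms\n", quantum); /* prints 100000 */
          return 0;
  }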
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419123543.270-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 79116ad58620..7eac01e04cc5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -187,14 +187,20 @@ static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u3 return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); } -static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum) +static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 *exec_quantum) { - return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum); + /* GuC will silently clamp values exceeding max */ + *exec_quantum = min_t(u32, *exec_quantum, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE); + + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, *exec_quantum); } -static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout) +static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 *preempt_timeout) { - return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout); + /* GuC will silently clamp values exceeding max */ + *preempt_timeout = min_t(u32, *preempt_timeout, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE); + + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout); } static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1604,7 +1610,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; - err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum); + err = pf_push_vf_cfg_exec_quantum(gt, vfid, &exec_quantum); if (unlikely(err)) return err; @@ -1674,7 +1680,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; - err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout); + err = pf_push_vf_cfg_preempt_timeout(gt, vfid, &preempt_timeout); if (unlikely(err)) return err; -- cgit From f332625733b967afca3ccbc32553e3003d847b44 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 24 Apr 2024 07:03:01 -0700 Subject: drm/xe: Store xe_hw_engine in xe_hw_engine_snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A future patch will require gt and xe device structs, so here replacing class by hwe. 
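The shape of the refactor can be shown in isolation: instead of copying a
single field out of the engine, the snapshot keeps a back-pointer, which
makes the GT (and from it the device) reachable. Stand-in types, a sketch
only:

  /* Stand-in types sketching the back-pointer refactor; the implicit
   * assumption is that the pointed-to engine outlives the snapshot.
   */
  struct gt;

  struct hw_engine {
          int class;
          struct gt *gt;
  };

  struct snapshot_old {
          int class;              /* one copied field */
  };

  struct snapshot_new {
          struct hw_engine *hwe;  /* everything reachable */
  };

  static struct gt *snapshot_gt(const struct snapshot_new *s)
  {
          return s->hwe->gt;      /* gt, and via gt the xe device */
  }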
Cc: Rodrigo Vivi Cc: Matt Roper Cc: Zhanjun Dong Cc: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240424140319.61651-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 6 +++--- drivers/gpu/drm/xe/xe_hw_engine_types.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 455f375c1cbd..c84dbe8a8ed1 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -791,7 +791,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return NULL; snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); - snapshot->class = hwe->class; + snapshot->hwe = hwe; snapshot->logical_instance = hwe->logical_instance; snapshot->forcewake.domain = hwe->domain; snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), @@ -842,7 +842,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); - if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); return snapshot; @@ -887,7 +887,7 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); - if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index d7f828c76cc5..27deaa31efd3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -158,8 +158,8 @@ struct xe_hw_engine { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; - /** @class: class of this hw engine */ - enum xe_engine_class class; + /** @hwe: hw engine */ + struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ u16 logical_instance; /** @forcewake: Force Wake information snapshot */ -- cgit From 082a634f608200d569412114fc0ee4d8c9f0f2aa Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 24 Apr 2024 07:03:02 -0700 Subject: drm/xe: Add helpers to loop over geometry and compute DSS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some DSS can only be available for geometry while others can only be available for compute. So here adding helpers to loop only available DSS for given usage. User of this helper will come in the next patch. 
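As the sketch below shows, the helpers boil down to testing a bit in the
geometry or compute fuse mask; plain 64-bit masks stand in for the driver's
xe_dss_mask_t and the fuse values are made up:

  /* Standalone sketch: a DSS is usable for geometry or compute iff its
   * bit is set in the corresponding fuse mask.
   */
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define MAX_DSS 64u

  static bool has_dss(uint64_t mask, unsigned int dss)
  {
          return mask & (UINT64_C(1) << dss);
  }

  int main(void)
  {
          uint64_t g_dss_mask = 0x0f; /* example fuse values only */
          uint64_t c_dss_mask = 0xf0;

          for (unsigned int dss = 0; dss < MAX_DSS; dss++) {
                  if (has_dss(g_dss_mask, dss))
                          printf("DSS %u: geometry\n", dss);
                  if (has_dss(c_dss_mask, dss))
                          printf("DSS %u: compute\n", dss);
          }
          return 0;
  }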
v2:
- drop has_dss()

Cc: Rodrigo Vivi
Cc: Matt Roper
Cc: Zhanjun Dong
Cc: Himal Prasad Ghimiray
Reviewed-by: Rodrigo Vivi
Signed-off-by: José Roberto de Souza
Link: https://patchwork.freedesktop.org/patch/msgid/20240424140319.61651-2-jose.souza@intel.com
---
 drivers/gpu/drm/xe/xe_gt_mcr.h      | 24 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_topology.c | 10 ++++++++++
 drivers/gpu/drm/xe/xe_gt_topology.h |  3 +++
 3 files changed, 37 insertions(+)
(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index a7f4ab1aa584..e7d03e001a49 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -40,4 +40,28 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
 	for_each_dss((dss), (gt)) \
 		for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true))

+/*
+ * Loop over each DSS available for geometry and determine the group and
+ * instance IDs that should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_geometry_dss(dss, gt, group, instance) \
+	for_each_dss_steering(dss, gt, group, instance) \
+		if (xe_gt_has_geometry_dss(gt, dss))
+
+/*
+ * Loop over each DSS available for compute and determine the group and
+ * instance IDs that should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_compute_dss(dss, gt, group, instance) \
+	for_each_dss_steering(dss, gt, group, instance) \
+		if (xe_gt_has_compute_dss(gt, dss))
+
 #endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 3733e7a6860d..af841d801a8f 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -278,3 +278,13 @@ bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)

 	return quad_first < (quad + 1) * dss_per_quad;
 }
+
+bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
+{
+	return test_bit(dss, gt->fuse_topo.g_dss_mask);
+}
+
+bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
+{
+	return test_bit(dss, gt->fuse_topo.c_dss_mask);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index b3e357777a6e..746b325bbf6e 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -33,4 +33,7 @@ bool xe_dss_mask_empty(const xe_dss_mask_t mask);

 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);

+bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss);
+bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss);
+
 #endif /* _XE_GT_TOPOLOGY_H_ */
--
cgit

From c8d4524ecc79f8b5a3bf58c6bd4438127c54a4cd Mon Sep 17 00:00:00 2001
From: José Roberto de Souza
Date: Wed, 24 Apr 2024 07:03:03 -0700
Subject: drm/xe: Add INSTDONE registers to devcoredump
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These registers contain important information that can help with
debugging GPU hangs.

While at it, also fix the double line break at the end of the engine
registers for CCS engines.
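With this in place, the new values appear as INSTDONE lines in the
devcoredump text. A hypothetical userspace filter (the devcd1 instance name
is an assumption; it varies per dump):

  /* Hypothetical userspace sketch: print only the INSTDONE lines of a
   * devcoredump.
   */
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          FILE *f = fopen("/sys/class/devcoredump/devcd1/data", "r");
          char line[512];

          if (!f) {
                  perror("fopen");
                  return 1;
          }
          while (fgets(line, sizeof(line), f))
                  if (strstr(line, "INSTDONE"))
                          fputs(line, stdout);
          fclose(f);
          return 0;
  }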
v2: - print other INSTDONE registers v3: - add for_each_geometry/compute_dss() v4: - print one slice_common_instdone per glice in DG2+ v5: - rename registers prefix from DG2 to XEHPG (Zhanjun) Cc: Rodrigo Vivi Cc: Matt Roper Cc: Zhanjun Dong Cc: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240424140319.61651-3-jose.souza@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 13 ++++ drivers/gpu/drm/xe/xe_hw_engine.c | 128 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 16 ++++ 4 files changed, 158 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index af71b87d8030..97d2aed63e01 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -65,6 +65,7 @@ #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c) #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60) #define RING_IPEHR(base) XE_REG((base) + 0x68) +#define RING_INSTDONE(base) XE_REG((base) + 0x6c) #define RING_ACTHD(base) XE_REG((base) + 0x74) #define RING_DMA_FADD(base) XE_REG((base) + 0x78) #define RING_HWS_PGA(base) XE_REG((base) + 0x80) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 6eea7a459c68..83847f2da72a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -98,6 +98,8 @@ #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) +#define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c) + #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) @@ -115,6 +117,14 @@ #define FLSH_IGNORES_PSD REG_BIT(10) #define FD_END_COLLECT REG_BIT(5) +#define SC_INSTDONE XE_REG(0x7100) +#define SC_INSTDONE_EXTRA XE_REG(0x7104) +#define SC_INSTDONE_EXTRA2 XE_REG(0x7108) + +#define XEHPG_SC_INSTDONE XE_REG_MCR(0x7100) +#define XEHPG_SC_INSTDONE_EXTRA XE_REG_MCR(0x7104) +#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) + #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) @@ -345,6 +355,9 @@ #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) +#define SAMPLER_INSTDONE XE_REG_MCR(0xe160) +#define ROW_INSTDONE XE_REG_MCR(0xe164) + #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c84dbe8a8ed1..4cc757457e01 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -18,6 +18,7 @@ #include "xe_gt.h" #include "xe_gt_ccs_mode.h" #include "xe_gt_printk.h" +#include "xe_gt_mcr.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -766,6 +767,57 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) xe_hw_fence_irq_run(hwe->fence_irq); } +static bool +is_slice_common_per_gslice(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) >= 1255; +} + +static void +xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, + struct xe_hw_engine_snapshot *snapshot) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + unsigned int dss; + u16 group, 
instance; + + snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + if (is_slice_common_per_gslice(xe) == false) { + snapshot->reg.instdone.slice_common[0] = + xe_mmio_read32(gt, SC_INSTDONE); + snapshot->reg.instdone.slice_common_extra[0] = + xe_mmio_read32(gt, SC_INSTDONE_EXTRA); + snapshot->reg.instdone.slice_common_extra2[0] = + xe_mmio_read32(gt, SC_INSTDONE_EXTRA2); + } else { + for_each_geometry_dss(dss, gt, group, instance) { + snapshot->reg.instdone.slice_common[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance); + snapshot->reg.instdone.slice_common_extra[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance); + snapshot->reg.instdone.slice_common_extra2[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance); + } + } + + for_each_geometry_dss(dss, gt, group, instance) { + snapshot->reg.instdone.sampler[dss] = + xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance); + snapshot->reg.instdone.row[dss] = + xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance); + + if (GRAPHICS_VERx100(xe) >= 1255) + snapshot->reg.instdone.geom_svg[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT, + group, instance); + } +} + /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. @@ -780,6 +832,7 @@ struct xe_hw_engine_snapshot * xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { struct xe_hw_engine_snapshot *snapshot; + size_t len; u64 val; if (!xe_hw_engine_is_valid(hwe)) @@ -790,6 +843,28 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (!snapshot) return NULL; + /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it + * includes xe_hw_engine_types.h the length of this 3 registers can't be + * set in struct xe_hw_engine_snapshot, so here doing additional + * allocations. 
+ */ + len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32)); + snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC); + if (!snapshot->reg.instdone.slice_common || + !snapshot->reg.instdone.slice_common_extra || + !snapshot->reg.instdone.slice_common_extra2 || + !snapshot->reg.instdone.sampler || + !snapshot->reg.instdone.row || + !snapshot->reg.instdone.geom_svg) { + xe_hw_engine_snapshot_free(snapshot); + return NULL; + } + snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); snapshot->hwe = hwe; snapshot->logical_instance = hwe->logical_instance; @@ -841,6 +916,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); @@ -848,6 +924,49 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; } +static void +xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p) +{ + struct xe_gt *gt = snapshot->hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + u16 group, instance; + unsigned int dss; + + drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring); + + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + if (is_slice_common_per_gslice(xe) == false) { + drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common[0]); + drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common_extra[0]); + drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common_extra2[0]); + } else { + for_each_geometry_dss(dss, gt, group, instance) { + drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common[dss]); + drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common_extra[dss]); + drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common_extra2[dss]); + } + } + + for_each_geometry_dss(dss, gt, group, instance) { + drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.sampler[dss]); + drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.row[dss]); + + if (GRAPHICS_VERx100(xe) >= 1255) + drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n", + dss, snapshot->reg.instdone.geom_svg[dss]); + } +} + /** * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. * @snapshot: Xe HW Engine snapshot object. 
@@ -887,9 +1006,12 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
+	xe_hw_engine_snapshot_instdone_print(snapshot, p);
+
 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
 		drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode);
+	drm_puts(p, "\n");
 }

 /**
@@ -904,6 +1026,12 @@ void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
 	if (!snapshot)
 		return;

+	kfree(snapshot->reg.instdone.slice_common);
+	kfree(snapshot->reg.instdone.slice_common_extra);
+	kfree(snapshot->reg.instdone.slice_common_extra2);
+	kfree(snapshot->reg.instdone.sampler);
+	kfree(snapshot->reg.instdone.row);
+	kfree(snapshot->reg.instdone.geom_svg);
 	kfree(snapshot->name);
 	kfree(snapshot);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 27deaa31efd3..9f9755e31b9f 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -211,6 +211,22 @@ struct xe_hw_engine_snapshot {
 		u32 ipehr;
 		/** @reg.rcu_mode: RCU_MODE */
 		u32 rcu_mode;
+		struct {
+			/** @reg.instdone.ring: RING_INSTDONE */
+			u32 ring;
+			/** @reg.instdone.slice_common: SC_INSTDONE */
+			u32 *slice_common;
+			/** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
+			u32 *slice_common_extra;
+			/** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
+			u32 *slice_common_extra2;
+			/** @reg.instdone.sampler: SAMPLER_INSTDONE */
+			u32 *sampler;
+			/** @reg.instdone.row: ROW_INSTDONE */
+			u32 *row;
+			/** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
+			u32 *geom_svg;
+		} instdone;
 	} reg;
 };
--
cgit

From fb74b205cdd26357469cab8957f5935f10b810e2 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Tue, 23 Apr 2024 18:18:14 -0400
Subject: drm/xe: Introduce a simple wedged state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a very simple 'wedged' state where any attempt to access the
GPU is entirely blocked.

In some critical cases, like on gt_reset failure, we need to block any
other attempt to use the GPU. Otherwise we are at risk of reaching
cases that would force us to reboot the machine.

So, when these cases are identified, we corner and block any GPU
access. No IOCTL and not even another GT reset should be attempted.

The 'wedged' state in Xe is an end state with no way back.
Only a device "re-probe" (unbind + bind) can restore the GPU access.

v2:
- s/wedged/busted (Lucas)
- use unbind+bind instead of module reload (Lucas)
- added more info on unbind operations and instruction on bug report
- only print the message once.

v3:
- s/busted/wedged (Ashutosh, Tvrtko, Thomas)
- don't assume user has sudo and tee available (Lucas)

v4:
- remove unnecessary cases around ct communication or migration.
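The core of the mechanism is a declare-once flag, sketched below with C11
atomics standing in for the kernel's atomic_t:

  /* Sketch of the declare-once pattern: the atomic exchange guarantees
   * the CRITICAL banner prints a single time even if several failure
   * paths wedge the device concurrently.
   */
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static atomic_int wedged;

  static bool device_wedged(void)
  {
          return atomic_load(&wedged);
  }

  static void declare_wedged(void)
  {
          if (!atomic_exchange(&wedged, 1))
                  fprintf(stderr, "CRITICAL: device is now wedged\n");
  }

  int main(void)
  {
          declare_wedged();
          declare_wedged(); /* no second banner */
          return device_wedged() ? 0 : 1;
  }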
Cc: Ashutosh Dixit Cc: Tvrtko Ursulin Cc: Thomas Hellström Cc: Lucas De Marchi Cc: Anshuman Gupta Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi #v2 Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 6 ++++++ drivers/gpu/drm/xe/xe_device.h | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_gt.c | 5 ++++- drivers/gpu/drm/xe/xe_guc_pc.c | 3 +++ 5 files changed, 36 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 55bbc8b8df15..76a7b37a4a53 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -137,6 +137,9 @@ static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct xe_device *xe = to_xe_device(file_priv->minor->dev); long ret; + if (xe_device_wedged(xe)) + return -ECANCELED; + ret = xe_pm_runtime_get_ioctl(xe); if (ret >= 0) ret = drm_ioctl(file, cmd, arg); @@ -152,6 +155,9 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo struct xe_device *xe = to_xe_device(file_priv->minor->dev); long ret; + if (xe_device_wedged(xe)) + return -ECANCELED; + ret = xe_pm_runtime_get_ioctl(xe); if (ret >= 0) ret = drm_compat_ioctl(file, cmd, arg); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 36d4434ebccc..d2e4249d37ce 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -167,4 +167,24 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); +static inline bool xe_device_wedged(struct xe_device *xe) +{ + return atomic_read(&xe->wedged); +} + +static inline void xe_device_declare_wedged(struct xe_device *xe) +{ + if (!atomic_xchg(&xe->wedged, 1)) { + xe->needs_flr_on_fini = true; + drm_err(&xe->drm, + "CRITICAL: Xe has declared device %s as wedged.\n" + "IOCTLs and executions are blocked until device is probed again with unbind and bind operations:\n" + "echo '%s' > /sys/bus/pci/drivers/xe/unbind\n" + "echo '%s' > /sys/bus/pci/drivers/xe/bind\n" + "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", + dev_name(xe->drm.dev), dev_name(xe->drm.dev), + dev_name(xe->drm.dev)); + } +} + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8a9f12a8d7c1..91c720d6ad29 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -459,6 +459,9 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; + /** @wedged: Xe device faced a critical error and is now blocked. 
*/ + atomic_t wedged; + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 491d0413de15..e922e77f5010 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -633,6 +633,9 @@ static int gt_reset(struct xe_gt *gt) { int err; + if (xe_device_wedged(gt_to_xe(gt))) + return -ECANCELED; + /* We only support GT resets with GuC submission */ if (!xe_device_uc_enabled(gt_to_xe(gt))) return -ENODEV; @@ -685,7 +688,7 @@ err_msg: err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - gt_to_xe(gt)->needs_flr_on_fini = true; + xe_device_declare_wedged(gt_to_xe(gt)); return err; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 509649d0e65e..8fc757900ed1 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -902,6 +902,9 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) return; } + if (xe_device_wedged(xe)) + return; + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); -- cgit From 692818678e80e5999ee1975953f7c6f82cb4a2be Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 23 Apr 2024 18:18:15 -0400 Subject: drm/xe: declare wedged upon GuC load failure Let's block the device upon any GuC load failure. But let's continue with the probe so guc logs can be read from the debugfs. v2: - s/wedged/busted - do not block probe or we lose guc_logs in debugfs (Matt) v3: - s/busted/wedged v4: Do not change __xe_guc_upload return. (Himal) Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 240e7a4bbff1..17438d5b18a4 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -451,7 +451,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) return 0; } -static int guc_wait_ucode(struct xe_guc *guc) +static void guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u32 status; @@ -479,30 +479,26 @@ static int guc_wait_ucode(struct xe_guc *guc) 200000, &status, false); if (ret) { - xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status); - xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - xe_gt_info(gt, "GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } + xe_gt_err(gt, "GuC load failed: status = 0x%08X\n", status); + xe_gt_err(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) + xe_gt_err(gt, "GuC firmware signature verification failed\n"); if 
(REG_FIELD_GET(GS_UKERNEL_MASK, status) == - XE_GUC_LOAD_STATUS_EXCEPTION) { - xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n", - xe_mmio_read32(gt, SOFT_SCRATCH(13))); - ret = -ENXIO; - } + XE_GUC_LOAD_STATUS_EXCEPTION) + xe_gt_err(gt, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(gt, SOFT_SCRATCH(13))); + + xe_device_declare_wedged(gt_to_xe(gt)); } else { xe_gt_dbg(gt, "GuC successfully loaded\n"); } - - return ret; } static int __xe_guc_upload(struct xe_guc *guc) @@ -532,9 +528,7 @@ static int __xe_guc_upload(struct xe_guc *guc) goto out; /* Wait for authentication */ - ret = guc_wait_ucode(guc); - if (ret) - goto out; + guc_wait_ucode(guc); xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); return 0; -- cgit From 8ed9aaae39f39130b7a3eb2726be05d7f64b344c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 23 Apr 2024 18:18:16 -0400 Subject: drm/xe: Force wedged state and block GT reset upon any GPU hang In many validation situations when debugging GPU Hangs, it is useful to preserve the GT situation from the moment that the timeout occurred. This patch introduces a module parameter that could be used on situations like this. If xe.wedged module parameter is set to 2, Xe will be declared wedged on every single execution timeout (a.k.a. GPU hang) right after devcoredump snapshot capture and without attempting any kind of GT reset and blocking entirely any kind of execution. v2: Really block gt_reset from guc side. (Lucas) s/wedged/busted (Lucas) v3: - s/busted/wedged - Really use global_flags (Dafna) - More robust timeout handling when wedging it. v4: A really robust clean exit done by Matt Brost. No more kernel warns on unbind. v5: Simplify error message (Lucas) Cc: Matthew Brost Cc: Dafna Hirschfeld Cc: Lucas De Marchi Cc: Alan Previn Cc: Himanshu Somaiya Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-3-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 29 ++++++++++ drivers/gpu/drm/xe/xe_device.h | 15 +---- drivers/gpu/drm/xe/xe_exec_queue.h | 9 +++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 9 ++- drivers/gpu/drm/xe/xe_guc_submit.c | 90 ++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_module.c | 5 ++ drivers/gpu/drm/xe/xe_module.h | 1 + 8 files changed, 129 insertions(+), 31 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 76a7b37a4a53..d45db6ff1fa3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -764,3 +764,32 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) { return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } + +/** + * xe_device_declare_wedged - Declare device wedged + * @xe: xe device instance + * + * This is a final state that can only be cleared with a module + * re-probe (unbind + bind). + * In this state every IOCTL will be blocked so the GT cannot be used. + * In general it will be called upon any critical error such as gt reset + * failure or guc loading failure. + * If xe.wedged module parameter is set to 2, this function will be called + * on every single execution timeout (a.k.a. GPU hang) right after devcoredump + * snapshot capture. In this mode, GT reset won't be attempted so the state of + * the issue is preserved for further debugging. 
+ */ +void xe_device_declare_wedged(struct xe_device *xe) +{ + if (xe_modparam.wedged_mode == 0) + return; + + if (!atomic_xchg(&xe->wedged, 1)) { + xe->needs_flr_on_fini = true; + drm_err(&xe->drm, + "CRITICAL: Xe has declared device %s as wedged.\n" + "IOCTLs and executions are blocked. Only a rebind may clear the failure\n" + "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", + dev_name(xe->drm.dev)); + } +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index d2e4249d37ce..9ede45fc062a 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -172,19 +172,6 @@ static inline bool xe_device_wedged(struct xe_device *xe) return atomic_read(&xe->wedged); } -static inline void xe_device_declare_wedged(struct xe_device *xe) -{ - if (!atomic_xchg(&xe->wedged, 1)) { - xe->needs_flr_on_fini = true; - drm_err(&xe->drm, - "CRITICAL: Xe has declared device %s as wedged.\n" - "IOCTLs and executions are blocked until device is probed again with unbind and bind operations:\n" - "echo '%s' > /sys/bus/pci/drivers/xe/unbind\n" - "echo '%s' > /sys/bus/pci/drivers/xe/bind\n" - "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", - dev_name(xe->drm.dev), dev_name(xe->drm.dev), - dev_name(xe->drm.dev)); - } -} +void xe_device_declare_wedged(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 02ce8d204622..48f6da53a292 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -26,6 +26,15 @@ void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance); +static inline struct xe_exec_queue * +xe_exec_queue_get_unless_zero(struct xe_exec_queue *q) +{ + if (kref_get_unless_zero(&q->refcount)) + return q; + + return NULL; +} + struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id); static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 93df2d7969b3..8e9c4b990fbb 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -245,7 +245,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return seqno; xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_uc_enabled(xe)) { + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1aafa486edec..db817a46f157 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -20,6 +20,7 @@ #include "xe_lrc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_module.h" #include "xe_platform_types.h" #include "xe_wa.h" @@ -440,11 +441,17 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) static void guc_policies_init(struct xe_guc_ads *ads) { + u32 global_flags = 0; + ads_blob_write(ads, policies.dpc_promote_time, GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); ads_blob_write(ads, policies.max_num_work_items, GLOBAL_POLICY_MAX_NUM_WI); - ads_blob_write(ads, policies.global_flags, 0); + + if (xe_modparam.wedged_mode == 2) + global_flags |= 
GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + ads_blob_write(ads, policies.global_flags, global_flags); ads_blob_write(ads, policies.is_valid, 1); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index c7d38469fb46..0bea17536659 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -35,6 +35,7 @@ #include "xe_macros.h" #include "xe_map.h" #include "xe_mocs.h" +#include "xe_module.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" @@ -59,6 +60,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define ENGINE_STATE_SUSPENDED (1 << 5) #define EXEC_QUEUE_STATE_RESET (1 << 6) #define ENGINE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -175,9 +177,20 @@ static void set_exec_queue_killed(struct xe_exec_queue *q) atomic_or(ENGINE_STATE_KILLED, &q->guc->state); } -static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) +static bool exec_queue_wedged(struct xe_exec_queue *q) { - return exec_queue_killed(q) || exec_queue_banned(q); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; +} + +static void set_exec_queue_wedged(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); +} + +static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) +{ + return exec_queue_banned(q) || (atomic_read(&q->guc->state) & + (EXEC_QUEUE_STATE_WEDGED | ENGINE_STATE_KILLED)); } #ifdef CONFIG_PROVE_LOCKING @@ -240,6 +253,17 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } +static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (exec_queue_wedged(q)) + xe_exec_queue_put(q); +} + static const struct xe_exec_queue_ops guc_exec_queue_ops; static void primelockdep(struct xe_guc *guc) @@ -708,7 +732,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) trace_xe_sched_job_run(job); - if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { + if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) register_engine(q); if (!lr) /* LR jobs are emitted in the exec IOCTL */ @@ -844,6 +868,28 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +static void guc_submit_wedged(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + int err; + + xe_device_declare_wedged(guc_to_xe(guc)); + xe_guc_submit_reset_prepare(guc); + xe_guc_ct_stop(&guc->ct); + + err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + guc_submit_wedged_fini, guc); + if (err) + return; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); +} + static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) { struct xe_guc_exec_queue *ge = @@ -852,10 +898,16 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; + bool wedged = xe_device_wedged(xe); xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); + if (!wedged && xe_modparam.wedged_mode == 
2) { + guc_submit_wedged(exec_queue_to_guc(q)); + wedged = true; + } + /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -870,7 +922,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) * xe_guc_deregister_done_handler() which treats it as an unexpected * state. */ - if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -905,6 +957,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); int err = -ETIME; int i = 0; + bool wedged = xe_device_wedged(xe); /* * TDR has fired before free job worker. Common if exec queue @@ -928,6 +981,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) trace_xe_sched_job_timedout(job); + if (!wedged && xe_modparam.wedged_mode == 2) { + guc_submit_wedged(exec_queue_to_guc(q)); + wedged = true; + } + /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -935,8 +993,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * Kernel jobs should never fail, nor should VM jobs if they do * somethings has gone wrong and the GT needs a reset */ - if (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) { + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { if (!xe_sched_invalidate_job(job, 2)) { xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); @@ -946,7 +1004,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) } /* Engine state now stable, disable scheduling if needed */ - if (exec_queue_registered(q)) { + if (!wedged && exec_queue_registered(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -989,6 +1047,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) */ xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); + xe_guc_exec_queue_trigger_cleanup(q); /* Mark all outstanding jobs as bad, thus completing them */ @@ -1028,7 +1087,7 @@ static void guc_exec_queue_fini_async(struct xe_exec_queue *q) INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) + if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else queue_work(system_wq, &q->guc->fini_async); @@ -1063,7 +1122,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) { - return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); + return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); } static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) @@ -1274,7 +1333,7 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else __guc_exec_queue_fini(exec_queue_to_guc(q), q); @@ -1285,7 +1344,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q, { struct xe_sched_msg *msg; - if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) + if (q->sched_props.priority 
== priority || + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1303,7 +1363,7 @@ static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_u struct xe_sched_msg *msg; if (q->sched_props.timeslice_us == timeslice_us || - exec_queue_killed_or_banned(q)) + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1322,7 +1382,7 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, struct xe_sched_msg *msg; if (q->sched_props.preempt_timeout_us == preempt_timeout_us || - exec_queue_killed_or_banned(q)) + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1339,7 +1399,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) return -EINVAL; q->guc->suspend_pending = true; @@ -1485,7 +1545,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; - if (!exec_queue_killed_or_banned(q)) { + if (!exec_queue_killed_or_banned_or_wedged(q)) { int i; trace_xe_exec_queue_resubmit(q); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index ceb8345cbca6..3edeb30d5ccb 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -17,6 +17,7 @@ struct xe_modparam xe_modparam = { .enable_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, + .wedged_mode = 1, /* the rest are 0 by default */ }; @@ -55,6 +56,10 @@ MODULE_PARM_DESC(max_vfs, "(0 = no VFs [default]; N = allow up to N VFs)"); #endif +module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); +MODULE_PARM_DESC(wedged_mode, + "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + struct init_funcs { int (*init)(void); void (*exit)(void); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index b369984f08ec..61a0d28a28c8 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -21,6 +21,7 @@ struct xe_modparam { #ifdef CONFIG_PCI_IOV unsigned int max_vfs; #endif + int wedged_mode; }; extern struct xe_modparam xe_modparam; -- cgit From 6b8ef44cc0a952549a6773a0233cee853f807a79 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 23 Apr 2024 18:18:17 -0400 Subject: drm/xe: Introduce the wedged_mode debugfs So, the wedged mode can be selected per device at runtime, before the tests or before reproducing the issue. 
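For instance, a hypothetical way to flip the knob before reproducing a hang
(the dri/0 path assumes DRM card 0; the plain-C equivalent of an echo):

  /* Hypothetical userspace sketch: select wedged_mode=2 so the next hang
   * is preserved for debugging instead of triggering a reset.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          int fd = open("/sys/kernel/debug/dri/0/wedged_mode", O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "2", 1) != 1)
                  perror("write");
          close(fd);
          return 0;
  }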
v2:
- s/busted/wedged
- some locking consistency

v3:
- remove mutex
- toggle guc reset policy on any mode change

Cc: Lucas De Marchi
Cc: Alan Previn
Cc: Himal Prasad Ghimiray
Reviewed-by: Himal Prasad Ghimiray
Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-4-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/xe/xe_debugfs.c      | 55 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_device.c       | 10 ++++--
 drivers/gpu/drm/xe/xe_device.h       |  2 +-
 drivers/gpu/drm/xe/xe_device_types.h |  9 ++++--
 drivers/gpu/drm/xe/xe_guc_ads.c      | 60 ++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_guc_ads.h      |  1 +
 drivers/gpu/drm/xe/xe_guc_submit.c   | 35 ++++++++++++---------
 7 files changed, 149 insertions(+), 23 deletions(-)
(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index c9b30dbdc14d..0e61fa462c7b 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -12,6 +12,8 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt_debugfs.h"
+#include "xe_gt_printk.h"
+#include "xe_guc_ads.h"
 #include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_step.h"
@@ -117,6 +119,56 @@ static const struct file_operations forcewake_all_fops = {
 	.release = forcewake_release,
 };

+static ssize_t wedged_mode_show(struct file *f, char __user *ubuf,
+				size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	char buf[32];
+	int len = 0;
+
+	len = scnprintf(buf, sizeof(buf), "%d\n", xe->wedged.mode);
+
+	return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf,
+			       size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	struct xe_gt *gt;
+	u32 wedged_mode;
+	ssize_t ret;
+	u8 id;
+
+	ret = kstrtouint_from_user(ubuf, size, 0, &wedged_mode);
+	if (ret)
+		return ret;
+
+	if (wedged_mode > 2)
+		return -EINVAL;
+
+	if (xe->wedged.mode == wedged_mode)
+		return 0;
+
+	xe->wedged.mode = wedged_mode;
+
+	for_each_gt(gt, xe, id) {
+		ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads);
+		if (ret) {
+			xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n");
+			return -EIO;
+		}
+	}
+
+	return size;
+}
+
+static const struct file_operations wedged_mode_fops = {
+	.owner = THIS_MODULE,
+	.read = wedged_mode_show,
+	.write = wedged_mode_set,
+};
+
 void xe_debugfs_register(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
@@ -134,6 +186,9 @@ void xe_debugfs_register(struct xe_device *xe)
 	debugfs_create_file("forcewake_all", 0400, root, xe,
 			    &forcewake_all_fops);

+	debugfs_create_file("wedged_mode", 0400, root, xe,
+			    &wedged_mode_fops);
+
 	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
 		man = ttm_manager_type(bdev, mem_type);

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d45db6ff1fa3..a5b4a9643a78 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -506,6 +506,8 @@ int xe_device_probe_early(struct xe_device *xe)
 	if (err)
 		return err;

+	xe->wedged.mode = xe_modparam.wedged_mode;
+
 	return 0;
 }

@@ -769,7 +771,7 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
  * xe_device_declare_wedged - Declare device wedged
  * @xe: xe device instance
  *
- * This is a final state that can only be cleared with a module
+ * This is a final state that can only be cleared with a mudule
  * re-probe (unbind + bind).
* In this state every IOCTL will be blocked so the GT cannot be used. * In general it will be called upon any critical error such as gt reset @@ -781,10 +783,12 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) */ void xe_device_declare_wedged(struct xe_device *xe) { - if (xe_modparam.wedged_mode == 0) + if (xe->wedged.mode == 0) { + drm_dbg(&xe->drm, "Wedged mode is forcebly disabled\n"); return; + } - if (!atomic_xchg(&xe->wedged, 1)) { + if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; drm_err(&xe->drm, "CRITICAL: Xe has declared device %s as wedged.\n" diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 9ede45fc062a..82317580f4bf 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -169,7 +169,7 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); static inline bool xe_device_wedged(struct xe_device *xe) { - return atomic_read(&xe->wedged); + return atomic_read(&xe->wedged.flag); } void xe_device_declare_wedged(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 91c720d6ad29..af509af922b9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -459,8 +459,13 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; - /** @wedged: Xe device faced a critical error and is now blocked. */ - atomic_t wedged; + /** @wedged: Struct to control Wedged States and mode */ + struct { + /** @wedged.flag: Xe device faced a critical error and is now blocked. */ + atomic_t flag; + /** @wedged.mode: Mode controlled by kernel parameter and debugfs */ + int mode; + } wedged; /* private: */ diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index db817a46f157..6a5eb21748b1 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -9,6 +9,7 @@ #include +#include "abi/guc_actions_abi.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" @@ -16,11 +17,11 @@ #include "xe_gt.h" #include "xe_gt_ccs_mode.h" #include "xe_guc.h" +#include "xe_guc_ct.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" #include "xe_mmio.h" -#include "xe_module.h" #include "xe_platform_types.h" #include "xe_wa.h" @@ -441,6 +442,7 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) static void guc_policies_init(struct xe_guc_ads *ads) { + struct xe_device *xe = ads_to_xe(ads); u32 global_flags = 0; ads_blob_write(ads, policies.dpc_promote_time, @@ -448,7 +450,7 @@ static void guc_policies_init(struct xe_guc_ads *ads) ads_blob_write(ads, policies.max_num_work_items, GLOBAL_POLICY_MAX_NUM_WI); - if (xe_modparam.wedged_mode == 2) + if (xe->wedged.mode == 2) global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; ads_blob_write(ads, policies.global_flags, global_flags); @@ -806,3 +808,57 @@ void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) { guc_populate_golden_lrc(ads); } + +static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) +{ + struct xe_guc_ct *ct = &ads_to_guc(ads)->ct; + u32 action[] = { + XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, + policy_offset + }; + + return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); +} + +/** + * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy + * @ads: Additional data structures object + * + * This function update the GuC's engine reset policy based on 
wedged.mode. + * + * Return: 0 on success, and negative error code otherwise. + */ +int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_tile *tile = gt_to_tile(gt); + struct guc_policies *policies; + struct xe_bo *bo; + int ret = 0; + + policies = kmalloc(sizeof(*policies), GFP_KERNEL); + if (!policies) + return -ENOMEM; + + policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); + policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); + policies->is_valid = 1; + if (xe->wedged.mode == 2) + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + else + policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto out; + } + + ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); +out: + kfree(policies); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h index 138ef6267671..2e2531779122 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.h +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -13,5 +13,6 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); void xe_guc_ads_populate(struct xe_guc_ads *ads); void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); +int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0bea17536659..93e1ee183e4a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -35,7 +35,6 @@ #include "xe_macros.h" #include "xe_map.h" #include "xe_mocs.h" -#include "xe_module.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" @@ -868,26 +867,38 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } -static void guc_submit_wedged(struct xe_guc *guc) +static bool guc_submit_hint_wedged(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; unsigned long index; int err; - xe_device_declare_wedged(guc_to_xe(guc)); + if (xe->wedged.mode != 2) + return false; + + if (xe_device_wedged(xe)) + return true; + + xe_device_declare_wedged(xe); + xe_guc_submit_reset_prepare(guc); xe_guc_ct_stop(&guc->ct); err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, guc_submit_wedged_fini, guc); - if (err) - return; + if (err) { + drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. 
Although device is wedged.\n"); + return true; /* Device is wedged anyway */ + } mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) if (xe_exec_queue_get_unless_zero(q)) set_exec_queue_wedged(q); mutex_unlock(&guc->submission_state.lock); + + return true; } static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) @@ -898,15 +909,12 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged = xe_device_wedged(xe); + bool wedged; xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - if (!wedged && xe_modparam.wedged_mode == 2) { - guc_submit_wedged(exec_queue_to_guc(q)); - wedged = true; - } + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -957,7 +965,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); int err = -ETIME; int i = 0; - bool wedged = xe_device_wedged(xe); + bool wedged; /* * TDR has fired before free job worker. Common if exec queue @@ -981,10 +989,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) trace_xe_sched_job_timedout(job); - if (!wedged && xe_modparam.wedged_mode == 2) { - guc_submit_wedged(exec_queue_to_guc(q)); - wedged = true; - } + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job entry point */ xe_sched_submission_stop(sched); -- cgit From ad4ca914de384681ce8984785f4ee2078945a759 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 17:34:07 +0200 Subject: drm/xe/guc: Improve GuC doorbell/context ID manager intro message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use recently added str_plural() helper. 
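For reference, str_plural() lives in <linux/string_choices.h>; a sketch of its one-line shape shows why the "(s)" style suffixes can go away:

	/* sketch: picks the plural suffix from the count */
	static inline const char *str_plural(size_t num)
	{
		return num == 1 ? "" : "s";
	}

With it, "using %u doorbell%s" renders as "1 doorbell" or "4 doorbells" instead of the old "doorbell(s)".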
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419153407.402-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_db_mgr.c | 3 ++- drivers/gpu/drm/xe/xe_guc_id_mgr.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c index 8d9a0287df6b..6767e8076e6b 100644 --- a/drivers/gpu/drm/xe/xe_guc_db_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c @@ -106,7 +106,8 @@ int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count) if (ret) return ret; done: - xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count); + xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell%s\n", + dbm->count, str_plural(dbm->count)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c index 0fb7c6b78c31..cd0549d0ef89 100644 --- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -97,7 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit) if (ret) return ret; - xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total); + xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n", + idm->total, str_plural(idm->total)); return 0; } -- cgit From b5ef80879dfec1c8e2a992dc186196687293e1fe Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 10 Apr 2024 12:16:40 +0530 Subject: drm/xe/xe2: Add workaround 14021567978 Workaround 14021567978 applies to RenderCS xe2 V3: - Cover xe2_hpg as its landed upstream now V2(MattR): - Move tuning to wa and apply to xe2 Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240410064640.1010098-1-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index dcf7ed51757c..9d9b7fa7a8f0 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -673,6 +673,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, + { XE_RTP_NAME("14021567978"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), -- cgit From cbf7579304c234208569d767355cc39c0665bd5b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 9 Apr 2024 17:31:32 +0200 Subject: drm/xe: Check result of drmm_mutex_init() Although it's unlikely that drmm_mutex_init() will fail during driver initialization, however we shouldn't ignore this case. 
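The failure mode exists because drmm_mutex_init() is drm-managed: besides initializing the mutex it must allocate and register a release action, and that allocation can fail. A conceptual sketch of that shape (not a verbatim copy of drm_managed.c):

	static void drmm_mutex_release(struct drm_device *dev, void *res)
	{
		mutex_destroy(res);
	}

	int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
	{
		mutex_init(lock);
		return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
	}

Hence the -ENOMEM path, however unlikely at probe time, is real and worth propagating.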
Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240409153132.1111-1-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a5b4a9643a78..47db4bc9f12c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -274,7 +274,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - drmm_mutex_init(&xe->drm, &xe->usm.lock); + err = drmm_mutex_init(&xe->drm, &xe->usm.lock); + if (err) + goto err; + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { -- cgit From 4befb17e83ed8747049c91f5009e786bb858e446 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 24 Apr 2024 19:10:30 +0200 Subject: drm/xe/pf: Expose PF service details via debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debug purposes we might want to verify which registers values PF is sharing with VFs and to view which VF/PF ABI versions were negotiated by the VFs. Plug the 'print' functions already provided by the PF service code into our debugfs. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240424171030.2177-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index ab1a26fce3aa..5102035faa7e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -18,6 +18,7 @@ #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" /* @@ -52,6 +53,8 @@ static unsigned int extract_vfid(struct dentry *d) * │   │   ├── ggtt_provisioned * │   │   ├── contexts_provisioned * │   │   ├── doorbells_provisioned + * │   │   ├── runtime_registers + * │   │   ├── negotiated_versions */ static const struct drm_info_list pf_info[] = { @@ -75,6 +78,16 @@ static const struct drm_info_list pf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_config_print_dbs, }, + { + "runtime_registers", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_service_print_runtime, + }, + { + "negotiated_versions", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_service_print_version, + }, }; /* -- cgit From 7547a23cae4145836dbb94522453af4e7d0ccc92 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 24 Apr 2024 16:05:06 +0200 Subject: drm/xe/guc: Fix typos in VF CFG KLVs descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apart from the obvious spelling typo, use the correct values for infinity quantum/timeout settings (it's 0x0 instead of 0xFFFFFFFF). 
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240424140506.2133-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index e53ffaee2fcd..5c1d40432ca0 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -194,9 +194,9 @@ enum { * granularity) since the GPUs clock time runs off a different crystal * from the CPUs clock. Changing this KLV on a VF that is currently * running a context wont take effect until a new context is scheduled in. - * That said, when the PF is changing this value from 0xFFFFFFFF to - * something else, it might never take effect if the VF is running an - * inifinitely long compute or shader kernel. In such a scenario, the + * That said, when the PF is changing this value from 0x0 to + * a non-zero value, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. In such a scenario, the * PF would need to trigger a VM PAUSE and then change the KLV to force * it to take effect. Such cases might typically happen on a 1PF+1VF * Virtualization config enabled for heavier workloads like AI/ML. @@ -215,9 +215,9 @@ enum { * different crystal from the CPUs clock. Changing this KLV on a VF * that is currently running a context wont take effect until a new * context is scheduled in. - * That said, when the PF is changing this value from 0xFFFFFFFF to - * something else, it might never take effect if the VF is running an - * inifinitely long compute or shader kernel. + * That said, when the PF is changing this value from 0x0 to + * a non-zero value, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. * In this case, the PF would need to trigger a VM PAUSE and then change * the KLV to force it to take effect. Such cases might typically happen * on a 1PF+1VF Virtualization config enabled for heavier workloads like -- cgit From 3cd1585e57908b6efcd967465ef7685f40b2a294 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 18 Apr 2024 15:46:31 +0100 Subject: drm/xe/preempt_fence: enlarge the fence critical section It is really easy to introduce subtle deadlocks in preempt_fence_work_func() since we operate on single global ordered-wq for signalling our preempt fences behind the scenes, so even though we signal a particular fence, everything in the callback should be in the fence critical section, since blocking in the callback will prevent other published fences from signalling. If we enlarge the fence critical section to cover the entire callback, then lockdep should be able to understand this better, and complain if we grab a sensitive lock like vm->lock, which is also held when waiting on preempt fences. 
Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240418144630.299531-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_preempt_fence.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7d50c6e89d8e..5b243b7feb59 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -23,11 +23,19 @@ static void preempt_fence_work_func(struct work_struct *w) q->ops->suspend_wait(q); dma_fence_signal(&pfence->base); - dma_fence_end_signalling(cookie); - + /* + * Opt for keep everything in the fence critical section. This looks really strange since we + * have just signalled the fence, however the preempt fences are all signalled via single + * global ordered-wq, therefore anything that happens in this callback can easily block + * progress on the entire wq, which itself may prevent other published preempt fences from + * ever signalling. Therefore try to keep everything here in the callback in the fence + * critical section. For example if something below grabs a scary lock like vm->lock, + * lockdep should complain since we also hold that lock whilst waiting on preempt fences to + * complete. + */ xe_vm_queue_rebind_worker(q->vm); - xe_exec_queue_put(q); + dma_fence_end_signalling(cookie); } static const char * -- cgit From 6e78e0719d0ed5ec230e8e28bd59e47acb3dbc04 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 23 Apr 2024 08:47:22 +0100 Subject: Revert "drm/xe/vm: drop vm->destroy_work" This reverts commit 5b259c0d1d3caa6efc66c2b856840e68993f814e. Cleanup here is good, however we need to able to flush a worker during vm destruction which might involve sleeping, so bring back the worker. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240423074721.119633-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 17 +++++++++++++++-- drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++++ 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7ae2b0300db6..633485c8c62b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1173,6 +1173,8 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; +static void vm_destroy_work_func(struct work_struct *w); + /** * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the * given tile and vm. 
@@ -1252,6 +1254,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1489,9 +1493,10 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vm_put(vm); } -static void xe_vm_free(struct drm_gpuvm *gpuvm) +static void vm_destroy_work_func(struct work_struct *w) { - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1511,6 +1516,14 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) kfree(vm); } +static void xe_vm_free(struct drm_gpuvm *gpuvm) +{ + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) { struct xe_vm *vm; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 72a100671e5d..0447c79c40a2 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,6 +177,13 @@ struct xe_vm { */ struct list_head rebind_list; + /** + * @destroy_work: worker to destroy VM, needed as a dma_fence signaling + * from an irq context can be last put and the destroy needs to be able + * to sleep. + */ + struct work_struct destroy_work; + /** * @rftree: range fence tree to track updates to page table structure. * Used to implement conflict tracking between independent bind engines. -- cgit From 3d44d67c441a9fe6f81a1d705f7de009a32a5b35 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 23 Apr 2024 08:47:23 +0100 Subject: drm/xe/vm: prevent UAF in rebind_work_func() We flush the rebind worker during the vm close phase, however in places like preempt_fence_work_func() we seem to queue the rebind worker without first checking if the vm has already been closed. The concern here is the vm being closed with the worker flushed, but then being rearmed later, which looks like potential uaf, since there is no actual refcounting to track the queued worker. We can't take the vm->lock here in preempt_rebind_work_func() to first check if the vm is closed since that will deadlock, so instead flush the worker again when the vm refcount reaches zero. v2: - Grabbing vm->lock in the preempt worker creates a deadlock, so checking the closed state is tricky. Instead flush the worker when the refcount reaches zero. It should be impossible to queue the preempt worker without already holding vm ref. 
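The shape of the fix, as a sketch (the full hunk follows below): perform the flush on the final-put destroy path, where the invariant above makes it race-free:

	/* refcount is zero here, and queuing the worker requires holding a vm
	 * reference, so the worker cannot be re-armed after this flush */
	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);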
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1676 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1591 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1364 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1304 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1249 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240423074721.119633-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 633485c8c62b..dc685bf45857 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1504,6 +1504,9 @@ static void vm_destroy_work_func(struct work_struct *w) /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); + if (xe_vm_in_preempt_fence_mode(vm)) + flush_work(&vm->preempt.rebind_work); + mutex_destroy(&vm->snap_mutex); if (!(vm->flags & XE_VM_FLAG_MIGRATION)) -- cgit From 3f371a98deada9aee53d908c9aa53f6cdcb1300b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 22:47:47 -0700 Subject: drm/xe: Delete unused GuC submission_state.suspend GuC submission_state.suspend is unused, delete it. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425054747.1918811-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ---- drivers/gpu/drm/xe/xe_guc_types.h | 9 --------- 2 files changed, 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 93e1ee183e4a..8f409c9e0f3c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -273,7 +273,6 @@ static void primelockdep(struct xe_guc *guc) fs_reclaim_acquire(GFP_KERNEL); mutex_lock(&guc->submission_state.lock); - might_lock(&guc->submission_state.suspend.lock); mutex_unlock(&guc->submission_state.lock); fs_reclaim_release(GFP_KERNEL); @@ -301,9 +300,6 @@ int xe_guc_submit_init(struct xe_guc *guc) xa_init(&guc->submission_state.exec_queue_lookup); - spin_lock_init(&guc->submission_state.suspend.lock); - guc->submission_state.suspend.context = dma_fence_context_alloc(1); - primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 82bd93f7867d..546ac6350a31 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -72,15 +72,6 @@ struct xe_guc { atomic_t stopped; /** @submission_state.lock: protects submission state */ struct mutex lock; - /** @submission_state.suspend: suspend fence state */ - struct { - /** @submission_state.suspend.lock: suspend fences lock */ - spinlock_t lock; - /** @submission_state.suspend.context: suspend fences context */ - u64 context; - /** @submission_state.suspend.seqno: suspend fences seqno */ - u32 seqno; - } suspend; #ifdef CONFIG_PROVE_LOCKING #define NUM_SUBMIT_WQ 256 /** @submission_state.submit_wq_pool: submission ordered workqueues pool */ -- cgit From f85ada84f60cdcccb0ce897d7e54bac8c6f0722e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:40 -0700 Subject: drm/xe: s/ENGINE_STATE_ENABLED/EXEC_QUEUE_STATE_ENABLED Exec queue has replaced engine 
nomenclature. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8f409c9e0f3c..872a782337f2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -52,7 +52,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) * engine done being processed). */ #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define ENGINE_STATE_ENABLED (1 << 1) +#define EXEC_QUEUE_STATE_ENABLED (1 << 1) #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) @@ -78,17 +78,17 @@ static void clear_exec_queue_registered(struct xe_exec_queue *q) static bool exec_queue_enabled(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; } static void set_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_ENABLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static void clear_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static bool exec_queue_pending_enable(struct xe_exec_queue *q) -- cgit From 03b3517630ce2ad079d1863c408d5d4df7d80388 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:41 -0700 Subject: drm/xe: s/ENGINE_STATE_SUSPENDED/EXEC_QUEUE_STATE_SUSPENDED Exec queue has replaced engine nomenclature. 
Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 872a782337f2..5dab20fa6d74 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -56,7 +56,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define ENGINE_STATE_SUSPENDED (1 << 5) +#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) #define EXEC_QUEUE_STATE_RESET (1 << 6) #define ENGINE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) @@ -143,17 +143,17 @@ static void set_exec_queue_banned(struct xe_exec_queue *q) static bool exec_queue_suspended(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; } static void set_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static void clear_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static bool exec_queue_reset(struct xe_exec_queue *q) @@ -1471,7 +1471,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) set_exec_queue_suspended(q); suspend_fence_signal(q); } - atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); q->guc->resume_time = 0; trace_xe_exec_queue_stop(q); -- cgit From 1a1563e3245d96a30b62c30c4e6861ec9518699f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:42 -0700 Subject: drm/xe: s/ENGINE_STATE_KILLED/EXEC_QUEUE_STATE_KILLED Exec queue has replaced engine nomenclature. 
Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 5dab20fa6d74..d4aa3823410c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -58,7 +58,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) #define EXEC_QUEUE_STATE_RESET (1 << 6) -#define ENGINE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) @@ -168,12 +168,12 @@ static void set_exec_queue_reset(struct xe_exec_queue *q) static bool exec_queue_killed(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; } static void set_exec_queue_killed(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_KILLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); } static bool exec_queue_wedged(struct xe_exec_queue *q) @@ -189,7 +189,7 @@ static void set_exec_queue_wedged(struct xe_exec_queue *q) static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { return exec_queue_banned(q) || (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | ENGINE_STATE_KILLED)); + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED)); } #ifdef CONFIG_PROVE_LOCKING -- cgit From 3713a383f5402c57007d341703ce447fb6df1083 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:43 -0700 Subject: drm/xe: Fix alignment in GuC exec queue state defines Normalize the alignment for readability. v3: - Fix typo in commit (Himal) - Fix EXEC_QUEUE_STATE_WEDGED too (Himal) Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d4aa3823410c..1945bc5ffc21 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -53,13 +53,13 @@ exec_queue_to_guc(struct xe_exec_queue *q) */ #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) #define EXEC_QUEUE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) +#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) +#define EXEC_QUEUE_STATE_RESET (1 << 6) #define EXEC_QUEUE_STATE_KILLED (1 << 7) -#define EXEC_QUEUE_STATE_WEDGED (1 << 8) +#define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) { -- cgit From edc9f11af3adab20ede4a0289a1335f0d8125998 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:44 -0700 Subject: drm/xe: Replace engine references with exec queue in xe_guc_submit.c Exec queue has replaced engine nomenclature. 
Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 1945bc5ffc21..cd082b8523fa 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -449,9 +449,9 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_, val_) -static void __register_mlrc_engine(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) +static void __register_mlrc_exec_queue(struct xe_guc *guc, + struct xe_exec_queue *q, + struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) struct xe_device *xe = guc_to_xe(guc); @@ -488,8 +488,8 @@ static void __register_mlrc_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, len, 0, 0); } -static void __register_engine(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) +static void __register_exec_queue(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) { u32 action[] = { XE_GUC_ACTION_REGISTER_CONTEXT, @@ -509,7 +509,7 @@ static void __register_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_engine(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); @@ -557,9 +557,9 @@ static void register_engine(struct xe_exec_queue *q) set_exec_queue_registered(q); trace_xe_exec_queue_register(q); if (xe_exec_queue_is_parallel(q)) - __register_mlrc_engine(guc, q, &info); + __register_mlrc_exec_queue(guc, q, &info); else - __register_engine(guc, &info); + __register_exec_queue(guc, &info); init_policies(guc, q); } @@ -729,7 +729,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_engine(q); + register_exec_queue(q); if (!lr) /* LR jobs are emitted in the exec IOCTL */ q->ring_ops->emit_job(job); submit_exec_queue(q); -- cgit From a1adb3d250925ddccd5270106d39aa09493d6edf Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 24 Apr 2024 10:09:09 +0530 Subject: drm/xe/vm: Use xe_vm_lock()/xe_vm_unlock() helpers There is no change in functionality. Using the helper function defined within the driver. 
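For context, a sketch of what the helper wraps, consistent with the substitutions in the diff below (the exact body lives in xe_vm.c):

	int xe_vm_lock(struct xe_vm *vm, bool intr)
	{
		if (intr)
			return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);

		return dma_resv_lock(xe_vm_resv(vm), NULL);
	}

so xe_vm_lock(vm, true) is simply the interruptible dma-resv lock spelled through the driver's own vocabulary.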
-v2 Use xe_vm_unlock() (Ashutosh/Matt) -v3 Use xe_vm_unlock() for error label too (Matt) Reviewed-by: Badal Nilawar Cc: Matthew Brost Cc: Ashutosh Dixit Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240424043910.2190376-2-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dc685bf45857..c506432420c5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1278,7 +1278,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) drm_gem_object_put(vm_resv_obj); - err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + err = xe_vm_lock(vm, true); if (err) goto err_close; @@ -1322,7 +1322,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_pt_populate_empty(tile, vm, vm->pt_root[id]); } - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1364,7 +1364,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return vm; err_unlock_close: - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); -- cgit From c79828e0c7795cccc92abcd24107aa478168628c Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 24 Apr 2024 10:09:10 +0530 Subject: drm/xe: Use xe_bo_lock()/xe_bo_unlock() helpers There is no change in functionality. Using the helper function defined within the driver for locking/unlocking the reservation object. Cc: Matthew Brost Cc: Ashutosh Dixit Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240424043910.2190376-3-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 4 ++-- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 615bbc372ac6..2066d34ddf0b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1382,7 +1382,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) if (!snapshot->lrc_snapshot) goto put_bo; - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); if (!ttm_bo_vmap(&bo->ttm, &src)) { xe_map_memcpy_from(xe_bo_device(bo), snapshot->lrc_snapshot, &src, snapshot->lrc_offset, @@ -1392,7 +1392,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) kvfree(snapshot->lrc_snapshot); snapshot->lrc_snapshot = NULL; } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); put_bo: xe_bo_put(bo); } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c506432420c5..89c73d109f6a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3352,7 +3352,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) } if (bo) { - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); err = ttm_bo_vmap(&bo->ttm, &src); if (!err) { xe_map_memcpy_from(xe_bo_device(bo), @@ -3361,7 +3361,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) snap->snap[i].len); ttm_bo_vunmap(&bo->ttm, &src); } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); } else { void __user *userptr = (void 
__user *)(size_t)snap->snap[i].bo_ofs; -- cgit From c832541ca8d5b04cbf957ffce5f4a2a4ee6b396e Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 24 Apr 2024 09:49:11 +0530 Subject: drm/xe: Change xe_guc_submit_stop return to void The function xe_guc_submit_stop consistently returns 0 without an error state, prompting the caller to verify it, which is redundant. Cc: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240424041911.2184868-1-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 4 +--- drivers/gpu/drm/xe/xe_guc.c | 10 ++-------- drivers/gpu/drm/xe/xe_guc.h | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 3 +-- drivers/gpu/drm/xe/xe_guc_submit.h | 2 +- drivers/gpu/drm/xe/xe_uc.c | 12 ++++-------- drivers/gpu/drm/xe/xe_uc.h | 2 +- 7 files changed, 11 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e922e77f5010..a49e456b968d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -658,9 +658,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_stop_prepare(&gt->uc); xe_gt_pagefault_reset(gt); - err = xe_uc_stop(&gt->uc); - if (err) - goto err_out; + xe_uc_stop(&gt->uc); xe_gt_tlb_invalidation_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 17438d5b18a4..0c9938e0ab8c 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -885,17 +885,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc) XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); } -int xe_guc_stop(struct xe_guc *guc) +void xe_guc_stop(struct xe_guc *guc) { - int ret; - xe_guc_ct_stop(&guc->ct); - ret = xe_guc_submit_stop(guc); - if (ret) - return ret; - - return 0; + xe_guc_submit_stop(guc); } int xe_guc_start(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 94f2dc5f6f90..a3c92b74a3d5 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -35,7 +35,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); int xe_guc_reset_prepare(struct xe_guc *guc); void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); -int xe_guc_stop(struct xe_guc *guc); +void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); bool xe_guc_in_reset(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index cd082b8523fa..d274a139010b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1519,7 +1519,7 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc) wait_event(guc->ct.wq, !guc_read_stopped(guc)); } -int xe_guc_submit_stop(struct xe_guc *guc) +void xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; @@ -1539,7 +1539,6 @@ int xe_guc_submit_stop(struct xe_guc *guc) * creation which is protected by guc->submission_state.lock. 
*/ - return 0; } static void guc_exec_queue_start(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index fad0421ead36..4275b7da9df5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -16,7 +16,7 @@ int xe_guc_submit_init(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); -int xe_guc_submit_stop(struct xe_guc *guc); +void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 4feb35c95a1c..0f6cfe06e635 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -215,13 +215,13 @@ void xe_uc_stop_prepare(struct xe_uc *uc) xe_guc_stop_prepare(&uc->guc); } -int xe_uc_stop(struct xe_uc *uc) +void xe_uc_stop(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; + return; - return xe_guc_stop(&uc->guc); + xe_guc_stop(&uc->guc); } int xe_uc_start(struct xe_uc *uc) @@ -247,17 +247,13 @@ again: int xe_uc_suspend(struct xe_uc *uc) { - int ret; - /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; uc_reset_wait(uc); - ret = xe_uc_stop(uc); - if (ret) - return ret; + xe_uc_stop(uc); return xe_guc_suspend(&uc->guc); } diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index e4d4e3c99f0e..5dfa7725483d 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -16,7 +16,7 @@ int xe_uc_fini_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); -int xe_uc_stop(struct xe_uc *uc); +void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); -- cgit From d6c5bac8e3638de85190ff381f75b8120feafb9c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 25 Apr 2024 16:39:26 +0200 Subject: drm/xe/pf: Re-initialize SR-IOV specific HW settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On older platforms (12.00) the PF driver must explicitly unblock VF's modifications to the GGTT. On newer platforms this capability is enabled by default. 
Bspec: 49908, 53204 Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240425143927.2265-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 3 +++ drivers/gpu/drm/xe/xe_gt.c | 6 ++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5 +++++ 4 files changed, 39 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h index 617ddb84b7fa..017b4ddd1ecf 100644 --- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -14,6 +14,9 @@ #define LMEM_EN REG_BIT(31) #define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ +#define VIRTUAL_CTRL_REG XE_REG(0x10108c) +#define GUEST_GTT_UPDATE_EN REG_BIT(8) + #define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) #define VF_CAP REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a49e456b968d..0528d599c3fe 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -477,6 +477,9 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); @@ -613,6 +616,9 @@ static int do_gt_restart(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + xe_mocs_init(gt); err = xe_uc_start(&gt->uc); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 791dcdd767e2..687ea81931d1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,8 +5,11 @@ #include +#include "regs/xe_sriov_regs.h" + #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_mmio.h" /* * VF's metadata is maintained in the flexible array where: @@ -50,3 +53,25 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } + +static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) == 1200; +} + +static void pf_enable_ggtt_guest_update(struct xe_gt *gt) +{ + xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN); +} + +/** + * xe_gt_sriov_pf_init_hw - Initialize SR-IOV hardware support. + * @gt: the &xe_gt to initialize + * + * On some platforms the PF must explicitly enable VF's access to the GGTT. 
+ */ +void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) +{ + if (pf_needs_enable_ggtt_guest_update(gt_to_xe(gt))) + pf_enable_ggtt_guest_update(gt); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 05142ffc4319..37d7d6c3df03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,11 +10,16 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) { return 0; } + +static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) +{ +} #endif #endif -- cgit From e77dff51baf565c2a6b8c77b979c42e814ed0c73 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 25 Apr 2024 16:39:27 +0200 Subject: drm/xe/pf: Initialize and update PF services on driver init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_gt_sriov_pf_init_early() and xe_gt_sriov_pf_init_hw() are ideal places to call per-GT PF service init and update functions. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240425143927.2265-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 687ea81931d1..7decf71c2b7d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -9,6 +9,7 @@ #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_mmio.h" /* @@ -51,6 +52,10 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_sriov_pf_service_init(gt); + if (err) + return err; + return 0; } @@ -74,4 +79,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { if (pf_needs_enable_ggtt_guest_update(gt_to_xe(gt))) pf_enable_ggtt_guest_update(gt); + + xe_gt_sriov_pf_service_update(gt); } -- cgit From 445237d67a818c18a748602f8eaa4b52f8c6b39c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Apr 2024 10:49:04 +0100 Subject: drm/xe: Fix spelling mistake "forcebly" -> "forcibly" There is a spelling mistake in a drm_dbg message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240426094904.816033-1-colin.i.king@gmail.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 47db4bc9f12c..ca7a101bd34e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -787,7 +787,7 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) void xe_device_declare_wedged(struct xe_device *xe) { if (xe->wedged.mode == 0) { - drm_dbg(&xe->drm, "Wedged mode is forcebly disabled\n"); + drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n"); return; } -- cgit From 6a2a90cba12b42eb96c2af3426b77ceb4be31df2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 25 Apr 2024 11:16:09 -0700 Subject: drm/xe/display: Fix ADL-N detection Contrary to i915, in xe ADL-N is kept as a different platform, not a subplatform of ADL-P. Since the display side doesn't need to differentiate between P and N, i.e. 
IS_ALDERLAKE_P_N() is never called, just fixup the compat header to check for both P and N. Moving ADL-N to be a subplatform would be more complex as the firmware loading in xe only handles platforms, not subplatforms, as going forward the direction is to check on IP version rather than platforms/subplatforms. Fix warning when initializing display: xe 0000:00:02.0: [drm:intel_pch_type [xe]] Found Alder Lake PCH ------------[ cut here ]------------ xe 0000:00:02.0: drm_WARN_ON(!((dev_priv)->info.platform == XE_ALDERLAKE_S) && !((dev_priv)->info.platform == XE_ALDERLAKE_P)) And wrong paths being taken on the display side. Reviewed-by: Matt Roper Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240425181610.2704633-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 6a502e9f97d0..9ee694bf331f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -84,7 +84,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) +#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ + IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) #define IS_XEHPSDV(dev_priv) (dev_priv && 0) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) -- cgit From 77f2ef3f16f511c8a8444061d59c8eadc634d33b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:01 -0700 Subject: drm/xe: Lock all gpuva ops during VM bind IOCTL Lock all BOs used in gpuva ops and validate all BOs in a single step during the VM bind IOCTL. This help with the transition to making all gpuva ops in a VM bind IOCTL a single atomic job which is required for proper error handling. 
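The single locking step is built on the drm_exec helper; a generic sketch of its loop semantics (standalone example under assumed names, not the xe code itself):

	#include <drm/drm_exec.h>

	static int lock_all(struct drm_gem_object *obj)
	{
		struct drm_exec exec;
		int ret = 0;

		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			ret = drm_exec_prepare_obj(&exec, obj, 0);
			/* on ww-mutex contention, every lock taken so far is
			 * dropped and the loop body restarts, so the body must
			 * be safe to re-run */
			drm_exec_retry_on_contention(&exec);
			if (ret)
				break;
		}
		drm_exec_fini(&exec);
		return ret;
	}

Because the body can re-run, the lock-and-prep step in this patch stays free of side effects until every object is held.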
v2: - Better commit message (Oak) - s/op_lock/op_lock_and_prep, few other renames too (Oak) - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing) - Do not reserve slots in locking step (direction based on series from Thomas) v3: - Validate BO if is immediate set (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 148 +++++++++++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 89c73d109f6a..cc9d2ebc7e66 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 -static void xe_vm_kill(struct xe_vm *vm) +static void xe_vm_kill(struct xe_vm *vm, bool unlocked) { struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); - xe_vm_lock(vm, false); + if (unlocked) + xe_vm_lock(vm, false); + vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) q->ops->kill(q); - xe_vm_unlock(vm); + + if (unlocked) + xe_vm_unlock(vm); /* TODO: Inform user the VM is banned */ } @@ -557,7 +561,7 @@ out_unlock_outer: if (err) { drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); - xe_vm_kill(vm); + xe_vm_kill(vm, true); } up_write(&vm->lock); @@ -1774,17 +1778,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue u32 num_syncs, bool immediate, bool first_op, bool last_op) { - int err; - xe_vm_assert_held(vm); xe_bo_assert_held(bo); - if (bo && immediate) { - err = xe_bo_validate(bo, vm, true); - if (err) - return err; - } - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, last_op); } @@ -2437,17 +2433,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return 0; } -static int op_execute(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma *vma, struct xe_vma_op *op) +static int op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { int err; lockdep_assert_held_write(&vm->lock); - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; - xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -2528,19 +2520,10 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, struct xe_vma_op *op) { - struct drm_exec exec; int err; retry_userptr: - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = op_execute(&exec, vm, vma, op); - drm_exec_retry_on_contention(&exec); - if (err) - break; - } - drm_exec_fini(&exec); - + err = op_execute(vm, vma, op); if (err == -EAGAIN) { lockdep_assert_held_write(&vm->lock); @@ -2705,29 +2688,114 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } } +static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, + bool validate) +{ + struct xe_bo *bo = xe_vma_bo(vma); + int err = 0; + + if (bo) { + if (!bo->vm) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0); + if (!err && validate) + err = xe_bo_validate(bo, xe_vma_vm(vma), true); + } + + return err; +} + +static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, + struct xe_vma_op *op) +{ + int err = 0; + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err = 
vma_lock_and_validate(exec, op->map.vma, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); + break; + case DRM_GPUVA_OP_REMAP: + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.remap.unmap->va), + false); + if (!err && op->remap.prev) + err = vma_lock_and_validate(exec, op->remap.prev, true); + if (!err && op->remap.next) + err = vma_lock_and_validate(exec, op->remap.next, true); + break; + case DRM_GPUVA_OP_UNMAP: + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.unmap.va), + false); + break; + case DRM_GPUVA_OP_PREFETCH: + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.prefetch.va), true); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} + +static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, + struct xe_vm *vm, + struct list_head *ops_list) +{ + struct xe_vma_op *op; + int err; + + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0); + if (err) + return err; + + list_for_each_entry(op, ops_list, link) { + err = op_lock_and_prep(exec, vm, op); + if (err) + return err; + } + + return 0; +} + static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct list_head *ops_list) { + struct drm_exec exec; struct xe_vma_op *op, *next; int err; lockdep_assert_held_write(&vm->lock); - list_for_each_entry_safe(op, next, ops_list, link) { - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", - op->base.op, err); - /* - * FIXME: Killing VM rather than proper error handling - */ - xe_vm_kill(vm); - return -ENOSPC; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list); + drm_exec_retry_on_contention(&exec); + if (err) + goto unlock; + + list_for_each_entry_safe(op, next, ops_list, link) { + err = xe_vma_op_execute(vm, op); + if (err) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", + op->base.op, err); + /* + * FIXME: Killing VM rather than proper error handling + */ + xe_vm_kill(vm, false); + err = -ENOSPC; + goto unlock; + } + xe_vma_op_cleanup(vm, op); } - xe_vma_op_cleanup(vm, op); } - return 0; +unlock: + drm_exec_fini(&exec); + return err; } #define SUPPORTED_FLAGS \ -- cgit From 75192758d640227b68e4e21de811891219f3d0e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:02 -0700 Subject: drm/xe: Add ops_execute function which returns a fence Add ops_execute function which returns a fence. This will be helpful to initiate all binds (VM bind IOCTL, rebinds in exec IOCTL, rebinds in preempt rebind worker, and rebinds in pagefaults) via a gpuva ops list. Returning a fence is needed in various paths. 
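A minimal sketch of how a caller consumes the fence-or-ERR_PTR convention introduced here (using the ops_execute() form added in the diff below; the error codes are examples):

	struct dma_fence *fence;

	fence = ops_execute(vm, ops_list, true);
	if (IS_ERR(fence))
		return PTR_ERR(fence);	/* e.g. -EAGAIN or -ENOSPC */
	/* on success the caller owns a fence reference and must drop it */
	dma_fence_put(fence);
	return 0;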
v2: - Rebase Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 211 ++++++++++++++++++++++++--------------------- 1 file changed, 111 insertions(+), 100 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cc9d2ebc7e66..d0905d98de8c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1732,16 +1732,17 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) return NULL; } -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) +static struct dma_fence * +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, + bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); struct xe_user_fence *ufence; xe_vm_assert_held(vm); + xe_bo_assert_held(bo); ufence = find_ufence_get(syncs, num_syncs); if (vma->ufence && ufence) @@ -1753,7 +1754,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; } else { int i; @@ -1768,26 +1769,14 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, if (last_op) xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - - return 0; -} - -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_bo *bo, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) -{ - xe_vm_assert_held(vm); - xe_bo_assert_held(bo); - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, - last_op); + return fence; } -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool first_op, bool last_op) +static struct dma_fence * +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); @@ -1797,14 +1786,13 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; xe_vma_destroy(vma, fence); if (last_op) xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - return 0; + return fence; } #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ @@ -1947,10 +1935,11 @@ static const u32 region_to_mem_type[] = { XE_PL_VRAM1, }; -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, u32 region, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) +static struct dma_fence * +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, u32 region, + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); int err; @@ -1960,27 +1949,24 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, if 
(!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); if (err) - return err; + return ERR_PTR(err); } if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, true, first_op, last_op); } else { + struct dma_fence *fence = + xe_exec_queue_last_fence_get(wait_exec_queue, vm); int i; /* Nothing to do, signal fences now */ if (last_op) { - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(wait_exec_queue, vm); - + for (i = 0; i < num_syncs; i++) xe_sync_entry_signal(&syncs[i], fence); - dma_fence_put(fence); - } } - return 0; + return fence; } } @@ -2433,10 +2419,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return 0; } -static int op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { - int err; + struct dma_fence *fence = NULL; lockdep_assert_held_write(&vm->lock); @@ -2445,11 +2431,11 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma, switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), - op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), + op->syncs, op->num_syncs, + op->map.immediate || !xe_vm_in_fault_mode(vm), + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_REMAP: { @@ -2459,37 +2445,39 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma, if (!op->remap.unmap_done) { if (prev || next) vma->gpuva.flags |= XE_VMA_FIRST_REBIND; - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST && - !prev && !next); - if (err) + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST && + !prev && !next); + if (IS_ERR(fence)) break; op->remap.unmap_done = true; } if (prev) { op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.prev, op->q, - xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, true, false, - op->flags & XE_VMA_OP_LAST && !next); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.prev, op->q, + xe_vma_bo(op->remap.prev), op->syncs, + op->num_syncs, true, false, + op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.prev = NULL; } if (next) { op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.next, op->q, - xe_vma_bo(op->remap.next), - op->syncs, op->num_syncs, - true, false, - op->flags & XE_VMA_OP_LAST); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.next, op->q, + xe_vma_bo(op->remap.next), + op->syncs, op->num_syncs, + true, false, + op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.next = NULL; } @@ -2497,34 +2485,36 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma, break; } case DRM_GPUVA_OP_UNMAP: - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, op->flags & XE_VMA_OP_FIRST, + op->flags & 
XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, + op->syncs, op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - if (err) + if (IS_ERR(fence)) trace_xe_vma_fail(vma); - return err; + return fence; } -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static struct dma_fence * +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { + struct dma_fence *fence; int err; retry_userptr: - err = op_execute(vm, vma, op); - if (err == -EAGAIN) { + fence = op_execute(vm, vma, op); + if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) { lockdep_assert_held_write(&vm->lock); if (op->base.op == DRM_GPUVA_OP_REMAP) { @@ -2541,22 +2531,24 @@ retry_userptr: if (!err) goto retry_userptr; + fence = ERR_PTR(err); trace_xe_vma_fail(vma); } } - return err; + return fence; } -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) +static struct dma_fence * +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { - int ret = 0; + struct dma_fence *fence = ERR_PTR(-ENOMEM); lockdep_assert_held_write(&vm->lock); switch (op->base.op) { case DRM_GPUVA_OP_MAP: - ret = __xe_vma_op_execute(vm, op->map.vma, op); + fence = __xe_vma_op_execute(vm, op->map.vma, op); break; case DRM_GPUVA_OP_REMAP: { @@ -2569,23 +2561,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) else vma = op->remap.next; - ret = __xe_vma_op_execute(vm, vma, op); + fence = __xe_vma_op_execute(vm, vma, op); break; } case DRM_GPUVA_OP_UNMAP: - ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), - op); + fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), + op); break; case DRM_GPUVA_OP_PREFETCH: - ret = __xe_vma_op_execute(vm, - gpuva_to_vma(op->base.prefetch.va), - op); + fence = __xe_vma_op_execute(vm, + gpuva_to_vma(op->base.prefetch.va), + op); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - return ret; + return fence; } static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) @@ -2760,11 +2752,35 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return 0; } +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct list_head *ops_list, + bool cleanup) +{ + struct xe_vma_op *op, *next; + struct dma_fence *fence = NULL; + + list_for_each_entry_safe(op, next, ops_list, link) { + if (!IS_ERR(fence)) { + dma_fence_put(fence); + fence = xe_vma_op_execute(vm, op); + } + if (IS_ERR(fence)) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", + op->base.op, PTR_ERR(fence)); + fence = ERR_PTR(-ENOSPC); + } + if (cleanup) + xe_vma_op_cleanup(vm, op); + } + + return fence; +} + static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct list_head *ops_list) { struct drm_exec exec; - struct xe_vma_op *op, *next; + struct dma_fence *fence; int err; lockdep_assert_held_write(&vm->lock); @@ -2777,19 +2793,14 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, if (err) goto unlock; - list_for_each_entry_safe(op, next, ops_list, link) { - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", - op->base.op, err); - /* - * FIXME: Killing VM rather than proper error handling - */ - xe_vm_kill(vm, false); - err = -ENOSPC; - goto unlock; - 
} - xe_vma_op_cleanup(vm, op); + fence = ops_execute(vm, ops_list, true); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + /* FIXME: Killing VM rather than proper error handling */ + xe_vm_kill(vm, false); + goto unlock; + } else { + dma_fence_put(fence); } } -- cgit From 0a34c12449de4b09f74808c6f6c39205ee5071f0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:03 -0700 Subject: drm/xe: Move migrate to prefetch to op_lock_and_prep function All non-binding operations in VM bind IOCTL should be in the lock and prepare step rather than the execution step. Move prefetch to conform to this pattern. v2: - Rebase - New function names (Oak) - Update stale comment (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d0905d98de8c..e7be99acaff2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1937,20 +1937,10 @@ static const u32 region_to_mem_type[] = { static struct dma_fence * xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, u32 region, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - int err; - - xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); - - if (!xe_vma_has_no_bo(vma)) { - err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); - if (err) - return ERR_PTR(err); - } if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, @@ -2490,8 +2480,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - fence = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, + fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs, op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2722,9 +2711,20 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, false); break; case DRM_GPUVA_OP_PREFETCH: + { + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + u32 region = op->prefetch.region; + + xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.prefetch.va), true); + gpuva_to_vma(op->base.prefetch.va), + false); + if (!err && !xe_vma_has_no_bo(vma)) + err = xe_bo_migrate(xe_vma_bo(vma), + region_to_mem_type[region]); break; + } default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } -- cgit From 701109f2e3a45b9748f0f98849fde9a35d391efb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:04 -0700 Subject: drm/xe: Add struct xe_vma_ops abstraction Having a structure which encapsulates a list of VMA operations will help enable 1 job for the entire list. 
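At its core the abstraction is just an embedded list head that the parse step fills and the execute step walks; a simplified sketch in fragments (do_op() is a placeholder):

	struct xe_vma_ops vops;
	struct xe_vma_op *op;

	INIT_LIST_HEAD(&vops.list);	/* what xe_vma_ops_init() does below */
	/* parse: append each decoded bind operation */
	list_add_tail(&op->link, &vops.list);
	/* execute: walk the whole atomic unit in submission order */
	list_for_each_entry(op, &vops.list, link)
		do_op(op);		/* placeholder */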
v2: - Rebase Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 37 +++++++++++++++++++++---------------- drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++++ 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e7be99acaff2..93cf5116d2a9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2252,7 +2252,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct drm_gpuva_ops *ops, struct xe_sync_entry *syncs, u32 num_syncs, - struct list_head *ops_list, bool last) + struct xe_vma_ops *vops, bool last) { struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; @@ -2264,11 +2264,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; - bool first = list_empty(ops_list); + bool first = list_empty(&vops->list); unsigned int flags = 0; INIT_LIST_HEAD(&op->link); - list_add_tail(&op->link, ops_list); + list_add_tail(&op->link, &vops->list); if (first) { op->flags |= XE_VMA_OP_FIRST; @@ -2394,7 +2394,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } /* FIXME: Unhandled corner case */ - XE_WARN_ON(!last_op && last && !list_empty(ops_list)); + XE_WARN_ON(!last_op && last && !list_empty(&vops->list)); if (!last_op) return 0; @@ -2734,7 +2734,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct list_head *ops_list) + struct xe_vma_ops *vops) { struct xe_vma_op *op; int err; @@ -2743,7 +2743,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, if (err) return err; - list_for_each_entry(op, ops_list, link) { + list_for_each_entry(op, &vops->list, link) { err = op_lock_and_prep(exec, vm, op); if (err) return err; @@ -2753,13 +2753,13 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, } static struct dma_fence *ops_execute(struct xe_vm *vm, - struct list_head *ops_list, + struct xe_vma_ops *vops, bool cleanup) { struct xe_vma_op *op, *next; struct dma_fence *fence = NULL; - list_for_each_entry_safe(op, next, ops_list, link) { + list_for_each_entry_safe(op, next, &vops->list, link) { if (!IS_ERR(fence)) { dma_fence_put(fence); fence = xe_vma_op_execute(vm, op); @@ -2777,7 +2777,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, } static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct list_head *ops_list) + struct xe_vma_ops *vops) { struct drm_exec exec; struct dma_fence *fence; @@ -2788,12 +2788,12 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { - err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list); + err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); drm_exec_retry_on_contention(&exec); if (err) goto unlock; - fence = ops_execute(vm, ops_list, true); + fence = ops_execute(vm, vops, true); if (IS_ERR(fence)) { err = PTR_ERR(fence); /* FIXME: Killing VM rather than proper error handling */ @@ -2954,6 +2954,11 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return err; } +static 
void xe_vma_ops_init(struct xe_vma_ops *vops) +{ + INIT_LIST_HEAD(&vops->list); +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -2967,7 +2972,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; - LIST_HEAD(ops_list); + struct xe_vma_ops vops; int err; int i; @@ -3118,6 +3123,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } + xe_vma_ops_init(&vops); for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3137,14 +3143,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, - &ops_list, - i == args->num_binds - 1); + &vops, i == args->num_binds - 1); if (err) goto unwind_ops; } /* Nothing to do */ - if (list_empty(&ops_list)) { + if (list_empty(&vops.list)) { err = -ENODATA; goto unwind_ops; } @@ -3153,7 +3158,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (q) xe_exec_queue_get(q); - err = vm_bind_ioctl_ops_execute(vm, &ops_list); + err = vm_bind_ioctl_ops_execute(vm, &vops); up_write(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 0447c79c40a2..466b6c62d1f9 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -358,4 +358,11 @@ struct xe_vma_op { struct xe_vma_op_prefetch prefetch; }; }; + +/** struct xe_vma_ops - VMA operations */ +struct xe_vma_ops { + /** @list: list of VMA operations */ + struct list_head list; +}; + #endif -- cgit From 5f677a9b6537dbfe061ec9ab1c5b34b21e4855a3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:05 -0700 Subject: drm/xe: Use xe_vma_ops to implement xe_vm_rebind All page table updates are moving to an xe_vma_ops interface to implement 1 job per VM bind IOCTL. Convert xe_vm_rebind to use an xe_vma_ops based interface. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 93cf5116d2a9..be41b3f41529 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -712,37 +712,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; } -static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op); +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, + u8 tile_mask) +{ + INIT_LIST_HEAD(&op->link); + op->base.op = DRM_GPUVA_OP_MAP; + op->base.map.va.addr = vma->gpuva.va.addr; + op->base.map.va.range = vma->gpuva.va.range; + op->base.map.gem.obj = vma->gpuva.gem.obj; + op->base.map.gem.offset = vma->gpuva.gem.offset; + op->map.vma = vma; + op->map.immediate = true; + op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; + op->map.is_null = xe_vma_is_null(vma); +} + +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, + u8 tile_mask) +{ + struct xe_vma_op *op; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) + return -ENOMEM; + + xe_vm_populate_rebind(op, vma, tile_mask); + list_add_tail(&op->link, &vops->list); + + return 0; +} + +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops, + bool cleanup); +static void xe_vma_ops_init(struct xe_vma_ops *vops); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { struct dma_fence *fence; struct xe_vma *vma, *next; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; lockdep_assert_held(&vm->lock); - if (xe_vm_in_lr_mode(vm) && !rebind_worker) + if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || + list_empty(&vm->rebind_list)) return 0; + xe_vma_ops_init(&vops); + xe_vm_assert_held(vm); - list_for_each_entry_safe(vma, next, &vm->rebind_list, - combined_links.rebind) { + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { xe_assert(vm->xe, vma->tile_present); - list_del_init(&vma->combined_links.rebind); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); - fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); - if (IS_ERR(fence)) - return PTR_ERR(fence); + + err = xe_vm_ops_add_rebind(&vops, vma, + vma->tile_present); + if (err) + goto free_ops; + } + + fence = ops_execute(vm, &vops, false); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + } else { dma_fence_put(fence); + list_for_each_entry_safe(vma, next, &vm->rebind_list, + combined_links.rebind) + list_del_init(&vma->combined_links.rebind); + } +free_ops: + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); } - return 0; + return err; } static void xe_vma_free(struct xe_vma *vma) @@ -2414,7 +2464,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence = NULL; - lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -2533,7 +2583,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { struct dma_fence *fence = ERR_PTR(-ENOMEM); - lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->lock); switch (op->base.op) { case DRM_GPUVA_OP_MAP: -- cgit From 4dbbe4579490b4dbc89bf9c79ef3b9f1d94366c3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:06 -0700 Subject: drm/xe: Simplify VM bind IOCTL error handling and cleanup Clean up everything in VM bind IOCTL in 1 path for both errors and non-errors. Also move VM bind IOCTL cleanup from ops (also used by non-IOCTL binds) to the VM bind IOCTL. 
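The resulting shape is the classic single-exit kernel error-handling pattern; a hedged sketch with placeholder helpers (the real labels in the diff below are unwind_ops and free_syncs):

	err = parse_and_commit_ops();		/* placeholder */
	if (err)
		goto unwind;
	err = execute_ops();			/* placeholder */
unwind:
	/* -ENODATA means "nothing to do", not a failure to undo */
	if (err && err != -ENODATA)
		undo_committed_ops();		/* placeholder */
	free_everything();	/* runs on success and error alike */
	return err;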
v2: - Break ops_execute on error (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 67 +++++++--------------------------------- drivers/gpu/drm/xe/xe_vm_types.h | 5 --- 2 files changed, 12 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index be41b3f41529..66a27ade77d7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -743,8 +743,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, } static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops, - bool cleanup); + struct xe_vma_ops *vops); static void xe_vma_ops_init(struct xe_vma_ops *vops); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) @@ -777,7 +776,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) goto free_ops; } - fence = ops_execute(vm, &vops, false); + fence = ops_execute(vm, &vops); if (IS_ERR(fence)) { err = PTR_ERR(fence); } else { @@ -2449,7 +2448,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, if (!last_op) return 0; - last_op->ops = ops; if (last) { last_op->flags |= XE_VMA_OP_LAST; last_op->num_syncs = num_syncs; @@ -2619,25 +2617,6 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) return fence; } -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) -{ - bool last = op->flags & XE_VMA_OP_LAST; - - if (last) { - while (op->num_syncs--) - xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); - kfree(op->syncs); - if (op->q) - xe_exec_queue_put(op->q); - } - if (!list_empty(&op->link)) - list_del(&op->link); - if (op->ops) - drm_gpuva_ops_free(&vm->gpuvm, op->ops); - if (last) - xe_vm_put(vm); -} - static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, bool post_commit, bool prev_post_commit, bool next_post_commit) @@ -2714,8 +2693,6 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, op->flags & XE_VMA_OP_PREV_COMMITTED, op->flags & XE_VMA_OP_NEXT_COMMITTED); } - - drm_gpuva_ops_free(&vm->gpuvm, __ops); } } @@ -2803,24 +2780,20 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, } static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops, - bool cleanup) + struct xe_vma_ops *vops) { struct xe_vma_op *op, *next; struct dma_fence *fence = NULL; list_for_each_entry_safe(op, next, &vops->list, link) { - if (!IS_ERR(fence)) { - dma_fence_put(fence); - fence = xe_vma_op_execute(vm, op); - } + dma_fence_put(fence); + fence = xe_vma_op_execute(vm, op); if (IS_ERR(fence)) { drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", op->base.op, PTR_ERR(fence)); fence = ERR_PTR(-ENOSPC); + break; } - if (cleanup) - xe_vma_op_cleanup(vm, op); } return fence; @@ -2843,7 +2816,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, if (err) goto unlock; - fence = ops_execute(vm, vops, true); + fence = ops_execute(vm, vops); if (IS_ERR(fence)) { err = PTR_ERR(fence); /* FIXME: Killing VM rather than proper error handling */ @@ -3204,30 +3177,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto unwind_ops; } - xe_vm_get(vm); - if (q) - xe_exec_queue_get(q); - err = vm_bind_ioctl_ops_execute(vm, &vops); - up_write(&vm->lock); - - if (q) - xe_exec_queue_put(q); - xe_vm_put(vm); - - for (i = 0; bos && i < args->num_binds; ++i) - xe_bo_put(bos[i]); - - kvfree(bos); - 
kvfree(ops); - if (args->num_binds > 1) - kvfree(bind_ops); - - return err; - unwind_ops: - vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + unwind_ops: + if (err && err != -ENODATA) + vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + for (i = args->num_binds - 1; i >= 0; --i) + if (ops[i]) + drm_gpuva_ops_free(&vm->gpuvm, ops[i]); free_syncs: if (err == -ENODATA) err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 466b6c62d1f9..149ab892967e 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -330,11 +330,6 @@ enum xe_vma_op_flags { struct xe_vma_op { /** @base: GPUVA base operation */ struct drm_gpuva_op base; - /** - * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this - * operations is processed - */ - struct drm_gpuva_ops *ops; /** @q: exec queue for this operation */ struct xe_exec_queue *q; /** -- cgit From bf69918b7199ffa5bb6213f2b0a2c0b1be8f87dd Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:07 -0700 Subject: drm/xe: Use xe_vma_ops to implement page fault rebinds In an effort to make multiple VMA bind operations atomic (1 job), all device page table updates will be implemented via an xe_vma_ops (atomic unit) interface. Add an xe_vma_rebind function which is implemented using the xe_vma_ops interface. Use xe_vma_rebind in GPU page faults for rebinds rather than directly calling the deprecated function in the PT layer. v3: - Update commit message (Oak) v4: - Fix tile_mask argument (CI) Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-8-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 +++++----- drivers/gpu/drm/xe/xe_vm.c | 57 +++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_vm.h | 2 ++ drivers/gpu/drm/xe/xe_vm_types.h | 2 ++ 4 files changed, 58 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index fa9e9853c53b..040dd142c49c 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -19,7 +19,6 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" -#include "xe_pt.h" #include "xe_trace.h" #include "xe_vm.h" @@ -204,15 +203,14 @@ retry_userptr: drm_exec_retry_on_contention(&exec); if (ret) goto unlock_dma_resv; - } - /* Bind VMA only to the GT that has faulted */ - trace_xe_vma_pf_bind(vma); - fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0, - vma->tile_present & BIT(tile->id)); - if (IS_ERR(fence)) { - ret = PTR_ERR(fence); - goto unlock_dma_resv; + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto unlock_dma_resv; + } } /* diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 66a27ade77d7..cb38acabe682 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -716,6 +716,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { INIT_LIST_HEAD(&op->link); + op->tile_mask = tile_mask; op->base.op = DRM_GPUVA_OP_MAP; op->base.map.va.addr = vma->gpuva.va.addr; op->base.map.va.range = vma->gpuva.va.range; @@ -794,6 +795,33 @@ free_ops: return err; } +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) +{ + struct dma_fence *fence = 
NULL; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); + + xe_vma_ops_init(&vops); + + err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); + if (err) + return ERR_PTR(err); + + fence = ops_execute(vm, &vops); + + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); + } + + return fence; +} + static void xe_vma_free(struct xe_vma *vma) { if (xe_vma_is_userptr(vma)) @@ -1694,7 +1722,7 @@ err_fences: static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) + u8 tile_mask, bool first_op, bool last_op) { struct xe_tile *tile; struct dma_fence *fence; @@ -1702,7 +1730,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; - int number_tiles = hweight8(vma->tile_mask); + int number_tiles = hweight8(tile_mask); int err; u8 id; @@ -1716,7 +1744,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, } for_each_tile(tile, vm->xe, id) { - if (!(vma->tile_mask & BIT(id))) + if (!(tile_mask & BIT(id))) goto next; fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], @@ -1784,7 +1812,7 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) static struct dma_fence * xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, - bool immediate, bool first_op, bool last_op) + u8 tile_mask, bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); @@ -1800,8 +1828,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, vma->ufence = ufence ?: vma->ufence; if (immediate) { - fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, - last_op); + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, + first_op, last_op); if (IS_ERR(fence)) return fence; } else { @@ -1993,7 +2021,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, - true, first_op, last_op); + vma->tile_mask, true, first_op, last_op); } else { struct dma_fence *fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); @@ -2306,10 +2334,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; + struct xe_tile *tile; + u8 id, tile_mask = 0; int err = 0; lockdep_assert_held_write(&vm->lock); + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; @@ -2326,6 +2359,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } op->q = q; + op->tile_mask = tile_mask; switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -2471,6 +2505,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, case DRM_GPUVA_OP_MAP: fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, + op->tile_mask, op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); @@ -2498,7 +2533,9 @@ static struct dma_fence *op_execute(struct xe_vm 
*vm, struct xe_vma *vma, dma_fence_put(fence); fence = xe_vm_bind(vm, op->remap.prev, op->q, xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, true, false, + op->num_syncs, + op->remap.prev->tile_mask, true, + false, op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (IS_ERR(fence)) @@ -2512,8 +2549,8 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_bind(vm, op->remap.next, op->q, xe_vma_bo(op->remap.next), op->syncs, op->num_syncs, - true, false, - op->flags & XE_VMA_OP_LAST); + op->remap.next->tile_mask, true, + false, op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (IS_ERR(fence)) break; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 306cd0934a19..204a4ff63f88 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, + u8 tile_mask); int xe_vm_invalidate_vma(struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 149ab892967e..e9cd6da6263a 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -343,6 +343,8 @@ struct xe_vma_op { struct list_head link; /** @flags: operation flags */ enum xe_vma_op_flags flags; + /** @tile_mask: Tile mask for operation */ + u8 tile_mask; union { /** @map: VMA map operation specific data */ -- cgit From 22cfdd286572decf5225cc219205ca3348cfc4af Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:08 -0700 Subject: drm/xe: Add some members to xe_vma_ops This will help with moving to single jobs for many bind operations. 
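A sketch of how the new members are meant to be consumed (mirroring the diff below; to_wait_exec_queue() is the driver's existing helper):

	struct xe_vma_ops vops;

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	/* later stages reach shared state through vops rather than
	 * through per-operation fields */
	wait_exec_queue = to_wait_exec_queue(vops.vm, vops.q);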
v2: - Rebase Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-9-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 19 ++++++++++++++----- drivers/gpu/drm/xe/xe_vm_types.h | 8 ++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cb38acabe682..45258d38d4ee 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -745,7 +745,9 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, } static struct dma_fence *ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops); -static void xe_vma_ops_init(struct xe_vma_ops *vops); +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { @@ -760,7 +762,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) list_empty(&vm->rebind_list)) return 0; - xe_vma_ops_init(&vops); + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); xe_vm_assert_held(vm); list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { @@ -806,7 +808,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma xe_vm_assert_held(vm); xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); - xe_vma_ops_init(&vops); + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); if (err) @@ -3014,9 +3016,16 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return err; } -static void xe_vma_ops_init(struct xe_vma_ops *vops) +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs) { + memset(vops, 0, sizeof(*vops)); INIT_LIST_HEAD(&vops->list); + vops->vm = vm; + vops->q = q; + vops->syncs = syncs; + vops->num_syncs = num_syncs; } int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -3183,7 +3192,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } - xe_vma_ops_init(&vops); + xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index e9cd6da6263a..ce1a63a5e3e7 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -360,6 +360,14 @@ struct xe_vma_op { /** @list: list of VMA operations */ struct list_head list; + /** @vm: VM */ + struct xe_vm *vm; + /** @q: exec queue these operations */ + struct xe_exec_queue *q; + /** @syncs: syncs these operation */ + struct xe_sync_entry *syncs; + /** @num_syncs: number of syncs */ + u32 num_syncs; }; #endif -- cgit From 61e3270ef9610ab40b1b56aa62e2b8471c32f1f7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:09 -0700 Subject: drm/xe: Add vm_bind_ioctl_ops_fini helper Simplify VM bind code by signaling out-fences / destroying VMAs in a single location. This will help with the transition to a single job for many bind ops.
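The intended call flow on the execute path, sketched from the diff below (vm_bind_ioctl_ops_fini() consumes the fence reference on success):

	fence = ops_execute(vm, vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		/* FIXME path: kill the VM instead of fine-grained unwind */
		xe_vm_kill(vm, false);
	} else {
		/* signal out-fences, destroy unmapped VMAs, set the last
		 * fence and drop the reference -- all in one place */
		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}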
v2: - s/vm_bind_ioctl_ops_install_fences/vm_bind_ioctl_ops_fini (Oak) - Set last fence in vm_bind_ioctl_ops_fini (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-10-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 62 ++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 45258d38d4ee..2f19372aaad5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1646,7 +1646,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int cur_fence = 0, i; + int cur_fence = 0; int number_tiles = hweight8(vma->tile_present); int err; u8 id; @@ -1704,10 +1704,6 @@ next: fence = cf ? &cf->base : !fence ? xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } return fence; @@ -1731,7 +1727,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); - int cur_fence = 0, i; + int cur_fence = 0; int number_tiles = hweight8(tile_mask); int err; u8 id; @@ -1778,12 +1774,6 @@ next: } } - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], - cf ? &cf->base : fence); - } - return cf ? &cf->base : fence; err_fences: @@ -1835,20 +1825,11 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, if (IS_ERR(fence)) return fence; } else { - int i; - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } } - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - return fence; } @@ -1858,7 +1839,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -1867,10 +1847,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, if (IS_ERR(fence)) return fence; - xe_vma_destroy(vma, fence); - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - return fence; } @@ -2025,17 +2001,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, vma->tile_mask, true, first_op, last_op); } else { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(wait_exec_queue, vm); - int i; - - /* Nothing to do, signal fences now */ - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } - - return fence; + return xe_exec_queue_last_fence_get(wait_exec_queue, vm); } } @@ -2838,6 +2804,26 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, return fence; } +static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, + struct dma_fence *fence) +{ + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); + struct xe_vma_op *op; + int i; + + list_for_each_entry(op, &vops->list, link) { + if (op->base.op == DRM_GPUVA_OP_UNMAP) + xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); + else if 
(op->base.op == DRM_GPUVA_OP_REMAP) xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), + fence); + } + for (i = 0; i < vops->num_syncs; i++) + xe_sync_entry_signal(vops->syncs + i, fence); + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); + dma_fence_put(fence); +} + static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops) { @@ -2862,7 +2848,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, xe_vm_kill(vm, false); goto unlock; } else { - dma_fence_put(fence); + vm_bind_ioctl_ops_fini(vm, vops, fence); } } -- cgit From fda75ef80bddf2f08b0e597d59da69a3d8eb5be2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:10 -0700 Subject: drm/xe: Move ufence check to op_lock_and_prep Rather than checking for an unsignaled ufence at unbind time, check for this during the op_lock_and_prep function. This helps with the transition to 1 job per VM bind IOCTL. v2: - Rebase v3: - Fix typo in commit message (Oak) Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-11-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2f19372aaad5..40c1258c3282 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1653,16 +1653,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); - if (vma->ufence) { - struct xe_user_fence * const f = vma->ufence; - - if (!xe_sync_ufence_get_status(f)) - return ERR_PTR(-EBUSY); - - vma->ufence = NULL; - xe_sync_ufence_put(f); - } - if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -2717,6 +2707,21 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, return err; } +static int check_ufence(struct xe_vma *vma) +{ + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return -EBUSY; + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + + return 0; +} + static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_op *op) { @@ -2729,6 +2734,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, op->map.immediate); break; case DRM_GPUVA_OP_REMAP: + err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); + if (err) + break; + err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), false); @@ -2738,6 +2747,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, op->remap.next, true); break; case DRM_GPUVA_OP_UNMAP: + err = check_ufence(gpuva_to_vma(op->base.unmap.va)); + if (err) + break; + err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), false); -- cgit From 5aa5eea09af08ad446f78f3de10388c98f52f19c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:11 -0700 Subject: drm/xe: Move ufence add to vm_bind_ioctl_ops_fini Rather than adding a ufence to a VMA in the bind function, add the ufence to all VMAs in the IOCTL that require binds in vm_bind_ioctl_ops_fini. This helps with the transition to 1 job per VM bind IOCTL.
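The reference counting this relies on, sketched from the diff below: the sync-array lookup takes one reference, each VMA takes its own via op_add_ufence(), and the lookup reference is dropped at the end:

	ufence = find_ufence_get(vops->syncs, vops->num_syncs); /* may be NULL */
	list_for_each_entry(op, &vops->list, link)
		if (ufence)
			op_add_ufence(vm, op, ufence);	/* per-VMA reference */
	if (ufence)
		xe_sync_ufence_put(ufence);	/* drop the lookup reference */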
v2: - Rebase v3: - Fix typo in commit (Oak) Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-12-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_sync.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 44 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 53 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 65f1f1628235..2883d9aca404 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -338,6 +338,21 @@ err_out: return ERR_PTR(-ENOMEM); } +/** + * __xe_sync_ufence_get() - Get user fence from user fence + * @ufence: input user fence + * + * Get a user fence reference from user fence + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence) +{ + user_fence_get(ufence); + + return ufence; +} + /** * xe_sync_ufence_get() - Get user fence from sync * @sync: input sync diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 3e03396af2c6..006dbf780793 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence); struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); void xe_sync_ufence_put(struct xe_user_fence *ufence); int xe_sync_ufence_get_status(struct xe_user_fence *ufence); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 40c1258c3282..dfd31b346021 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1798,17 +1798,10 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - struct xe_user_fence *ufence; xe_vm_assert_held(vm); xe_bo_assert_held(bo); - ufence = find_ufence_get(syncs, num_syncs); - if (vma->ufence && ufence) - xe_sync_ufence_put(vma->ufence); - - vma->ufence = ufence ?: vma->ufence; - if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, first_op, last_op); @@ -2817,20 +2810,57 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, return fence; } +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) +{ + if (vma->ufence) + xe_sync_ufence_put(vma->ufence); + vma->ufence = __xe_sync_ufence_get(ufence); +} + +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_user_fence *ufence) +{ + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + vma_add_ufence(op->map.vma, ufence); + break; + case DRM_GPUVA_OP_REMAP: + if (op->remap.prev) + vma_add_ufence(op->remap.prev, ufence); + if (op->remap.next) + vma_add_ufence(op->remap.next, ufence); + break; + case DRM_GPUVA_OP_UNMAP: + break; + case DRM_GPUVA_OP_PREFETCH: + vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } +} + static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, struct dma_fence *fence) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); + struct xe_user_fence *ufence; struct xe_vma_op *op; int i; + ufence = find_ufence_get(vops->syncs, vops->num_syncs); list_for_each_entry(op, &vops->list, link) { + if (ufence) + op_add_ufence(vm, 
op, ufence); + if (op->base.op == DRM_GPUVA_OP_UNMAP) xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); else if (op->base.op == DRM_GPUVA_OP_REMAP) xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), fence); } + if (ufence) + xe_sync_ufence_put(ufence); for (i = 0; i < vops->num_syncs; i++) xe_sync_entry_signal(vops->syncs + i, fence); xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); -- cgit From c4f18703629dd0112641d6974eb295a53c4a4615 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:12 -0700 Subject: drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this xe_gt_tlb_invalidation_range accepts a start and end address rather than a VMA. This will enable multiple VMAs to be invalidated in a single invalidation. Update the PT layer to use this new function. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 59 +++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 ++ drivers/gpu/drm/xe/xe_pt.c | 25 ++++++++---- 3 files changed, 65 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 8e9c4b990fbb..d0ee1e0df0bd 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) } /** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an + * address range + * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL - * @vma: VMA to invalidate + * @start: start address + * @end: end address + * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full * TLB invalidation. Completion of TLB is asynchronous and caller can either use @@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid) { struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; - xe_gt_assert(gt, vma); - /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { if (fence) @@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, if (!xe->info.has_range_tlb_invalidation) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { - u64 start = xe_vma_start(vma); - u64 length = xe_vma_size(vma); + u64 orig_start = start; + u64 length = end - start; u64 align, end; if (length < SZ_4K) @@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, * address mask covering the required range. 
*/ align = roundup_pow_of_two(length); - start = ALIGN_DOWN(xe_vma_start(vma), align); - end = ALIGN(xe_vma_end(vma), align); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); length = align; while (start + length < end) { length <<= 1; - start = ALIGN_DOWN(xe_vma_start(vma), length); + start = ALIGN_DOWN(orig_start, length); } /* @@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, */ if (length >= SZ_2M) { length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(xe_vma_start(vma), length); + start = ALIGN_DOWN(orig_start, length); } xe_gt_assert(gt, length >= SZ_4K); xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); xe_gt_assert(gt, IS_ALIGNED(start, length)); action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = xe_vma_vm(vma)->usm.asid; + action[len++] = asid; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); action[len++] = ilog2(length) - ilog2(SZ_4K); @@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, return send_tlb_invalidation(>->uc.guc, fence, action, len); } +/** + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA + * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion, can be NULL + * @vma: VMA to invalidate + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can either use + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for + * completion. + * + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, + * negative error code on error. 
+ */ +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma) +{ + xe_gt_assert(gt, vma); + + return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); +} + /** * xe_gt_tlb_invalidation_wait - Wait for TLB to complete * @gt: graphics tile diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index fbb743d80d2c..bf3bebd9f985 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 5b7930f46cf3..8d3765d3351e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = { struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; struct xe_gt *gt; - struct xe_vma *vma; struct dma_fence *fence; struct dma_fence_cb cb; struct work_struct work; + u64 start; + u64 end; + u32 asid; }; static const char * @@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w) container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); + xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, + ifence->end, ifence->asid); } static int invalidation_fence_init(struct xe_gt *gt, struct invalidation_fence *ifence, struct dma_fence *fence, - struct xe_vma *vma) + u64 start, u64 end, u32 asid) { int ret; @@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; - ifence->vma = vma; + ifence->start = start; + ifence->end = end; + ifence->asid = asid; INIT_WORK(&ifence->work, invalidation_fence_work_func); ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); @@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue /* TLB invalidation must be done before signaling rebind */ if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, ifence, fence, - vma); + int err = invalidation_fence_init(tile->primary_gt, + ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); @@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu dma_fence_wait(fence, false); /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); + err = invalidation_fence_init(tile->primary_gt, ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); -- cgit From 98ad158e543426561fa5df5c4387d4368601866f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:13 -0700 Subject: drm/xe: 
Delete PT update selftest IGTs (e.g. xe_vm) can provide the exact same coverage as the PT update selftest. The PT update selftest is dependent on internal functions which can change thus maintaining this test is costly and provide no extra coverage. Delete this test. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-14-matthew.brost@intel.com --- drivers/gpu/drm/xe/tests/xe_migrate.c | 86 ----------------------------------- 1 file changed, 86 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 977d5f4e4490..b6e7f80c3774 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -62,36 +62,6 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, return 0; } -static void -sanity_populate_cb(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, void *dst, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct migrate_test_params *p = - to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE)); - int i; - u64 *ptr = dst; - u64 value; - - for (i = 0; i < num_qwords; i++) { - value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, value); - else - ptr[i] = value; - } - - kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU"); - if (p->force_gpu && map) - KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n"); -} - -static const struct xe_migrate_pt_update_ops sanity_ops = { - .populate = sanity_populate_cb, -}; - #define check(_retval, _expected, str, _test) \ do { if ((_retval) != (_expected)) { \ KUNIT_FAIL(_test, "Sanity check failed: " str \ @@ -209,57 +179,6 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, test_copy(m, bo, test, region); } -static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, - struct kunit *test, bool force_gpu) -{ - struct xe_device *xe = tile_to_xe(m->tile); - struct dma_fence *fence; - u64 retval, expected; - ktime_t then, now; - int i; - - struct xe_vm_pgtable_update update = { - .ofs = 1, - .qwords = 0x10, - .pt_bo = pt, - }; - struct xe_migrate_pt_update pt_update = { - .ops = &sanity_ops, - }; - struct migrate_test_params p = { - .base.id = XE_TEST_LIVE_MIGRATE, - .force_gpu = force_gpu, - }; - - test->priv = &p; - /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ - expected = 0xf0f0f0f0f0f0f0f0ULL; - xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); - - then = ktime_get(); - fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1, - NULL, 0, &pt_update); - now = ktime_get(); - if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) - return; - - kunit_info(test, "Updating without syncing took %llu us,\n", - (unsigned long long)ktime_to_us(ktime_sub(now, then))); - - dma_fence_put(fence); - retval = xe_map_rd(xe, &pt->vmap, 0, u64); - check(retval, expected, "PTE[0] must stay untouched", test); - - for (i = 0; i < update.qwords; i++) { - retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64); - check(retval, i * 0x1111111111111111ULL, "PTE update", test); - } - - retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), - u64); - check(retval, expected, "PTE[0x11] must stay untouched", test); -} - static void xe_migrate_sanity_test(struct xe_migrate *m, struct 
kunit *test) { struct xe_tile *tile = m->tile; @@ -398,11 +317,6 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy_vram(m, big, test); } - kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); - test_pt_update(m, pt, test, false); - kunit_info(test, "Testing page table update using GPU\n"); - test_pt_update(m, pt, test, true); - out: xe_bb_free(bb, NULL); free_tiny: -- cgit From bb442bfb9b3d1bc6ebd9fc64fa566e12d64627ce Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Wed, 24 Apr 2024 09:12:47 +0530 Subject: drm/xe/xe2hpg: Add Wa_14021490052 Add Wa_14021490052 for Xe2HPG 20.01. Signed-off-by: Shekhar Chauhan Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240424034247.1352755-1-shekhar.chauhan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 9d9b7fa7a8f0..4266ffac3503 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -692,6 +692,15 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14021490052"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, + DIS_MESH_PARTIAL_AUTOSTRIP | + DIS_MESH_AUTOSTRIP), + SET(VFLSKPD, + DIS_PARTIAL_AUTOSTRIP | + DIS_AUTOSTRIP)) + }, {} }; -- cgit From 4caf410766add8cf376a3afc910b17dd0961dd75 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 27 Apr 2024 06:53:39 -0700 Subject: drm/xe: Merge 16021540221 and 18034896535 WAs In order to detect duplicate implementations for the same workaround, early in the implementation of RTP it was decided to error out even if the values set are exactly the same. With the introduction of 18034896535 in commit 74671d23ca18 ("drm/xe/xe2: Add workaround 18034896535"), LNL stepping with graphics stepping A1 now gives the following error on module load: xe 0000:00:02.0: [drm] *ERROR* GT0: [GT OTHER] \ discarding save-restore reg e48c (clear: 00000200, set: 00000200,\ masked: yes, mcr: yes): ret=-22 RTP may be improved in the future, but for now simply join the entries like done with e.g. "1607297627, 1607030317, 1607186500". 
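For illustration, the two entries that collide on LNL A1 both match a render/compute engine there and program the same ROW_CHICKEN4 bit; a reduced sketch of the before/after shape (rules abbreviated from the diff below):

    /* before: two entries resolve to the same save-restore register/bit */
    { XE_RTP_NAME("16021540221"),
      XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), ...),
      XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) },
    { XE_RTP_NAME("18034896535"),
      XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ...),
      XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) },

    /* after: one joined entry; the narrower stepping rule of 16021540221
     * is kept as a comment, matching the style of existing joined WAs */
    { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */
      XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ...),
      XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) },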
Fixes: 74671d23ca18 ("drm/xe/xe2: Add workaround 18034896535") Cc: Bommu Krishnaiah Cc: Tejas Upadhyay Cc: Matt Roper Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240427135339.3485559-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4266ffac3503..134a34dbfe8d 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -449,12 +449,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, - { XE_RTP_NAME("16021540221"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) - }, - { XE_RTP_NAME("18034896535"), + { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) -- cgit From e29a7a34c3cf140ceb2f994a8eae0b68d21e357e Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 29 Apr 2024 22:30:39 +0200 Subject: drm/xe: Remove uninitialized end var from xe_gt_tlb_invalidation_range() This fixes commit c4f18703629d ("drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this") which added the end variable as part of the function param. v2: Add fixes tag(Matt) Fixes: c4f18703629d ("drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this") Cc: Matthew Brost Signed-off-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240429203039.26918-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d0ee1e0df0bd..c3d015a7ac33 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -305,7 +305,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, } else { u64 orig_start = start; u64 length = end - start; - u64 align, end; + u64 align; if (length < SZ_4K) length = SZ_4K; -- cgit From d457519c942d4885d92f6367e42ca67f4c7471ad Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 19 Apr 2024 11:34:11 -0700 Subject: drm/xe/gsc: Turn off GSCCS interrupts when disabling the engine Starting on LNL, there is a new GSCCS interrupt that is triggered when the GSC engine reset fails. If the HW is in a bad state, this interrupt might end up being triggered even if we're not using the engine, which will lead to a warning because we'll see it as unexpected. Since there is no point in handling the interrupt in this scenario, we can just make sure the interrupts are off when we disable the engine. 
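Concretely, the quiesce added below is the usual enable/mask register pair: writing 0 to the enable register stops new interrupt generation, while writing all-ones to the mask register keeps any already-latched event from being reported:

    xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);  /* no new interrupts generated */
    xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);   /* anything pending stays masked */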
Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Tested-by: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240419183412.1605782-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 4cc757457e01..ec69803152a2 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -717,6 +717,11 @@ static void check_gsc_availability(struct xe_gt *gt) */ if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) { gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); + + /* interrupts where previously enabled, so turn them off */ + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0); + xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0); + drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); } } -- cgit From a1ea30b69e02eb02043b0d6d7c42abcfafe99bd0 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 19 Apr 2024 11:34:12 -0700 Subject: drm/xe/gsc: define GSCCS for LNL LNL has 1 GSCCS, same as MTL. Note that the GSCCS will be disabled until we have a GSC FW defined, but having it in the list of engines is a requirement to add such a definition. v2: rebase Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Shekhar Chauhan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240419183412.1605782-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index fb20c9828563..d0d4d8f9749c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -211,7 +211,8 @@ static const struct xe_media_desc media_xe2 = { .name = "Xe2_LPM / Xe2_HPM", .hw_engine_mask = GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | - GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | + BIT(XE_HW_ENGINE_GSCCS0) }; static const struct xe_device_desc tgl_desc = { -- cgit From 8ad0e1810bf23f22cedb8a2664548b15646570c7 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 2 May 2024 14:43:10 +0200 Subject: drm/xe/gt: Fix assert in L3 bank mask generation What needs to be asserted is that the pattern fits in the number of bits provided by the user in patternbits, otherwise it would be truncated when replicated according to the mask, which is likely not the intended use of this function. The pattern argument is a bitmap so use find_last_bit() instead of fls(). The bit position starts at index 0 so remove "or equal" from the comparison. XE_MAX_L3_BANK_MASK_BITS would be the returned value if the pattern is 0, which can be the case on some platforms.
v2: Check the result does not overflow the array (Lucas De Marchi) v3: Use __fls() for long and handle mask == 0 (Lucas De Marchi) Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Francois Dugast Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502124311.159695-1-francois.dugast@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index af841d801a8f..25ff03ab8448 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -108,7 +108,9 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, { unsigned long bit; - xe_assert(xe, fls(mask) <= patternbits); + xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits || + bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS)); + xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS); for_each_set_bit(bit, &mask, 32) { xe_l3_bank_mask_t shifted_pattern = {}; -- cgit From 75521e8b56e8f9dc673b782df7bc3660f51f329a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 2 May 2024 20:32:51 +0200 Subject: drm/xe: Perform dma_map when moving system buffer objects to TT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we dma_map on ttm_tt population and dma_unmap when the pages are released in ttm_tt unpopulate. Strictly, the dma_map is not needed until the bo is moved to the XE_PL_TT placement, so perform the dma_mapping on such moves instead, and remove the dma_mapping when moving to XE_PL_SYSTEM. This is desired for the upcoming shrinker series where shrinking of a ttm_tt might fail. That would lead to an odd construct where we first dma_unmap, then shrink and if shrinking fails dma_map again. If dma_mapping instead is performed on move like this, shrinking does not need to care at all about dma mapping. Finally, when a ttm_tt is destroyed while bound to a different memory type than XE_PL_SYSTEM, we keep the dma_unmap in unpopulate(). v2: - Don't accidentally unmap the dma-buf's sgtable.
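In sketch form (simplified from the xe_bo_move() changes below, with error handling elided), the mapping now tracks placement transitions:

    /* entering XE_PL_TT: build and dma-map the backing sg table */
    if (new_mem->mem_type == XE_PL_TT)
        ret = xe_tt_map_sg(ttm);        /* dma_map_sgtable() under the hood */

    /* leaving for XE_PL_SYSTEM (or losing the resource): undo the mapping */
    if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
        ttm_bo->ttm)
        xe_tt_unmap_sg(ttm_bo->ttm);    /* dma_unmap_sgtable() + sg_free_table() */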
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240502183251.10170-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 47 +++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bc1f794e3e61..52a16cb4e736 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -302,6 +302,18 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } +static void xe_tt_unmap_sg(struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } +} + struct sg_table *xe_bo_sg(struct xe_bo *bo) { struct ttm_tt *tt = bo->ttm.ttm; @@ -377,27 +389,15 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (err) return err; - /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */ - err = xe_tt_map_sg(tt); - if (err) - ttm_pool_free(&ttm_dev->pool, tt); - return err; } static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) { - struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); - if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return; - if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, - DMA_BIDIRECTIONAL, 0); - sg_free_table(xe_tt->sg); - xe_tt->sg = NULL; - } + xe_tt_unmap_sg(tt); return ttm_pool_free(&ttm_dev->pool, tt); } @@ -628,17 +628,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; int ret = 0; + /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { - ttm_bo_move_null(ttm_bo, new_mem); - return 0; + if (new_mem->mem_type == XE_PL_TT) + ret = xe_tt_map_sg(ttm); + if (!ret) + ttm_bo_move_null(ttm_bo, new_mem); + goto out; } if (ttm_bo->type == ttm_bo_type_sg) { ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); - goto out; + return ret; } tt_has_data = ttm && (ttm_tt_is_populated(ttm) || @@ -650,6 +654,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); + if (new_mem->mem_type == XE_PL_TT) { + ret = xe_tt_map_sg(ttm); + if (ret) + goto out; + } + if ((move_lacks_source && !needs_clear)) { ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -786,8 +796,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_pm_runtime_put(xe); out: - return ret; + if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && + ttm_bo->ttm) + xe_tt_unmap_sg(ttm_bo->ttm); + return ret; } /** -- cgit From 87ea92a19216a454a6eb5710501a470dcdb8577d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 May 2024 00:33:11 +0200 Subject: drm/xe: Add helpers for manipulating macro arguments Define generic helpers that will replace private definitions used by the RTP code and will allow reuse by the new code. Put them in new xe_args.h file (instead of infamous xe_macros.h) as once we find more potential users outside of the Xe driver we may want to move all of these macros as-is to linux/args.h. 
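As a quick taste of the semantics (these examples mirror the kernel-doc in the header below):

    #define foo X, Y, Z, Q

    COUNT_ARGS(foo)            /* 1: foo is passed as a single, unexpanded argument */
    CALL_ARGS(COUNT_ARGS, foo) /* 4: foo is expanded before COUNT_ARGS is invoked */
    PICK_FIRST(foo)            /* X */
    PICK_LAST(foo)             /* Q (works up to 12 arguments, like COUNT_ARGS) */
    DROP_FIRST(foo)            /* Y, Z, Q */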
Signed-off-by: Michal Wajdeczko Cc: Andy Shevchenko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502223313.2527-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_args.h | 121 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_args.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h new file mode 100644 index 000000000000..40b9eb4151d8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_args.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_ARGS_H_ +#define _XE_ARGS_H_ + +#include + +/* + * Why don't the following macros have the XE prefix? + * + * Once we find more potential users outside of the Xe driver, we plan to move + * all of the following macros unchanged to linux/args.h. + */ + +/** + * CALL_ARGS - Invoke a macro, but allow parameters to be expanded beforehand. + * @f: name of the macro to invoke + * @args: arguments for the macro + * + * This macro allows calling macros which names might generated or we want to + * make sure it's arguments will be correctly expanded. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar COUNT_ARGS(foo) + * #define buz CALL_ARGS(COUNT_ARGS, foo) + * + * With above definitions bar expands to 1 while buz expands to 4. + */ +#define CALL_ARGS(f, args...) __CALL_ARGS(f, args) +#define __CALL_ARGS(f, args...) f(args) + +/** + * DROP_FIRST - Returns all arguments except the first one. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) + * + * With above definitions bar expands to 3. + */ +#define DROP_FIRST(args...) __DROP_FIRST(args) +#define __DROP_FIRST(a, b...) b + +/** + * PICK_FIRST - Returns the first argument. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_FIRST(foo) + * + * With above definitions bar expands to X. + */ +#define PICK_FIRST(args...) __PICK_FIRST(args) +#define __PICK_FIRST(a, b...) a + +/** + * PICK_LAST - Returns the last argument. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Like COUNT_ARGS() this macro works up to 12 arguments. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_LAST(foo) + * + * With above definitions bar expands to Q. + */ +#define PICK_LAST(args...) __PICK_ARG(COUNT_ARGS(args), args) +#define __PICK_ARG(n, args...) CALL_ARGS(CONCATENATE(PICK_ARG, n), args) +#define PICK_ARG1(args...) PICK_FIRST(args) +#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST(args)) +#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST(args)) +#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST(args)) +#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST(args)) +#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST(args)) +#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST(args)) +#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST(args)) +#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST(args)) +#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST(args)) +#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST(args)) +#define PICK_ARG12(args...) 
PICK_ARG11(DROP_FIRST(args)) + +/** + * ARGS_SEP_COMMA - Definition of a comma character. + * + * This definition can be used in cases where any intermediate macro expects + * fixed number of arguments, but we want to pass more arguments which can + * be properly evaluated only by the next level macro. + * + * Example: + * + * #define foo(f) f(X) f(Y) f(Z) f(Q) + * #define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) + * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) + * + * With above definitions bar expands to + * "X", "Y", "Z", "Q" + * and buz expands to 4. + */ +#define ARGS_SEP_COMMA , + +#endif -- cgit From 9f79e24485494f35740fd85ffb2c67fefb8c67b2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 May 2024 00:33:12 +0200 Subject: drm/xe/kunit: Add simple tests for new xe_args macros We want to make sure that helper macros are working as expected. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502223313.2527-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/Makefile | 1 + drivers/gpu/drm/xe/tests/xe_args_test.c | 190 ++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_args_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 8cf2367449d8..6e58931fddd4 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -11,6 +11,7 @@ xe_live_test-y = xe_live_test_mod.o \ # Normal kunit tests obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o xe_test-y = xe_test_mod.o \ + xe_args_test.o \ xe_pci_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c new file mode 100644 index 000000000000..9b44c1ab6364 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_args_test.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_args.h" + +static void call_args_example(struct kunit *test) +{ +#define foo X, Y, Z, Q +#define bar COUNT_ARGS(foo) +#define buz CALL_ARGS(COUNT_ARGS, foo) + + KUNIT_EXPECT_EQ(test, bar, 1); + KUNIT_EXPECT_EQ(test, buz, 4); + +#undef foo +#undef bar +#undef buz +} + +static void drop_first_example(struct kunit *test) +{ +#define foo X, Y, Z, Q +#define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) + + KUNIT_EXPECT_EQ(test, bar, 3); + +#undef foo +#undef bar +} + +static void pick_first_example(struct kunit *test) +{ + int X = 1; + +#define foo X, Y, Z, Q +#define bar PICK_FIRST(foo) + + KUNIT_EXPECT_EQ(test, bar, X); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "X"); + +#undef foo +#undef bar +} + +static void pick_last_example(struct kunit *test) +{ + int Q = 1; + +#define foo X, Y, Z, Q +#define bar PICK_LAST(foo) + + KUNIT_EXPECT_EQ(test, bar, Q); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Q"); + +#undef foo +#undef bar +} + +static void sep_comma_example(struct kunit *test) +{ +#define foo(f) f(X) f(Y) f(Z) f(Q) +#define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) +#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) + + static const char * const a[] = { bar }; + + KUNIT_EXPECT_STREQ(test, a[0], "X"); + KUNIT_EXPECT_STREQ(test, a[1], "Y"); + KUNIT_EXPECT_STREQ(test, a[2], "Z"); + KUNIT_EXPECT_STREQ(test, a[3], "Q"); + + KUNIT_EXPECT_EQ(test, buz, 4); + +#undef foo +#undef bar +#undef buz +} + +#define NO_ARGS +#define FOO_ARGS 
X, Y, Z, Q +#define MAX_ARGS -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12 + +static void count_args_test(struct kunit *test) +{ + int count; + + /* COUNT_ARGS() counts to 12 */ + + count = COUNT_ARGS(); + KUNIT_EXPECT_EQ(test, count, 0); + + count = COUNT_ARGS(1); + KUNIT_EXPECT_EQ(test, count, 1); + + count = COUNT_ARGS(a, b, c, d, e); + KUNIT_EXPECT_EQ(test, count, 5); + + count = COUNT_ARGS(a, b, c, d, e, f, g, h, i, j, k, l); + KUNIT_EXPECT_EQ(test, count, 12); + + /* COUNT_ARGS() does not expand params */ + + count = COUNT_ARGS(NO_ARGS); + KUNIT_EXPECT_EQ(test, count, 1); + + count = COUNT_ARGS(FOO_ARGS); + KUNIT_EXPECT_EQ(test, count, 1); +} + +static void call_args_test(struct kunit *test) +{ + int count; + + count = CALL_ARGS(COUNT_ARGS, NO_ARGS); + KUNIT_EXPECT_EQ(test, count, 0); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, NO_ARGS), 0); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS), 4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS, FOO_ARGS), 8); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, MAX_ARGS), 12); +} + +static void drop_first_test(struct kunit *test) +{ + int Y = -2, Z = -3, Q = -4; + int a[] = { DROP_FIRST(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, DROP_FIRST(0, -1), -1); + KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(0, -1, -2)), -2); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST(FOO_ARGS)), 3); + KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS))), -4); + KUNIT_EXPECT_EQ(test, a[0], -2); + KUNIT_EXPECT_EQ(test, a[1], -3); + KUNIT_EXPECT_EQ(test, a[2], -4); + KUNIT_EXPECT_STREQ(test, __stringify(DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS)))), "Q"); +} + +static void pick_first_test(struct kunit *test) +{ + int X = -1; + int a[] = { PICK_FIRST(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, PICK_FIRST(-1, -2), -1); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_FIRST(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, PICK_FIRST(FOO_ARGS), -1); + KUNIT_EXPECT_EQ(test, a[0], -1); + KUNIT_EXPECT_STREQ(test, __stringify(PICK_FIRST(FOO_ARGS)), "X"); +} + +static void pick_last_test(struct kunit *test) +{ + int Q = -4; + int a[] = { PICK_LAST(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, PICK_LAST(-1, -2), -2); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_LAST(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, PICK_LAST(FOO_ARGS), -4); + KUNIT_EXPECT_EQ(test, a[0], -4); + KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(FOO_ARGS)), "Q"); + + KUNIT_EXPECT_EQ(test, PICK_LAST(MAX_ARGS), -12); + KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(MAX_ARGS)), "-12"); +} + +static struct kunit_case args_tests[] = { + KUNIT_CASE(count_args_test), + KUNIT_CASE(call_args_example), + KUNIT_CASE(call_args_test), + KUNIT_CASE(drop_first_example), + KUNIT_CASE(drop_first_test), + KUNIT_CASE(pick_first_example), + KUNIT_CASE(pick_first_test), + KUNIT_CASE(pick_last_example), + KUNIT_CASE(pick_last_test), + KUNIT_CASE(sep_comma_example), + {} +}; + +static struct kunit_suite args_test_suite = { + .name = "args", + .test_cases = args_tests, +}; + +kunit_test_suite(args_test_suite); -- cgit From 233e8d1de827b58ec92c60b86a3b0f5bdf7c3892 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 May 2024 00:33:13 +0200 Subject: drm/xe/rtp: Prefer helper macros from xe_args.h Some custom implementation can be replaced with generic macros from the linux/args.h or xe_args.h. 
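The mapping from the private helpers to the generic ones is essentially one-to-one:

    _XE_COUNT_ARGS(...)    ->  COUNT_ARGS(...)    /* from linux/args.h */
    _XE_FIRST(...)         ->  PICK_FIRST(...)
    _XE_DROP_FIRST(...)    ->  DROP_FIRST(...)
    __XE_RTP_CONCAT(a, b)  ->  CONCATENATE(XE_RTP_, CONCATENATE(a, b))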
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502223313.2527-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_rtp.h | 4 ++-- drivers/gpu/drm/xe/xe_rtp_helpers.h | 26 ++++++++++---------------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index c56fedd126e6..337b1ef1959c 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -341,7 +341,7 @@ struct xe_reg_sr; * }; */ #define XE_RTP_RULES(...) \ - .n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \ + .n_rules = COUNT_ARGS(__VA_ARGS__), \ .rules = (const struct xe_rtp_rule[]) { \ XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \ } @@ -366,7 +366,7 @@ struct xe_reg_sr; * }; */ #define XE_RTP_ACTIONS(...) \ - .n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \ + .n_actions = COUNT_ARGS(__VA_ARGS__), \ .actions = (const struct xe_rtp_action[]) { \ XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \ } diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 181b6290fac3..8129d6d9ef37 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -10,22 +10,16 @@ #error "This header is supposed to be included by xe_rtp.h only" #endif +#include "xe_args.h" + /* * Helper macros - not to be used outside this header. */ #define _XE_ESC(...) __VA_ARGS__ -#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,)) -#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_ - -#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) -#define __XE_FIRST(x_, ...) x_ -#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) -#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__) -#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__ +#define _XE_TUPLE_TAIL(...) 
(DROP_FIRST(__VA_ARGS__)) -#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b) -#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b +#define _XE_RTP_CONCAT(a, b) CONCATENATE(XE_RTP_, CONCATENATE(a, b)) #define __XE_RTP_PASTE_SEP_COMMA , #define __XE_RTP_PASTE_SEP_BITWISE_OR | @@ -59,11 +53,11 @@ * * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ -#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_)) -#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) -#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_RTP_CONCAT(PASTE_, COUNT_ARGS args_)(prefix_, sep_, args_) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer @@ -76,6 +70,6 @@ * * { .a = 10 } */ -#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__) +#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST _XE_ESC __VA_ARGS__) #endif -- cgit From 786754124189e3f67fc52e8fe08703e3f50b1894 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 3 May 2024 10:24:50 +0200 Subject: drm/xe/debugfs: Get a runtime_pm reference when setting wedged mode This function is another entry point where it must be ensured that the device resumes before operating on the GuC, so grab a runtime_pm reference. This fixes inner xe_pm_runtime_get_noresume calls which were previously failing. 
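The fix follows the standard runtime PM bracket around hardware access: take a reference, which resumes the device if it is suspended, before touching the GuC, and drop it when done. A minimal sketch of the pattern (do_guc_work() is a placeholder; note that any early return between get and put must drop the reference as well):

    xe_pm_runtime_get(xe);      /* device guaranteed awake from here */
    ret = do_guc_work(xe);      /* safe to talk to the GuC */
    xe_pm_runtime_put(xe);      /* device may runtime-suspend again */
    return ret;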
Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240503082450.268335-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 98e3fbde50ea..2c060a0d6251 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -153,6 +153,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, xe->wedged.mode = wedged_mode; + xe_pm_runtime_get(xe); for_each_gt(gt, xe, id) { ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads); if (ret) { @@ -160,6 +161,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, return -EIO; } } + xe_pm_runtime_put(xe); return size; } -- cgit From e9c190b9b8e7e07bc0ef0ba9b87321fa37b456c5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 3 May 2024 15:03:31 -0400 Subject: drm/xe: Demote CCS_MODE info to debug only This information is printed on every gt_reset, which actually occurs on every runtime resume, so it can be very verbose in production builds. Let's demote it to debug only. Cc: Niranjana Vishwanathapura Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240503190331.6690-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 396aeb5b9924..a34c9a24dafc 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,8 +68,8 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) xe_mmio_write32(gt, CCS_MODE, mode); - xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", - mode, config, num_engines, num_slices); + xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", + mode, config, num_engines, num_slices); } void xe_gt_apply_ccs_mode(struct xe_gt *gt) -- cgit From c462f81b695a7cfde5ba3b0ea1a52c6abaa52a0b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:25 +0200 Subject: drm/xe: Introduce has_atomic_enable_pte_bit device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add has_atomic_enable_pte_bit to specify that a device has the PTE_AE bit in its PTE field. Currently XE2 and PVC support this, so set it for those two. This will help consolidate setting the atomic access bit in the PTE logic, which is spread across multiple files.
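Consumers can then test the capability instead of open-coding platform lists; a later patch in this series converts xe_vma_create() exactly this way:

    /* before: platform knowledge duplicated at the use site */
    if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
        vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

    /* after: capability decided once at init time */
    if (vm->xe->info.has_atomic_enable_pte_bit)
        vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;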
Reviewed-by: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-2-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 3 +++ drivers/gpu/drm/xe/xe_pci_types.h | 1 + 3 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0f68c55ea405..7cddb00f9c35 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -281,6 +281,8 @@ struct xe_device { u8 has_heci_gscfi:1; /** @info.skip_guc_pc: Skip GuC based PM feature init */ u8 skip_guc_pc:1; + /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ + u8 has_atomic_enable_pte_bit:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) struct { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 27edf4fd8bb8..c385f4ddf163 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -146,6 +146,7 @@ static const struct xe_graphics_desc graphics_xehpc = { .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_asid = 1, + .has_atomic_enable_pte_bit = 1, .has_flat_ccs = 0, .has_usm = 1, }; @@ -163,6 +164,7 @@ static const struct xe_graphics_desc graphics_xelpg = { #define XE2_GFX_FEATURES \ .dma_mask_size = 46, \ .has_asid = 1, \ + .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ @@ -629,6 +631,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; + xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index b1ad12fa22d6..e1f2b4879fc2 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -25,6 +25,7 @@ struct xe_graphics_desc { u8 max_remote_tiles:2; u8 has_asid:1; + u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; -- cgit From e7192f0162a069bc80a519c087bd2a2f18597d52 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:26 +0200 Subject: drm/xe: Move vm bind bo validation to a helper function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move vm bind bo validation to a helper function to make the xe_vm_bind_ioctl() more readable. v2: Capture ret value of xe_vm_bind_ioctl_validate_bo(Matt B). Remove redundant coh_mode param. 
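The helper gathers three classes of checks behind one call, so the ioctl loop reduces to the sketch below (matching the diff that follows):

    /* validates that: the range/offset lie within the BO; 64K-page BOs have
     * 64K-aligned addr/range/offset; and the PAT index coherency mode is
     * compatible with the BO's cpu_caching (imported dma-bufs need coherency) */
    err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
                                       obj_offset, pat_index);
    if (err)
        goto put_obj;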
Reviewed-by: Matthew Brost Reviewed-by: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-3-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_vm.c | 77 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 34 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dfd31b346021..f1357e2a3b10 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3057,6 +3057,46 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, vops->num_syncs = num_syncs; } +static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, + u64 addr, u64 range, u64 obj_offset, + u16 pat_index) +{ + u16 coh_mode; + + if (XE_IOCTL_DBG(xe, range > bo->size) || + XE_IOCTL_DBG(xe, obj_offset > + bo->size - range)) { + return -EINVAL; + } + + if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + if (XE_IOCTL_DBG(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { + return -EINVAL; + } + } + + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (bo->cpu_caching) { + if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { + return -EINVAL; + } + } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { + /* + * Imported dma-buf from a different device should + * require 1way or 2way coherency since we don't know + * how it was mapped on the CPU. Just assume is it + * potentially cached on CPU side. + */ + return -EINVAL; + } + + return 0; +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -3140,7 +3180,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 obj = bind_ops[i].obj; u64 obj_offset = bind_ops[i].obj_offset; u16 pat_index = bind_ops[i].pat_index; - u16 coh_mode; if (!obj) continue; @@ -3152,40 +3191,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } bos[i] = gem_to_xe_bo(gem_obj); - if (XE_IOCTL_DBG(xe, range > bos[i]->size) || - XE_IOCTL_DBG(xe, obj_offset > - bos[i]->size - range)) { - err = -EINVAL; - goto put_obj; - } - - if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) { - if (XE_IOCTL_DBG(xe, obj_offset & - XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - err = -EINVAL; - goto put_obj; - } - } - - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (bos[i]->cpu_caching) { - if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - err = -EINVAL; - goto put_obj; - } - } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { - /* - * Imported dma-buf from a different device should - * require 1way or 2way coherency since we don't know - * how it was mapped on the CPU. Just assume is it - * potentially cached on CPU side. 
- */ - err = -EINVAL; + err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, + obj_offset, pat_index); + if (err) goto put_obj; - } } if (args->num_syncs) { -- cgit From 06e69a424930154bf030a56f8ddf781aee71f0e3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:27 +0200 Subject: drm/xe: Introduce has_device_atomics_on_smem device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add has_device_atomics_on_smem to specify that a device supports device atomics on system memory. Currently XE2 supports this so set this for XE2. v2: Set has_device_atomics_on_smem for all platform but PVC. Reviewed-by: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-4-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7cddb00f9c35..0af739981ebf 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -283,6 +283,8 @@ struct xe_device { u8 skip_guc_pc:1; /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ u8 has_atomic_enable_pte_bit:1; + /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ + u8 has_device_atomics_on_smem:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) struct { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c385f4ddf163..99723a423850 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -632,6 +632,8 @@ static int xe_info_init(struct xe_device *xe, xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; + if (xe->info.platform != XE_PVC) + xe->info.has_device_atomics_on_smem = 1; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; -- cgit From a4b725767d93e3564019906ad43908b8bf3d4d9e Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:28 +0200 Subject: drm/xe: Add function to check if BO has single placement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new helper function xe_bo_has_single_placement() to check if a BO has single placement. Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-5-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_bo.c | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 1 + 2 files changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 52a16cb4e736..03f7fe7acf8c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -95,6 +95,20 @@ bool xe_bo_is_stolen(struct xe_bo *bo) return bo->ttm.resource->mem_type == XE_PL_STOLEN; } +/** + * xe_bo_has_single_placement - check if BO is placed only in one memory location + * @bo: The BO + * + * This function checks whether a given BO is placed in only one memory location. + * + * Returns: true if the BO is placed in a single memory location, false otherwise. 
+ * + */ +bool xe_bo_has_single_placement(struct xe_bo *bo) +{ + return bo->placement.num_placement == 1; +} + /** * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR * @bo: The BO diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a885b14bf595..6de894c728f5 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -206,6 +206,7 @@ bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); +bool xe_bo_has_single_placement(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); -- cgit From a0862cf2febcc37188ab47441b69960c8c8f3fa3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:29 +0200 Subject: drm/xe: Refactor default device atomic settings The default behavior of device atomics depends on the VM type and buffer allocation types. Device atomics are expected to function with all types of allocations for traditional applications/APIs. Additionally, in compute/SVM API scenarios with fault mode or LR mode VMs, device atomics must work with single-region allocations. In all other cases device atomics should be disabled by default also on platforms where we know device atomics doesn't on work on particular allocations types. v3: fault mode requires LR mode so only check for LR mode to determine compute API(Jose). Handle SMEM+LMEM BO's migration to LMEM where device atomics is expected to work. (Brian). v2: Fix platform checks to correct atomics behaviour on PVC. Acked-by: Michal Mrozek Reviewed-by: Oak Zeng Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-6-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_pt.c | 37 ++++++++++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_vm.c | 2 +- 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 8d3765d3351e..87975e45622a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -619,9 +619,40 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; - if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && - (is_devmem || !IS_DGFX(xe))) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + /** + * Default atomic expectations for different allocation scenarios are as follows: + * + * 1. Traditional API: When the VM is not in LR mode: + * - Device atomics are expected to function with all allocations. + * + * 2. Compute/SVM API: When the VM is in LR mode: + * - Device atomics are the default behavior when the bo is placed in a single region. + * - In all other cases device atomics will be disabled with AE=0 until an application + * request differently using a ioctl like madvise. + */ + if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { + if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { + if (bo && xe_bo_has_single_placement(bo)) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + /** + * If a SMEM+LMEM allocation is backed by SMEM, a device + * atomics will cause a gpu page fault and which then + * gets migrated to LMEM, bind such allocations with + * device atomics enabled. 
+ */ + else if (is_devmem && !xe_bo_has_single_placement(bo)) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + } else { + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + } + + /** + * Unset AE if the platform(PVC) doesn't support it on an + * allocation + */ + if (!xe->info.has_device_atomics_on_smem && !is_devmem) + xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; + } if (is_devmem) { xe_walk.default_pte |= XE_PPGTT_PTE_DM; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f1357e2a3b10..d17192c8b7de 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -888,7 +888,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; - if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) + if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; vma->pat_index = pat_index; -- cgit From 72c7163f27483c333a1f27916505459efa1a373a Mon Sep 17 00:00:00 2001 From: Janga Rahul Kumar Date: Sat, 4 May 2024 01:09:01 +0530 Subject: drm/xe: Relocate regs_are_mcr function Relocate regs_are_mcr funciton to a higher position in the file for improved visibility. Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Janga Rahul Kumar Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240503193902.2056202-2-janga.rahul.kumar@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 1e92f8ee07ba..aef09eb423e9 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -255,6 +255,16 @@ static const struct xe_mocs_entry gen12_mocs_desc[] = { L3_1_UC) }; +static bool regs_are_mcr(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_gt_is_media_type(gt)) + return MEDIA_VER(xe) >= 20; + else + return GRAPHICS_VERx100(xe) >= 1250; +} + static const struct xe_mocs_entry dg1_mocs_desc[] = { /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -467,16 +477,6 @@ static u32 get_entry_control(const struct xe_mocs_info *info, return info->table[info->unused_entries_index].control_value; } -static bool regs_are_mcr(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - - if (xe_gt_is_media_type(gt)) - return MEDIA_VER(xe) >= 20; - else - return GRAPHICS_VERx100(xe) >= 1250; -} - static void __init_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { -- cgit From 9fbd0adbcbe81e207eb030d9ad59953905625dd1 Mon Sep 17 00:00:00 2001 From: Janga Rahul Kumar Date: Sat, 4 May 2024 01:09:02 +0530 Subject: drm/xe/mocs: Add debugfs node to dump mocs This is useful to check mocs configuration. Tests/Tools can use this debugfs entry to get mocs info. v2: Address review comments. Change debugfs output style similar to pat debugfs. (Lucas De Marchi) v3: rebase. v4: Address review comments. Use function pointer inside ops struct. Update Test-with links. Remove usage of flags wherever not required. (Lucas De Marchi) v5: Address review comments. Move register defines. Modify mocs info struct to avoid holes. 
(Luca De Marchi) Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Janga Rahul Kumar Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240503193902.2056202-3-janga.rahul.kumar@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 38 ++++- drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 ++ drivers/gpu/drm/xe/xe_mocs.c | 279 +++++++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_mocs.h | 8 + 4 files changed, 304 insertions(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 83847f2da72a..8f44437c8e02 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -59,6 +59,27 @@ #define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) #define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) +#define LE_SSE_MASK REG_GENMASK(18, 17) +#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value) +#define LE_COS_MASK REG_GENMASK(16, 15) +#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK) +#define LE_SCF_MASK REG_BIT(14) +#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value) +#define LE_PFM_MASK REG_GENMASK(13, 11) +#define LE_PFM(value) REG_FIELD_PREP(LE_PFM_MASK, value) +#define LE_SCC_MASK REG_GENMASK(10, 8) +#define LE_SCC(value) REG_FIELD_PREP(LE_SCC_MASK, value) +#define LE_RSC_MASK REG_BIT(7) +#define LE_RSC(value) REG_FIELD_PREP(LE_RSC_MASK, value) +#define LE_AOM_MASK REG_BIT(6) +#define LE_AOM(value) REG_FIELD_PREP(LE_AOM_MASK, value) +#define LE_LRUM_MASK REG_GENMASK(5, 4) +#define LE_LRUM(value) REG_FIELD_PREP(LE_LRUM_MASK, value) +#define LE_TGT_CACHE_MASK REG_GENMASK(3, 2) +#define LE_TGT_CACHE(value) REG_FIELD_PREP(LE_TGT_CACHE_MASK, value) +#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) +#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -314,9 +335,24 @@ #define XEHPC_OVRLSCCC REG_BIT(0) /* L3 Cache Control */ +#define LNCFCMOCS_REG_COUNT 32 #define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) #define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) -#define LNCFCMOCS_REG_COUNT 32 +#define L3_UPPER_LKUP_MASK REG_BIT(23) +#define L3_UPPER_GLBGO_MASK REG_BIT(22) +#define L3_UPPER_IDX_CACHEABILITY_MASK REG_GENMASK(21, 20) +#define L3_UPPER_IDX_SCC_MASK REG_GENMASK(19, 17) +#define L3_UPPER_IDX_ESC_MASK REG_BIT(16) +#define L3_LKUP_MASK REG_BIT(7) +#define L3_LKUP(value) REG_FIELD_PREP(L3_LKUP_MASK, value) +#define L3_GLBGO_MASK REG_BIT(6) +#define L3_GLBGO(value) REG_FIELD_PREP(L3_GLBGO_MASK, value) +#define L3_CACHEABILITY_MASK REG_GENMASK(5, 4) +#define L3_CACHEABILITY(value) REG_FIELD_PREP(L3_CACHEABILITY_MASK, value) +#define L3_SCC_MASK REG_GENMASK(3, 1) +#define L3_SCC(value) REG_FIELD_PREP(L3_SCC_MASK, value) +#define L3_ESC_MASK REG_BIT(0) +#define L3_ESC(value) REG_FIELD_PREP(L3_ESC_MASK, value) #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 94f226a4438e..c5e562e143fd 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -20,6 +20,7 @@ #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" +#include "xe_mocs.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_reg_sr.h" @@ -202,6 +203,15 @@ static int pat(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int mocs(struct xe_gt *gt, struct drm_printer *p) +{ + 
xe_pm_runtime_get(gt_to_xe(gt)); + xe_mocs_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -257,6 +267,7 @@ static const struct drm_info_list debugfs_list[] = { {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index aef09eb423e9..4780708e5fae 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -13,6 +13,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_step_types.h" @@ -36,34 +37,23 @@ struct xe_mocs_entry { u16 used; }; +struct xe_mocs_info; + +struct xe_mocs_ops { + void (*dump)(struct xe_mocs_info *mocs, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p); +}; + struct xe_mocs_info { unsigned int size; unsigned int n_entries; const struct xe_mocs_entry *table; + const struct xe_mocs_ops *ops; u8 uc_index; u8 wb_index; u8 unused_entries_index; }; -/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define _LE_CACHEABILITY(value) ((value) << 0) -#define _LE_TGT_CACHE(value) ((value) << 2) -#define LE_LRUM(value) ((value) << 4) -#define LE_AOM(value) ((value) << 6) -#define LE_RSC(value) ((value) << 7) -#define LE_SCC(value) ((value) << 8) -#define LE_PFM(value) ((value) << 11) -#define LE_SCF(value) ((value) << 14) -#define LE_COS(value) ((value) << 15) -#define LE_SSE(value) ((value) << 17) - -/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ -#define L3_ESC(value) ((value) << 0) -#define L3_SCC(value) ((value) << 1) -#define _L3_CACHEABILITY(value) ((value) << 4) -#define L3_GLBGO(value) ((value) << 6) -#define L3_LKUP(value) ((value) << 7) - /* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ #define IG_PAT REG_BIT(8) #define L3_CACHE_POLICY_MASK REG_GENMASK(5, 4) @@ -80,22 +70,22 @@ struct xe_mocs_info { * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means * the same as LE_UC */ -#define LE_0_PAGETABLE _LE_CACHEABILITY(0) -#define LE_1_UC _LE_CACHEABILITY(1) -#define LE_2_WT _LE_CACHEABILITY(2) -#define LE_3_WB _LE_CACHEABILITY(3) +#define LE_0_PAGETABLE LE_CACHEABILITY(0) +#define LE_1_UC LE_CACHEABILITY(1) +#define LE_2_WT LE_CACHEABILITY(2) +#define LE_3_WB LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) -#define LE_TC_1_LLC _LE_TGT_CACHE(1) -#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) -#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) +#define LE_TC_0_PAGETABLE LE_TGT_CACHE(0) +#define LE_TC_1_LLC LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT LE_TGT_CACHE(3) /* L3 caching options */ -#define L3_0_DIRECT _L3_CACHEABILITY(0) -#define L3_1_UC _L3_CACHEABILITY(1) -#define L3_2_RESERVED _L3_CACHEABILITY(2) -#define L3_3_WB _L3_CACHEABILITY(3) +#define L3_0_DIRECT L3_CACHEABILITY(0) +#define L3_1_UC L3_CACHEABILITY(1) +#define L3_2_RESERVED L3_CACHEABILITY(2) +#define L3_3_WB 
L3_CACHEABILITY(3) /* L4 caching options */ #define L4_0_WB REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0) @@ -107,6 +97,8 @@ struct xe_mocs_info { #define XE2_L3_1_XD REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1) #define XE2_L3_3_UC REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3) +#define XE2_L3_CLOS_MASK REG_GENMASK(7, 6) + #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ [__idx] = { \ .control_value = __control_value, \ @@ -265,6 +257,74 @@ static bool regs_are_mcr(struct xe_gt *gt) return GRAPHICS_VERx100(xe) >= 1250; } +static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [ESC, SCC, L3CC] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j++, + !!(reg_val & L3_ESC_MASK), + REG_FIELD_GET(L3_SCC_MASK, reg_val), + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j, + !!(reg_val & L3_UPPER_IDX_ESC_MASK), + REG_FIELD_GET(L3_UPPER_IDX_SCC_MASK, reg_val), + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + if (flags & HAS_GLOBAL_MOCS) { + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [LeCC, TC, LRUM, AOM, RSC, SCC, PFM, SCF, CoS, SSE] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n", + i, + REG_FIELD_GET(LE_CACHEABILITY_MASK, reg_val), + REG_FIELD_GET(LE_TGT_CACHE_MASK, reg_val), + REG_FIELD_GET(LE_LRUM_MASK, reg_val), + !!(reg_val & LE_AOM_MASK), + !!(reg_val & LE_RSC_MASK), + REG_FIELD_GET(LE_SCC_MASK, reg_val), + REG_FIELD_GET(LE_PFM_MASK, reg_val), + !!(reg_val & LE_SCF_MASK), + REG_FIELD_GET(LE_COS_MASK, reg_val), + REG_FIELD_GET(LE_SSE_MASK, reg_val), + reg_val); + } + } + + xelp_lncf_dump(info, gt, p); +} + +static const struct xe_mocs_ops xelp_mocs_ops = { + .dump = xelp_mocs_dump, +}; + static const struct xe_mocs_entry dg1_mocs_desc[] = { /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -301,6 +361,40 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; +static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [UCL3LOOKUP, GLBGO, L3CC] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j++, + !!(reg_val & L3_LKUP_MASK), + !!(reg_val & L3_GLBGO_MASK), + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j, + !!(reg_val & L3_UPPER_LKUP_MASK), + !!(reg_val & L3_UPPER_GLBGO_MASK), + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + 
reg_val); + } +} + +static const struct xe_mocs_ops xehp_mocs_ops = { + .dump = xehp_lncf_dump, +}; + static const struct xe_mocs_entry pvc_mocs_desc[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -312,6 +406,36 @@ static const struct xe_mocs_entry pvc_mocs_desc[] = { MOCS_ENTRY(2, 0, L3_3_WB), }; +static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct xe_gt *gt, + struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [ L3CC ] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", + j++, + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", + j, + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops pvc_mocs_ops = { + .dump = pvc_mocs_dump, +}; + static const struct xe_mocs_entry mtl_mocs_desc[] = { /* Error - Reserved for Non-Use */ MOCS_ENTRY(0, @@ -363,6 +487,36 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = { L3_GLBGO(1) | L3_1_UC), }; +static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L4_CACHE_POLICY] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n", + i, + !!(reg_val & IG_PAT), + REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val), + reg_val); + } + + /* MTL lncf mocs table pattern is similar to that of xehp */ + xehp_lncf_dump(info, flags, gt, p); +} + +static const struct xe_mocs_ops mtl_mocs_ops = { + .dump = mtl_mocs_dump, +}; + static const struct xe_mocs_entry xe2_mocs_table[] = { /* Defer to PAT */ MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), @@ -376,6 +530,34 @@ static const struct xe_mocs_entry xe2_mocs_table[] = { MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), }; +static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L3_CLOS, L3_CACHE_POLICY, L4_CACHE_POLICY] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u] (%#8x)\n", + i, + !!(reg_val & IG_PAT), + REG_FIELD_GET(XE2_L3_CLOS_MASK, reg_val), + REG_FIELD_GET(L3_CACHE_POLICY_MASK, reg_val), + REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops xe2_mocs_ops = { + .dump = xe2_mocs_dump, +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { @@ -386,6 +568,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, switch (xe->info.platform) { case XE_LUNARLAKE: case XE_BATTLEMAGE: + info->ops = &xe2_mocs_ops; info->size = ARRAY_SIZE(xe2_mocs_table); info->table = xe2_mocs_table; info->n_entries = XE2_NUM_MOCS_ENTRIES; @@ -394,6 +577,7 @@ static unsigned int 
get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 4; break; case XE_PVC: + info->ops = &pvc_mocs_ops; info->size = ARRAY_SIZE(pvc_mocs_desc); info->table = pvc_mocs_desc; info->n_entries = PVC_NUM_MOCS_ENTRIES; @@ -402,6 +586,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 2; break; case XE_METEORLAKE: + info->ops = &mtl_mocs_ops; info->size = ARRAY_SIZE(mtl_mocs_desc); info->table = mtl_mocs_desc; info->n_entries = MTL_NUM_MOCS_ENTRIES; @@ -409,6 +594,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 1; break; case XE_DG2: + info->ops = &xehp_mocs_ops; info->size = ARRAY_SIZE(dg2_mocs_desc); info->table = dg2_mocs_desc; info->uc_index = 1; @@ -420,6 +606,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 3; break; case XE_DG1: + info->ops = &xelp_mocs_ops; info->size = ARRAY_SIZE(dg1_mocs_desc); info->table = dg1_mocs_desc; info->uc_index = 1; @@ -431,6 +618,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_ALDERLAKE_S: case XE_ALDERLAKE_P: case XE_ALDERLAKE_N: + info->ops = &xelp_mocs_ops; info->size = ARRAY_SIZE(gen12_mocs_desc); info->table = gen12_mocs_desc; info->n_entries = XELP_NUM_MOCS_ENTRIES; @@ -452,6 +640,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, */ xe_assert(xe, info->unused_entries_index != 0); + xe_assert(xe, !info->ops || info->ops->dump); + if (XE_WARN_ON(info->size > info->n_entries)) { info->table = NULL; return 0; @@ -578,6 +768,33 @@ void xe_mocs_init(struct xe_gt *gt) init_l3cc_table(gt, &table); } +void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_mocs_info table; + unsigned int flags; + u32 ret; + struct xe_device *xe = gt_to_xe(gt); + + flags = get_mocs_settings(xe, &table); + + if (!table.ops || !table.ops->dump) + return; + + xe_pm_runtime_get_noresume(xe); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + + if (ret) + goto err_fw; + + table.ops->dump(&table, flags, gt, p); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + +err_fw: + xe_assert(xe, !ret); + xe_pm_runtime_put(xe); +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_mocs.c" #endif diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index 053754c5a94e..d6fa4485a6e9 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -10,8 +10,16 @@ struct xe_exec_queue; struct xe_gt; +struct drm_printer; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); +/** + * xe_mocs_dump - Dump mocs table + * @gt: GT structure + * @p: Printer to dump info to + */ +void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); + #endif -- cgit From 50aec9665e0babd62b9eee4e613d9a1ef8d2b7de Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 5 May 2024 20:47:58 -0700 Subject: drm/xe: Use ordered WQ for G2H handler System work queues are shared; use a dedicated work queue for G2H processing to avoid G2H processing getting blocked behind system tasks. 
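For reference, the dedicated-workqueue pattern in isolation (a sketch; the demo_* names are hypothetical, while alloc_ordered_workqueue()/queue_work()/destroy_workqueue() are the real kernel APIs used by the diff below):

#include <linux/errno.h>
#include <linux/workqueue.h>

struct demo_ctb {
	struct workqueue_struct *wq;	/* private, ordered queue */
	struct work_struct worker;	/* assumed INIT_WORK()'d elsewhere */
};

static int demo_ctb_init(struct demo_ctb *ctb)
{
	/* Ordered wq: strict FIFO, one item in flight, and private, so
	 * load on the shared system workqueues cannot delay it. */
	ctb->wq = alloc_ordered_workqueue("demo-g2h-wq", 0);
	return ctb->wq ? 0 : -ENOMEM;
}

static void demo_ctb_irq_handler(struct demo_ctb *ctb)
{
	queue_work(ctb->wq, &ctb->worker);	/* instead of system_unbound_wq */
}

static void demo_ctb_fini(struct demo_ctb *ctb)
{
	destroy_workqueue(ctb->wq);	/* flushes queued work before freeing */
}
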
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240506034758.3697397-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 5 +++++ drivers/gpu/drm/xe/xe_guc_ct.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8ac819a7061e..0151d29b3c58 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -121,6 +121,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } @@ -146,6 +147,10 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0); + if (!ct->g2h_wq) + return -ENOMEM; + spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 5083e099064f..105bb8e99a8d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -34,7 +34,7 @@ static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) return; wake_up_all(&ct->wq); - queue_work(system_unbound_wq, &ct->g2h_worker); + queue_work(ct->g2h_wq, &ct->g2h_worker); xe_guc_ct_fast_path(ct); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index d29144c9f20b..fede4c6e93cb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -120,6 +120,8 @@ struct xe_guc_ct { wait_queue_head_t wq; /** @g2h_fence_wq: wait queue used for G2H fencing */ wait_queue_head_t g2h_fence_wq; + /** @g2h_wq: used to process G2H */ + struct workqueue_struct *g2h_wq; /** @msg: Message buffer */ u32 msg[GUC_CTB_MSG_MAX_LEN]; /** @fast_msg: Message buffer */ -- cgit From 5b882c1e5a355d034c0e08fba2402b4451765ab2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 6 May 2024 22:52:54 +0200 Subject: drm/xe: Fix xe_mocs.h We don't need to include . We don't use struct xe_exec_queue here. We should sort forward declarations. Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240506205254.2659-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_mocs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index d6fa4485a6e9..dc972ffd4d07 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -6,11 +6,8 @@ #ifndef _XE_MOCS_H_ #define _XE_MOCS_H_ -#include - -struct xe_exec_queue; -struct xe_gt; struct drm_printer; +struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); -- cgit From a4cb575d910a5c65c5f8b764e2b5f56b66019522 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Mon, 6 May 2024 22:29:50 +0200 Subject: drm/xe/vm_doc: Fix some typos Fix some typos and add / remove / change a few words to improve readability and prevent some ambiguities. 
Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240506202950.109750-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_doc.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index bdc6659891a5..4d33f310b653 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -25,7 +25,7 @@ * VM bind (create GPU mapping for a BO or userptr) * ================================================ * - * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same + * Creates GPU mappings for a BO or userptr within a VM. VM binds uses the same * in / out fence interface (struct drm_xe_sync) as execs which allows users to * think of binds and execs as more or less the same operation. * @@ -190,8 +190,8 @@ * Deferred binds in fault mode * ---------------------------- * - * In a VM is in fault mode (TODO: link to fault mode), new bind operations that - * create mappings are by default are deferred to the page fault handler (first + * If a VM is in fault mode (TODO: link to fault mode), new bind operations that + * create mappings are by default deferred to the page fault handler (first * use). This behavior can be overriden by setting the flag * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping * immediately. @@ -225,7 +225,7 @@ * * A VM in compute mode enables long running workloads and ultra low latency * submission (ULLS). ULLS is implemented via a continuously running batch + - * semaphores. This enables to the user to insert jump to new batch commands + * semaphores. This enables the user to insert jump to new batch commands * into the continuously running batch. In both cases these batches exceed the * time a dma fence is allowed to exist for before signaling, as such dma fences * are not used when a VM is in compute mode. User fences (TODO: link user fence @@ -244,7 +244,7 @@ * Once all preempt fences are signaled for a VM the kernel can safely move the * memory and kick the rebind worker which resumes all the engines execution. * - * A preempt fence, for every engine using the VM, is installed the VM's + * A preempt fence, for every engine using the VM, is installed into the VM's * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every * engine using the VM, is also installed into the same dma-resv slot of every * external BO mapped in the VM. @@ -314,7 +314,7 @@ * signaling, and memory allocation is usually required to resolve a page * fault, but memory allocation is not allowed to gate dma fence signaling. As * such, dma fences are not allowed when VM is in fault mode. Because dma-fences - * are not allowed, long running workloads and ULLS are enabled on a faulting + * are not allowed, only long running workloads and ULLS are enabled on a faulting * VM. * * Defered VM binds @@ -399,14 +399,14 @@ * Notice no rebind is issued in the access counter handler as the rebind will * be issued on next page fault. 
* - * Cavets with eviction / user pointer invalidation - * ------------------------------------------------ + * Caveats with eviction / user pointer invalidation + * ------------------------------------------------- * * In the case of eviction and user pointer invalidation on a faulting VM, there * is no need to issue a rebind rather we just need to blow away the page tables * for the VMAs and the page fault handler will rebind the VMAs when they fault. - * The cavet is to update / read the page table structure the VM global lock is - * neeeed. In both the case of eviction and user pointer invalidation locks are + * The caveat is to update / read the page table structure the VM global lock is + * needed. In both the case of eviction and user pointer invalidation locks are * held which make acquiring the VM global lock impossible. To work around this * every VMA maintains a list of leaf page table entries which should be written * to zero to blow away the VMA's page tables. After writing zero to these @@ -427,9 +427,9 @@ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects * the list of userptrs mapped in the VM, the list of engines using this VM, and * the array of external BOs mapped in the VM. When adding or removing any of the - * aforemented state from the VM should acquire this lock in write mode. The VM + * aforementioned state from the VM should acquire this lock in write mode. The VM * bind path also acquires this lock in write while the exec / compute mode - * rebind worker acquire this lock in read mode. + * rebind worker acquires this lock in read mode. * * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv * slots which is shared with any private BO in the VM. Expected to be acquired -- cgit From c18a5e3e61650110b5d8523292abaf6ae19ebdd2 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 30 Apr 2024 18:42:29 +0530 Subject: drm/xe: skip error capture when exec queue is killed When the user closes an exec queue soon after job submission, we generate an error coredump. Instead, check whether the exec queue was killed during the job timeout and, if so, skip the error coredump capture. V2: - Just skip error capture - MattB Signed-off-by: Tejas Upadhyay Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240430131229.2228809-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d274a139010b..2c0aa3443cd9 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -980,8 +980,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), "VM job timed out on non-killed execqueue\n"); - simple_error_capture(q); - xe_devcoredump(job); + if (!exec_queue_killed(q)) { + simple_error_capture(q); + xe_devcoredump(job); + } trace_xe_sched_job_timedout(job); -- cgit From 7348a9a1122884ccfc414166daaf3977100d1c30 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 13:09:57 +0200 Subject: drm/xe: Don't rely on xe_assert.h to be included elsewhere While xe_assert.h is now included and used by xe_force_wake.h, we want to stop including xe_force_wake.h from xe_device.h as it's not needed there. Explicitly include xe_assert.h where needed. 
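The rule being applied is plain include-what-you-use; a minimal sketch of a file that calls xe_assert() and therefore includes the header itself (the file and function are hypothetical, and the comment about debug builds reflects the usual xe_assert() expectation rather than a quote from the header):

/* demo.c -- uses xe_assert(), so it includes xe_assert.h directly
 * instead of inheriting it through xe_device.h. */
#include "xe_assert.h"
#include "xe_device.h"

static void demo_validate(struct xe_device *xe, int n_entries)
{
	/* Sanity check, same shape as the driver's own
	 * xe_assert(xe, info->unused_entries_index != 0) usage;
	 * typically compiled out on non-debug builds. */
	xe_assert(xe, n_entries > 0);
}
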
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240507110959.2747-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_bb.c | 1 + drivers/gpu/drm/xe/xe_gsc_submit.c | 1 + drivers/gpu/drm/xe/xe_gt_clock.c | 1 + drivers/gpu/drm/xe/xe_uc.c | 1 + drivers/gpu/drm/xe/xe_vm.h | 1 + 5 files changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 541361caff3b..37e056fde95d 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -7,6 +7,7 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index d34d03248843..9ede483d37ef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -8,6 +8,7 @@ #include #include "abi/gsc_command_header_abi.h" +#include "xe_assert.h" #include "xe_bb.h" #include "xe_exec_queue.h" #include "xe_gt_printk.h" diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index c7bca20f6b65..9ff2061133df 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -7,6 +7,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0f6cfe06e635..45035e38388b 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -5,6 +5,7 @@ #include "xe_uc.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_gsc.h" #include "xe_gsc_proxy.h" diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 204a4ff63f88..3ac9021f970e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -6,6 +6,7 @@ #ifndef _XE_VM_H_ #define _XE_VM_H_ +#include "xe_assert.h" #include "xe_bo_types.h" #include "xe_macros.h" #include "xe_map.h" -- cgit From 93dd6ad89c7d436da988cb5917daf406a3941893 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 13:09:58 +0200 Subject: drm/xe: Don't rely on xe_force_wake.h to be included elsewhere While xe_force_wake.h is now included from the xe_device.h, we want to drop that include as we don't need it there. Explicitly include xe_force_wake.h where needed. 
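For reference, the force-wake bracket these callers depend on (and the reason each now pulls in xe_force_wake.h itself) is the same get/put pattern visible in xe_mocs_dump() above; a sketch, with demo_touch_gt_regs() being hypothetical:

#include "xe_force_wake.h"
#include "xe_gt.h"

static void demo_touch_gt_regs(struct xe_gt *gt)
{
	/* Wake the GT power domain before any register access. */
	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
		return;	/* wake request failed; don't touch the HW */

	/* ... MMIO reads/writes are safe here ... */

	/* Drop the reference so the GT may power-gate again. */
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
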
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240507110959.2747-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 1 + drivers/gpu/drm/xe/xe_debugfs.c | 1 + drivers/gpu/drm/xe/xe_gsc.c | 1 + drivers/gpu/drm/xe/xe_gsc_proxy.c | 1 + drivers/gpu/drm/xe/xe_gt_idle.c | 1 + drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 1 + drivers/gpu/drm/xe/xe_guc_pc.c | 1 + drivers/gpu/drm/xe/xe_mmio.c | 1 + drivers/gpu/drm/xe/xe_mocs.c | 1 + drivers/gpu/drm/xe/xe_pat.c | 1 + drivers/gpu/drm/xe/xe_query.c | 1 + 11 files changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index d46f87a039f2..eb67ecf08db2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -13,6 +13,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_device_types.h" +#include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 2c060a0d6251..1011e5d281fa 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -12,6 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt_debugfs.h" #include "xe_gt_printk.h" #include "xe_guc_ads.h" diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 60202b903687..8cc6420a9e7f 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 1b908d238bd1..6d6d1068cf23 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -15,6 +15,7 @@ #include "abi/gsc_proxy_commands_abi.h" #include "regs/xe_gsc_regs.h" #include "xe_bo.h" +#include "xe_force_wake.h" #include "xe_gsc.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 8fc0f3f6ecc5..a4f6f0a96d05 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -5,6 +5,7 @@ #include +#include "xe_force_wake.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_idle.h" diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index c3d015a7ac33..105797776a6c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -7,6 +7,7 @@ #include "abi/guc_actions_abi.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_guc.h" diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 8fc757900ed1..d10aab29651e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -15,6 +15,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_idle.h" #include "xe_gt_sysfs.h" diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 2b18e8149ec3..05edab0e085d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -15,6 +15,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include 
"xe_device.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 4780708e5fae..f04754ad911b 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -9,6 +9,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index d5b516f115ad..4ee32ee1cc88 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -10,6 +10,7 @@ #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index df407d73e5f5..29f847debb5c 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -16,6 +16,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc_hwconfig.h" -- cgit From b7f6318a9c3d9c79b724b20ff5382775a9c58346 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 13:09:59 +0200 Subject: drm/xe: Fix xe_device.h Some explicit includes are needed only from the xe_device.c. And there is no need for redundant forward declarations. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240507110959.2747-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_device.h | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca7a101bd34e..f8eb477f359d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -16,6 +16,7 @@ #include #include "display/xe_display.h" +#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" @@ -26,6 +27,7 @@ #include "xe_drv.h" #include "xe_exec.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gsc_proxy.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 82317580f4bf..3ed14072d8d1 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -6,15 +6,9 @@ #ifndef _XE_DEVICE_H_ #define _XE_DEVICE_H_ -struct xe_exec_queue; -struct xe_file; - #include -#include "regs/xe_gpu_commands.h" #include "xe_device_types.h" -#include "xe_force_wake.h" -#include "xe_macros.h" static inline struct xe_device *to_xe_device(const struct drm_device *dev) { -- cgit From ee7284230644e21fef0e38fc5bf8f907b6bb7f7c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 6 May 2024 07:19:17 -0700 Subject: drm/xe/ads: Use flexible-array Zero-length arrays are deprecated and flexible arrays should be used instead: https://www.kernel.org/doc/html/v6.9-rc7/process/deprecated.html#zero-length-and-one-element-arrays Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202405051824.AmjAI5Pg-lkp@intel.com/ Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240506141917.205714-1-lucas.demarchi@intel.com 
Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 6a5eb21748b1..b48639cfe955 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -109,7 +109,7 @@ struct __guc_ads_blob { struct guc_engine_usage engine_usage; struct guc_um_init_params um_init_params; /* From here on, location is dynamic! Refer to above diagram. */ - struct guc_mmio_reg regset[0]; + struct guc_mmio_reg regset[]; } __packed; #define ads_blob_read(ads_, field_) \ -- cgit From 598dc939edf8d7bb1d69e84513c31451812128fc Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Thu, 18 Apr 2024 16:45:34 +0530 Subject: drm/xe/xe2: Add workaround 14021402888 This workaround applies to Graphics 20.01 as an RCS engine workaround. Signed-off-by: Bommu Krishnaiah Cc: Tejas Upadhyay Cc: Matt Roper Cc: Himal Prasad Ghimiray Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240418111534.481568-1-krishnaiah.bommu@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 8f44437c8e02..9cacdcfe27ff 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -402,6 +402,7 @@ #define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) +#define CLEAR_OPTIMIZATION_DISABLE REG_BIT(6) #define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) #define DISABLE_ECC REG_BIT(5) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 134a34dbfe8d..05db53c1448c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -533,6 +533,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPM */ -- cgit From 85cfc412579c041f1aaebba71427acec75ceca39 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:50 -0700 Subject: drm/xe: Minor cleanup in LRC handling Properly define register fields and remove redundant lower_32_bits(). 
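To illustrate the cleanup, compare a raw hex mask with a declared bit field, here sketched with the generic kernel macros (illustrative demo_* values; the driver itself uses the REG_GENMASK()/REG_FIELD_GET() wrappers from its register headers):

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/bitfield.h>

/* Raw constant: the reader must decode 0x001FFFFC by hand. */
#define DEMO_HEAD_ADDR_RAW	0x001FFFFC

/* Declared field: the bit span 20:2 is explicit, and GENMASK(20, 2)
 * expands to the same 0x001FFFFC value. */
#define DEMO_HEAD_ADDR		GENMASK(20, 2)

static inline u32 demo_ring_head(u32 reg_val)
{
	/* Masking keeps the byte-aligned ring head address, matching how
	 * RING_HEAD readers consume it; FIELD_GET() would also shift. */
	return reg_val & DEMO_HEAD_ADDR;
}
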
Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-2-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++-- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 97d2aed63e01..7e1b0fd68275 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -44,9 +44,10 @@ #define GSCCS_RING_BASE 0x11a000 #define RING_TAIL(base) XE_REG((base) + 0x30) +#define TAIL_ADDR REG_GENMASK(20, 3) #define RING_HEAD(base) XE_REG((base) + 0x34) -#define HEAD_ADDR 0x001FFFFC +#define HEAD_ADDR REG_GENMASK(20, 2) #define RING_START(base) XE_REG((base) + 0x38) @@ -136,7 +137,6 @@ #define RING_VALID_MASK 0x00000001 #define RING_VALID 0x00000001 #define STOP_RING REG_BIT(8) -#define TAIL_ADDR 0x001FFFF8 #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2066d34ddf0b..f759f4c10914 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1354,7 +1354,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); -- cgit From d6219e1cd5e321351954e317b362db2c1d34402a Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:51 -0700 Subject: drm/xe: Add Indirect Ring State support When Indirect Ring State is enabled, the Ring Buffer state and Batch Buffer state are context save/restored to/from Indirect Ring State instead of the LRC. The Indirect Ring State is a 4K page mapped in global GTT at a 4K aligned address. This address is programmed in the INDIRECT_RING_STATE register of the corresponding context's LRC. 
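Condensed, the wiring this patch performs when an LRC is initialized amounts to three steps. The sketch below is assembled from the helpers the diff introduces; it is not a drop-in function, and error handling plus the legacy (non-indirect) path are omitted:

static void demo_wire_indirect_ring_state(struct xe_lrc *lrc, u32 *regs)
{
	/* 1) Opt the context in via the masked CTX_CONTEXT_CONTROL register. */
	regs[CTX_CONTEXT_CONTROL] |=
		_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* 2) Point the LRC at the 4K indirect ring state page in global GTT. */
	xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
			     __xe_lrc_indirect_ring_ggtt_addr(lrc));

	/* 3) Ring head/tail/start/ctl now live in that page, not in the LRC. */
	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
}
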
v2: Fix kernel-doc, add bspec reference v3: Fix typo in commit text Bspec: 67296, 67139 Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-3-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 7 ++ drivers/gpu/drm/xe/xe_gt.c | 6 +- drivers/gpu/drm/xe/xe_gt.h | 7 ++ drivers/gpu/drm/xe/xe_gt_types.h | 6 +- drivers/gpu/drm/xe/xe_guc_ads.c | 5 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 185 ++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_lrc.h | 5 +- drivers/gpu/drm/xe/xe_lrc_types.h | 4 + drivers/gpu/drm/xe/xe_pci.c | 2 + drivers/gpu/drm/xe/xe_pci_types.h | 3 + 12 files changed, 197 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7e1b0fd68275..260a44f46f7e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -125,6 +125,7 @@ #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) +#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 1825d8f79db6..e6ca8bbda8f4 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,6 +11,7 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) @@ -23,4 +24,10 @@ #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) +#define INDIRECT_CTX_RING_HEAD (0x02 + 1) +#define INDIRECT_CTX_RING_TAIL (0x04 + 1) +#define INDIRECT_CTX_RING_START (0x06 + 1) +#define INDIRECT_CTX_RING_START_UDW (0x08 + 1) +#define INDIRECT_CTX_RING_CTL (0x0a + 1) + #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0528d599c3fe..36c7b1631fa6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (q->hwe->class == XE_ENGINE_CLASS_RENDER) /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); + bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); else /* Just pick a large BB size */ bb = xe_bb_new(gt, SZ_4K, false); @@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_tuning_process_lrc(hwe); default_lrc = drmm_kzalloc(&xe->drm, - xe_lrc_size(xe, hwe->class), + xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!default_lrc) return -ENOMEM; @@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_map_memcpy_from(xe, default_lrc, &q->lrc[0].bo->vmap, xe_lrc_pphwsp_offset(&q->lrc[0]), - xe_lrc_size(xe, hwe->class)); + xe_gt_lrc_size(gt, hwe->class)); gt->default_lrc[hwe->class] = default_lrc; put_nop_q: diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index ed6ea8057e35..8474c50b1b30 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -8,6 +8,7 @@ #include +#include 
"xe_device.h" #include "xe_device_types.h" #include "xe_hw_engine.h" @@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, u16 instance, bool logical); +static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) +{ + return gt->info.has_indirect_ring_state && + xe_device_uc_enabled(gt_to_xe(gt)); +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index cfdc761ff7f4..8dc203413a27 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -110,8 +110,6 @@ struct xe_gt { struct { /** @info.type: type of GT */ enum xe_gt_type type; - /** @info.id: Unique ID of this GT within the PCI Device */ - u8 id; /** @info.reference_clock: clock frequency */ u32 reference_clock; /** @info.engine_mask: mask of engines present on GT */ @@ -124,6 +122,10 @@ struct xe_gt { u64 __engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; + /** @info.id: Unique ID of this GT within the PCI Device */ + u8 id; + /** @info.has_indirect_ring_state: GT has indirect ring state support */ + u8 has_indirect_ring_state:1; } info; /** diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index b48639cfe955..9c33cca4e370 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -267,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); size_t total_size = 0, alloc_size, real_size; int class; @@ -276,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) if (!engine_enable_mask(gt, class)) continue; - real_size = xe_lrc_size(xe, class); + real_size = xe_gt_lrc_size(gt, class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; } @@ -774,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) xe_gt_assert(gt, gt->default_lrc[class]); - real_size = xe_lrc_size(xe, class); + real_size = xe_gt_lrc_size(gt, class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 2c0aa3443cd9..fde527d34f58 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -677,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) if (xe_exec_queue_is_parallel(q)) wq_item_append(q); else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index f759f4c10914..ef954cd5cd68 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -34,12 +34,15 @@ #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) +#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K + struct xe_lrc_snapshot { struct xe_bo *lrc_bo; void *lrc_snapshot; unsigned long lrc_size, lrc_offset; u32 context_desc; + u32 indirect_context_desc; u32 head; struct { u32 internal; @@ -55,20 +58,25 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } -size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { + struct xe_device *xe = 
gt_to_xe(gt); + size_t size; + switch (class) { case XE_ENGINE_CLASS_RENDER: if (GRAPHICS_VER(xe) >= 20) - return 4 * SZ_4K; + size = 4 * SZ_4K; else - return 14 * SZ_4K; + size = 14 * SZ_4K; + break; case XE_ENGINE_CLASS_COMPUTE: /* 14 pages since graphics_ver == 11 */ if (GRAPHICS_VER(xe) >= 20) - return 3 * SZ_4K; + size = 3 * SZ_4K; else - return 14 * SZ_4K; + size = 14 * SZ_4K; + break; default: WARN(1, "Unknown engine class: %d", class); fallthrough; @@ -76,8 +84,14 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: case XE_ENGINE_CLASS_OTHER: - return 2 * SZ_4K; + size = 2 * SZ_4K; } + + /* Add indirect ring state page */ + if (xe_gt_has_indirect_ring_state(gt)) + size += LRC_INDIRECT_RING_STATE_SIZE; + + return size; } /* @@ -508,6 +522,32 @@ static const u8 xe2_xcs_offsets[] = { 0 }; +static const u8 xe2_indirect_ring_state_offsets[] = { + NOP(1), /* [0x00] */ + LRI(5, POSTED), /* [0x01] */ + REG(0x034), /* [0x02] RING_BUFFER_HEAD */ + REG(0x030), /* [0x04] RING_BUFFER_TAIL */ + REG(0x038), /* [0x06] RING_BUFFER_START */ + REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ + REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ + + NOP(5), /* [0x0c] */ + LRI(9, POSTED), /* [0x11] */ + REG(0x168), /* [0x12] BB_ADDR_UDW */ + REG(0x140), /* [0x14] BB_ADDR */ + REG(0x110), /* [0x16] BB_STATE */ + REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ + + NOP(12), /* [0x00] */ + + 0 +}; + #undef REG16 #undef REG #undef LRI @@ -546,6 +586,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (xe_gt_has_indirect_ring_state(hwe->gt)) + regs[CTX_CONTEXT_CONTROL] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); + /* TODO: Timestamp */ } @@ -589,6 +633,11 @@ static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) regs[x + 1] |= STOP_RING << 16; } +static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) +{ + return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; +} + static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) { return 0; @@ -643,6 +692,12 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } +static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) +{ + /* Indirect ring state page is at the very end of LRC */ + return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; +} + #define DECL_MAP_ADDR_HELPERS(elem) \ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ @@ -663,6 +718,7 @@ DECL_MAP_ADDR_HELPERS(seqno) DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(parallel) +DECL_MAP_ADDR_HELPERS(indirect_ring) #undef DECL_MAP_ADDR_HELPERS @@ -671,6 +727,35 @@ u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) return __xe_lrc_pphwsp_ggtt_addr(lrc); } +u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) +{ + if (!xe_lrc_has_indirect_ring_state(lrc)) + return 0; + + return __xe_lrc_indirect_ring_ggtt_addr(lrc); +} + +static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = 
__xe_lrc_indirect_ring_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + return xe_map_read32(xe, &map); +} + +static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, + int reg_nr, u32 val) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_indirect_ring_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + xe_map_write32(xe, &map, val); +} + u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) { struct xe_device *xe = lrc_to_xe(lrc); @@ -693,20 +778,25 @@ void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) static void *empty_lrc_data(struct xe_hw_engine *hwe) { - struct xe_device *xe = gt_to_xe(hwe->gt); + struct xe_gt *gt = hwe->gt; void *data; u32 *regs; - data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); + data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!data) return NULL; /* 1st page: Per-Process of HW status Page */ regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(xe, hwe->class), hwe); + set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); set_context_control(regs, hwe); set_memory_based_intr(regs, hwe); reset_stop_ring(regs, hwe); + if (xe_gt_has_indirect_ring_state(gt)) { + regs = data + xe_gt_lrc_size(gt, hwe->class) - + LRC_INDIRECT_RING_STATE_SIZE; + set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); + } return data; } @@ -731,23 +821,27 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct iosys_map map; void *init_data = NULL; u32 arb_enable; + u32 lrc_size; int err; lrc->flags = 0; + lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + if (xe_gt_has_indirect_ring_state(gt)) + lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, - ring_size + xe_lrc_size(xe, hwe->class), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->size = lrc_size; lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -772,10 +866,10 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); + xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); } else { xe_map_memcpy_to(xe, &map, 0, init_data, - xe_lrc_size(xe, hwe->class)); + xe_gt_lrc_size(gt, hwe->class)); kfree(init_data); } @@ -786,11 +880,25 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_drm_client_add_bo(vm->xef->client, lrc->bo); } - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + if (xe_gt_has_indirect_ring_state(gt)) { + xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, + __xe_lrc_indirect_ring_ggtt_addr(lrc)); + + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, + __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + } else { + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + } + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -834,14 +942,36 @@ void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_put(lrc->bo); } +void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); +} + +u32 xe_lrc_ring_tail(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; +} + void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) { - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); + if (xe_lrc_has_indirect_ring_state(lrc)) + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); } u32 xe_lrc_ring_head(struct xe_lrc *lrc) { - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; } u32 xe_lrc_ring_space(struct xe_lrc *lrc) @@ -1214,7 +1344,7 @@ void xe_lrc_dump_default(struct 
drm_printer *p, * hardware status page. */ dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; + remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; while (remaining_dw > 0) { if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { @@ -1355,9 +1485,10 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) return NULL; snapshot->context_desc = xe_lrc_ggtt_addr(lrc); + snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->tail.memory = xe_lrc_ring_tail(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); @@ -1405,6 +1536,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer return; drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", + snapshot->indirect_context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", snapshot->tail.internal, snapshot->tail.memory); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index d32fa31faa2c..06a95a598736 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -21,14 +21,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); void xe_lrc_finish(struct xe_lrc *lrc); -size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); +void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail); +u32 xe_lrc_ring_tail(struct xe_lrc *lrc); void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); +u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index b716df0dfb4e..cdbf03faef15 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -20,10 +20,14 @@ struct xe_lrc { */ struct xe_bo *bo; + /** @size: size of lrc including any indirect ring state page */ + u32 size; + /** @tile: tile which this LRC belongs to */ struct xe_tile *tile; /** @flags: LRC flags */ +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 u32 flags; /** @ring: submission ring state */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 99723a423850..595e4096a17a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -661,6 +661,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->primary_gt; gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.__engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.__engine_mask |= media_desc->hw_engine_mask; @@ -678,6 +679,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.has_indirect_ring_state = 
media_desc->has_indirect_ring_state; gt->info.__engine_mask = media_desc->hw_engine_mask; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e1f2b4879fc2..79b0f80376a4 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -27,6 +27,7 @@ struct xe_graphics_desc { u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; + u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; }; @@ -37,6 +38,8 @@ struct xe_media_desc { u8 rel; u64 hw_engine_mask; /* hardware engines provided by media IP */ + + u8 has_indirect_ring_state:1; }; struct gmdid_map { -- cgit From 7578c2f811fffd7178de3075a19bddcbb7e0b2e7 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:52 -0700 Subject: drm/xe: Dump Indirect Ring State registers Dump INDIRECT_RING_STATE and RING_START_UDW registers. v2: Add bspec reference Bspec: 67137, 67138 Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-4-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_hw_engine.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 4 ++++ 3 files changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 260a44f46f7e..263ffc7bc2ef 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -55,6 +55,8 @@ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ +#define RING_START_UDW(base) XE_REG((base) + 0x48) + #define RING_PSMI_CTL(base) XE_REG((base) + 0x50, XE_REG_OPTION_MASKED) #define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) @@ -110,6 +112,8 @@ #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) +#define INDIRECT_RING_STATE(base) XE_REG((base) + 0x108) + #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index ec69803152a2..45f582a7caaa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -908,6 +908,13 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + if (xe_gt_has_indirect_ring_state(hwe->gt)) { + snapshot->reg.indirect_ring_state = + hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + snapshot->reg.ring_start_udw = + hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + } + snapshot->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = @@ -997,6 +1004,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", snapshot->reg.ring_execlist_sq_contents); drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); + drm_printf(p, "\tRING_START_UDW: 0x%08x\n", + snapshot->reg.ring_start_udw); drm_printf(p, 
"\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); @@ -1010,6 +1019,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd); drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); + drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n", + snapshot->reg.indirect_ring_state); drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); xe_hw_engine_snapshot_instdone_print(snapshot, p); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 9f9755e31b9f..5f4b67acba99 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -189,6 +189,8 @@ struct xe_hw_engine_snapshot { u32 ring_hws_pga; /** @reg.ring_start: RING_START */ u32 ring_start; + /** @reg.ring_start_udw: RING_START_UDW */ + u32 ring_start_udw; /** @reg.ring_head: RING_HEAD */ u32 ring_head; /** @reg.ring_tail: RING_TAIL */ @@ -207,6 +209,8 @@ struct xe_hw_engine_snapshot { u32 ring_emr; /** @reg.ring_eir: RING_EIR */ u32 ring_eir; + /** @reg.indirect_ring_state: INDIRECT_RING_STATE */ + u32 indirect_ring_state; /** @reg.ipehr: IPEHR */ u32 ipehr; /** @reg.rcu_mode: RCU_MODE */ -- cgit From fe0154cf8222d9e38c60ccc124adb2f9b5272371 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:53 -0700 Subject: drm/xe/xe2: Enable Indirect Ring State support for Xe2 Indirect Ring State is the recommended mode for Xe2 platforms, enable it by default. v2: Set has_indirect_ring_state to '1' instead of 'true' Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-5-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 595e4096a17a..83e662a36dfa 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -166,6 +166,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ + .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .va_bits = 48, \ -- cgit From 205e5c4b20c3c2e2033338a935f6a59843dc69de Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 7 May 2024 13:04:11 +0000 Subject: drm/xe: Fix UBSAN shift-out-of-bounds failure Here is the failure stack: [ 12.988209] ------------[ cut here ]------------ [ 12.988216] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ 12.988232] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ 12.988235] CPU: 4 PID: 1310 Comm: gnome-shell Tainted: G U 6.9.0-rc6+prerelease1158+ #19 [ 12.988237] Hardware name: Intel Corporation Raptor Lake Client Platform/RPL-S ADP-S DDR5 UDIMM CRB, BIOS RPLSFWI1.R00.3301.A02.2208050712 08/05/2022 [ 12.988239] Call Trace: [ 12.988240] [ 12.988242] dump_stack_lvl+0xd7/0xf0 [ 12.988248] dump_stack+0x10/0x20 [ 12.988250] ubsan_epilogue+0x9/0x40 [ 12.988253] __ubsan_handle_shift_out_of_bounds+0x10e/0x170 [ 12.988260] dma_resv_reserve_fences.cold+0x2b/0x48 [ 12.988262] ? 
ww_mutex_lock_interruptible+0x3c/0x110 [ 12.988267] drm_exec_prepare_obj+0x45/0x60 [drm_exec] [ 12.988271] ? vm_bind_ioctl_ops_execute+0x5b/0x740 [xe] [ 12.988345] vm_bind_ioctl_ops_execute+0x78/0x740 [xe] It is caused by the value 0 of the num_fences parameter in drm_exec_prepare_obj(): in __rounddown_pow_of_two(), the resulting "0 - 1" underflows to ULONG_MAX, whose ilog2 shifts a 64-bit type by 64, which is the shift-out-of-bounds reported above. By design drm_exec_prepare_obj() should be called only when there are fences to be reserved. If num_fences is 0, calling drm_exec_lock_obj() is sufficient as was done in commit 9377de4cb3e8 ("drm/xe/vm: Avoid reserving zero fences") Cc: Nirmoy Das Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/all/24d4a9a9-c622-4f56-8672-21f4c6785476@amd.com Link: https://patchwork.freedesktop.org/patch/msgid/20240507130411.630361-1-shuicheng.lin@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d17192c8b7de..c5b1694b292f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2692,7 +2692,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, if (bo) { if (!bo->vm) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0); + err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, xe_vma_vm(vma), true); } @@ -2777,7 +2777,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vma_op *op; int err; - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0); + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); if (err) return err; -- cgit From 515f08972355e160f896f612347121fbb685e740 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 19 Apr 2024 18:29:45 +0530 Subject: drm/xe/hwmon: Remove unwanted write permission for currN_label Change umode of currN_label from 0644 to 0444, as write permission is not needed for a label. Signed-off-by: Karthik Poosa Reviewed-by: Riana Tauro Link: https://patchwork.freedesktop.org/patch/msgid/20240419125945.4085629-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 453e601ddd5e..dca275117232 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -550,12 +550,17 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; + /* hwmon sysfs attribute of current available only for package */ + if (channel != CHANNEL_PKG) + return 0; + switch (attr) { case hwmon_curr_crit: - case hwmon_curr_label: - if (channel == CHANNEL_PKG) return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + case hwmon_curr_label: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; break; default: return 0; -- cgit From 62010b3cd6030ff743930c9ae898d8e4e943100d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 8 May 2024 19:48:56 +0200 Subject: drm/xe: Move xe_gpu_commands.h file to instructions/ All other files with command definitions are in the instructions/ folder. Move xe_gpu_commands.h also there.
Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240508174856.1908-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/instructions/xe_gpu_commands.h | 70 +++++++++++++++++++++++ drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 70 ----------------------- drivers/gpu/drm/xe/xe_bb.c | 1 - drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 1 - drivers/gpu/drm/xe/xe_lrc.c | 1 - drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- 8 files changed, 73 insertions(+), 76 deletions(-) create mode 100644 drivers/gpu/drm/xe/instructions/xe_gpu_commands.h delete mode 100644 drivers/gpu/drm/xe/regs/xe_gpu_commands.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h new file mode 100644 index 000000000000..a255946b6f77 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_COMMANDS_H_ +#define _XE_GPU_COMMANDS_H_ + +#include "regs/xe_reg_defs.h" + +#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) +#define SRC_ACCESS_TYPE_SHIFT 21 +#define DST_ACCESS_TYPE_SHIFT 20 +#define CCS_SIZE_MASK GENMASK(17, 8) +#define XE2_CCS_SIZE_MASK GENMASK(18, 9) +#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) +#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) +#define NUM_CCS_BYTES_PER_BLOCK 256 +#define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 512 : 256) + +#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) +#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) +#define XY_FAST_COLOR_BLT_DW 16 +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) +#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) +#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 + +#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) +#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) +#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) +#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) + +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) +#define PVC_MEM_SET_CMD_LEN_DW 7 +#define PVC_MEM_SET_MATRIX REG_BIT(17) +#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) +/* Bspec lists field as [6:0], but index alone is from [6:1] */ +#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) +#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3) + +#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) + +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ + +#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) +#define PIPE_CONTROL_AMFS_FLUSH (1<<25) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) +#define PIPE_CONTROL_LRI_POST_SYNC BIT(23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) +#define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) +#define PIPE_CONTROL_TLB_INVALIDATE BIT(18) +#define PIPE_CONTROL_PSD_SYNC (1<<17) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) 
+#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h deleted file mode 100644 index a255946b6f77..000000000000 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GPU_COMMANDS_H_ -#define _XE_GPU_COMMANDS_H_ - -#include "regs/xe_reg_defs.h" - -#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) -#define SRC_ACCESS_TYPE_SHIFT 21 -#define DST_ACCESS_TYPE_SHIFT 20 -#define CCS_SIZE_MASK GENMASK(17, 8) -#define XE2_CCS_SIZE_MASK GENMASK(18, 9) -#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) -#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) -#define NUM_CCS_BYTES_PER_BLOCK 256 -#define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 512 : 256) - -#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) -#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) -#define XY_FAST_COLOR_BLT_DW 16 -#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) -#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) -#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 - -#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) -#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) -#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) -#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) -#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) - -#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) -#define PVC_MEM_SET_CMD_LEN_DW 7 -#define PVC_MEM_SET_MATRIX REG_BIT(17) -#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) -/* Bspec lists field as [6:0], but index alone is from [6:1] */ -#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) -#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3) - -#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) - -#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ - -#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) -#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) -#define PIPE_CONTROL_AMFS_FLUSH (1<<25) -#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) -#define PIPE_CONTROL_LRI_POST_SYNC BIT(23) -#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) -#define PIPE_CONTROL_CS_STALL (1<<20) -#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) -#define PIPE_CONTROL_TLB_INVALIDATE BIT(18) -#define PIPE_CONTROL_PSD_SYNC (1<<17) -#define PIPE_CONTROL_QW_WRITE (1<<14) -#define PIPE_CONTROL_DEPTH_STALL (1<<13) -#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) -#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) -#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) -#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) - -#endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 37e056fde95d..a13e0b3a169e 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -6,7 +6,6 @@ #include "xe_bb.h" #include "instructions/xe_mi_commands.h" -#include "regs/xe_gpu_commands.h" #include "xe_assert.h" #include "xe_device.h" #include 
"xe_exec_queue_types.h" diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f8eb477f359d..4165e1347371 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -16,7 +16,7 @@ #include #include "display/xe_display.h" -#include "regs/xe_gpu_commands.h" +#include "instructions/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index dece2785933c..e9dee1e14fef 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -9,7 +9,6 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index ef954cd5cd68..9b0a4078add3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -11,7 +11,6 @@ #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_gfx_state_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9f6e9b7f11c8..36db5ed1a572 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -14,8 +14,8 @@ #include +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gtt_defs.h" #include "tests/xe_test.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d42b3f33bd7a..a3ca718456f6 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -7,9 +7,9 @@ #include +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_exec_queue_types.h" -- cgit From c3203ca3b8a6aab7c5a5dc3f5e165a53410461f6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 8 May 2024 19:10:00 +0200 Subject: drm/xe: Rename few xe_args.h macros To minimize the risk of future name collisions, rename macros to always include the ARG or ARGS tag: DROP_FIRST to DROP_FIRST_ARG PICK_FIRST to FIRST_ARG PICK_LAST to LAST_ARG Suggested-by: Andy Shevchenko Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Andy Shevchenko #v2 Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240508171000.1864-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/xe_args_test.c | 101 +++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_args.h | 72 +++++++++++++++-------- drivers/gpu/drm/xe/xe_rtp_helpers.h | 12 ++-- 3 files changed, 119 insertions(+), 66 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c index 9b44c1ab6364..f3fb23aa5d2e 100644 --- a/drivers/gpu/drm/xe/tests/xe_args_test.c +++ b/drivers/gpu/drm/xe/tests/xe_args_test.c @@ -21,10 +21,10 @@ static void call_args_example(struct kunit *test) #undef buz } -static void drop_first_example(struct kunit *test) +static void drop_first_arg_example(struct kunit *test) { #define foo X, Y, Z, Q -#define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) +#define bar CALL_ARGS(COUNT_ARGS, 
DROP_FIRST_ARG(foo)) KUNIT_EXPECT_EQ(test, bar, 3); @@ -32,12 +32,12 @@ static void drop_first_example(struct kunit *test) #undef bar } -static void pick_first_example(struct kunit *test) +static void first_arg_example(struct kunit *test) { int X = 1; #define foo X, Y, Z, Q -#define bar PICK_FIRST(foo) +#define bar FIRST_ARG(foo) KUNIT_EXPECT_EQ(test, bar, X); KUNIT_EXPECT_STREQ(test, __stringify(bar), "X"); @@ -46,12 +46,12 @@ static void pick_first_example(struct kunit *test) #undef bar } -static void pick_last_example(struct kunit *test) +static void last_arg_example(struct kunit *test) { int Q = 1; #define foo X, Y, Z, Q -#define bar PICK_LAST(foo) +#define bar LAST_ARG(foo) KUNIT_EXPECT_EQ(test, bar, Q); KUNIT_EXPECT_STREQ(test, __stringify(bar), "Q"); @@ -60,11 +60,29 @@ static void pick_last_example(struct kunit *test) #undef bar } +static void pick_arg_example(struct kunit *test) +{ + int Y = 1, Z = 2; + +#define foo X, Y, Z, Q +#define bar PICK_ARG(2, foo) +#define buz PICK_ARG3(foo) + + KUNIT_EXPECT_EQ(test, bar, Y); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Y"); + KUNIT_EXPECT_EQ(test, buz, Z); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Z"); + +#undef foo +#undef bar +#undef buz +} + static void sep_comma_example(struct kunit *test) { #define foo(f) f(X) f(Y) f(Z) f(Q) -#define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) -#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) +#define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify)) +#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA))) static const char * const a[] = { bar }; @@ -123,61 +141,74 @@ static void call_args_test(struct kunit *test) KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, MAX_ARGS), 12); } -static void drop_first_test(struct kunit *test) +static void drop_first_arg_test(struct kunit *test) { int Y = -2, Z = -3, Q = -4; - int a[] = { DROP_FIRST(FOO_ARGS) }; + int a[] = { DROP_FIRST_ARG(FOO_ARGS) }; - KUNIT_EXPECT_EQ(test, DROP_FIRST(0, -1), -1); - KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(0, -1, -2)), -2); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(0, -1), -1); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(0, -1, -2)), -2); - KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST(FOO_ARGS)), 3); - KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS))), -4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(FOO_ARGS)), 3); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))), -4); KUNIT_EXPECT_EQ(test, a[0], -2); KUNIT_EXPECT_EQ(test, a[1], -3); KUNIT_EXPECT_EQ(test, a[2], -4); - KUNIT_EXPECT_STREQ(test, __stringify(DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS)))), "Q"); + +#define foo DROP_FIRST_ARG(FOO_ARGS) +#define bar DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS)) +#define buz DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))) + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, foo), 3); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, bar), 2); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, buz), 1); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Q"); + +#undef foo +#undef bar +#undef buz } -static void pick_first_test(struct kunit *test) +static void first_arg_test(struct kunit *test) { int X = -1; - int a[] = { PICK_FIRST(FOO_ARGS) }; + int a[] = { FIRST_ARG(FOO_ARGS) }; - KUNIT_EXPECT_EQ(test, PICK_FIRST(-1, -2), -1); + KUNIT_EXPECT_EQ(test, FIRST_ARG(-1, -2), -1); - KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_FIRST(FOO_ARGS)), 1); - KUNIT_EXPECT_EQ(test, PICK_FIRST(FOO_ARGS), -1); + KUNIT_EXPECT_EQ(test, 
CALL_ARGS(COUNT_ARGS, FIRST_ARG(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, FIRST_ARG(FOO_ARGS), -1); KUNIT_EXPECT_EQ(test, a[0], -1); - KUNIT_EXPECT_STREQ(test, __stringify(PICK_FIRST(FOO_ARGS)), "X"); + KUNIT_EXPECT_STREQ(test, __stringify(FIRST_ARG(FOO_ARGS)), "X"); } -static void pick_last_test(struct kunit *test) +static void last_arg_test(struct kunit *test) { int Q = -4; - int a[] = { PICK_LAST(FOO_ARGS) }; + int a[] = { LAST_ARG(FOO_ARGS) }; - KUNIT_EXPECT_EQ(test, PICK_LAST(-1, -2), -2); + KUNIT_EXPECT_EQ(test, LAST_ARG(-1, -2), -2); - KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_LAST(FOO_ARGS)), 1); - KUNIT_EXPECT_EQ(test, PICK_LAST(FOO_ARGS), -4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, LAST_ARG(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, LAST_ARG(FOO_ARGS), -4); KUNIT_EXPECT_EQ(test, a[0], -4); - KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(FOO_ARGS)), "Q"); + KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(FOO_ARGS)), "Q"); - KUNIT_EXPECT_EQ(test, PICK_LAST(MAX_ARGS), -12); - KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(MAX_ARGS)), "-12"); + KUNIT_EXPECT_EQ(test, LAST_ARG(MAX_ARGS), -12); + KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(MAX_ARGS)), "-12"); } static struct kunit_case args_tests[] = { KUNIT_CASE(count_args_test), KUNIT_CASE(call_args_example), KUNIT_CASE(call_args_test), - KUNIT_CASE(drop_first_example), - KUNIT_CASE(drop_first_test), - KUNIT_CASE(pick_first_example), - KUNIT_CASE(pick_first_test), - KUNIT_CASE(pick_last_example), - KUNIT_CASE(pick_last_test), + KUNIT_CASE(drop_first_arg_example), + KUNIT_CASE(drop_first_arg_test), + KUNIT_CASE(first_arg_example), + KUNIT_CASE(first_arg_test), + KUNIT_CASE(last_arg_example), + KUNIT_CASE(last_arg_test), + KUNIT_CASE(pick_arg_example), KUNIT_CASE(sep_comma_example), {} }; diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h index 40b9eb4151d8..4dbc7e53c624 100644 --- a/drivers/gpu/drm/xe/xe_args.h +++ b/drivers/gpu/drm/xe/xe_args.h @@ -35,7 +35,7 @@ #define __CALL_ARGS(f, args...) f(args) /** - * DROP_FIRST - Returns all arguments except the first one. + * DROP_FIRST_ARG - Returns all arguments except the first one. * @args: arguments * * This helper macro allows manipulation the argument list before passing it @@ -44,15 +44,15 @@ * Example: * * #define foo X,Y,Z,Q - * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) + * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo)) * * With above definitions bar expands to 3. */ -#define DROP_FIRST(args...) __DROP_FIRST(args) -#define __DROP_FIRST(a, b...) b +#define DROP_FIRST_ARG(args...) __DROP_FIRST_ARG(args) +#define __DROP_FIRST_ARG(a, b...) b /** - * PICK_FIRST - Returns the first argument. + * FIRST_ARG - Returns the first argument. * @args: arguments * * This helper macro allows manipulation the argument list before passing it @@ -61,15 +61,15 @@ * Example: * * #define foo X,Y,Z,Q - * #define bar PICK_FIRST(foo) + * #define bar FIRST_ARG(foo) * * With above definitions bar expands to X. */ -#define PICK_FIRST(args...) __PICK_FIRST(args) -#define __PICK_FIRST(a, b...) a +#define FIRST_ARG(args...) __FIRST_ARG(args) +#define __FIRST_ARG(a, b...) a /** - * PICK_LAST - Returns the last argument. + * LAST_ARG - Returns the last argument. * @args: arguments * * This helper macro allows manipulation the argument list before passing it @@ -80,24 +80,46 @@ * Example: * * #define foo X,Y,Z,Q - * #define bar PICK_LAST(foo) + * #define bar LAST_ARG(foo) * * With above definitions bar expands to Q. */ -#define PICK_LAST(args...) 
__PICK_ARG(COUNT_ARGS(args), args) +#define LAST_ARG(args...) __LAST_ARG(args) +#define __LAST_ARG(args...) PICK_ARG(COUNT_ARGS(args), args) + +/** + * PICK_ARG - Returns the n-th argument. + * @n: argument number to be returned + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Like COUNT_ARGS() this macro supports n up to 12. + * Specialized macros PICK_ARG1() to PICK_ARG12() are also available. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_ARG(2, foo) + * #define buz PICK_ARG3(foo) + * + * With above definitions bar expands to Y and buz expands to Z. + */ +#define PICK_ARG(n, args...) __PICK_ARG(n, args) #define __PICK_ARG(n, args...) CALL_ARGS(CONCATENATE(PICK_ARG, n), args) -#define PICK_ARG1(args...) PICK_FIRST(args) -#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST(args)) -#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST(args)) -#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST(args)) -#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST(args)) -#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST(args)) -#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST(args)) -#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST(args)) -#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST(args)) -#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST(args)) -#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST(args)) -#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST(args)) +#define PICK_ARG1(args...) FIRST_ARG(args) +#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST_ARG(args)) +#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST_ARG(args)) +#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST_ARG(args)) +#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST_ARG(args)) +#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST_ARG(args)) +#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST_ARG(args)) +#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST_ARG(args)) +#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST_ARG(args)) +#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST_ARG(args)) +#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST_ARG(args)) +#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST_ARG(args)) /** * ARGS_SEP_COMMA - Definition of a comma character. @@ -109,8 +131,8 @@ * Example: * * #define foo(f) f(X) f(Y) f(Z) f(Q) - * #define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) - * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) + * #define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify)) + * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA))) * * With above definitions bar expands to * "X", "Y", "Z", "Q" diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 8129d6d9ef37..7735f217ba71 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -17,7 +17,7 @@ */ #define _XE_ESC(...) __VA_ARGS__ -#define _XE_TUPLE_TAIL(...) (DROP_FIRST(__VA_ARGS__)) +#define _XE_TUPLE_TAIL(...) 
(DROP_FIRST_ARG(__VA_ARGS__)) #define _XE_RTP_CONCAT(a, b) CONCATENATE(XE_RTP_, CONCATENATE(a, b)) @@ -54,10 +54,10 @@ * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ #define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_RTP_CONCAT(PASTE_, COUNT_ARGS args_)(prefix_, sep_, args_) -#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) -#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer @@ -70,6 +70,6 @@ * * { .a = 10 } */ -#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST _XE_ESC __VA_ARGS__) +#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST_ARG _XE_ESC __VA_ARGS__) #endif -- cgit From 304aa805ee8e82adc30159ff43038cd96eb69eb9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:15 +0200 Subject: drm/xe: Fix xe_gt_throttle_sysfs.h We don't need to include drm/drm_managed.h here. We don't need to comment final #endif. Also remove empty line at the end. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h index 6c61e6f228a8..335c402b51a6 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -6,11 +6,8 @@ #ifndef _XE_GT_THROTTLE_SYSFS_H_ #define _XE_GT_THROTTLE_SYSFS_H_ -#include - struct xe_gt; int xe_gt_throttle_sysfs_init(struct xe_gt *gt); -#endif /* _XE_GT_THROTTLE_SYSFS_H_ */ - +#endif -- cgit From c5d9c6690ed4cefe9390f8d73ad690332dbc7142 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:16 +0200 Subject: drm/xe: Fix xe_guc_ads.h We don't need to include xe_guc_ads_types.h here. Use forward declaration instead. 
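For illustration only, a minimal sketch of the pattern applied by this series of header cleanups ("foo" is a placeholder, not an xe type): a header that only passes pointers around can rely on an incomplete type, so its includers stop pulling in the full definition.

        /* before: every includer of foo.h also parses foo_types.h */
        #include "foo_types.h"

        int foo_init(struct foo *f);

        /* after: a forward declaration suffices for pointer parameters */
        struct foo;

        int foo_init(struct foo *f);

This shortens include chains and incremental rebuild times without changing any generated code.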
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h index 2e2531779122..2e6674c760ff 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.h +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -6,7 +6,7 @@ #ifndef _XE_GUC_ADS_H_ #define _XE_GUC_ADS_H_ -#include "xe_guc_ads_types.h" +struct xe_guc_ads; int xe_guc_ads_init(struct xe_guc_ads *ads); int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); -- cgit From 38830bfe287f9ad97be87a844237beb8e7e64f4a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:17 +0200 Subject: drm/xe: Fix xe_lrc.h Prefer forward declarations over #include xe_lrc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_lrc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 06a95a598736..e0e841963c23 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -5,14 +5,17 @@ #ifndef _XE_LRC_H_ #define _XE_LRC_H_ -#include "xe_lrc_types.h" +#include struct drm_printer; struct xe_bb; struct xe_device; struct xe_exec_queue; enum xe_engine_class; +struct xe_gt; struct xe_hw_engine; +struct xe_lrc; +struct xe_lrc_snapshot; struct xe_vm; #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) -- cgit From 664de50cbfae048d08e9f3c1c0da377d1269e6d1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:18 +0200 Subject: drm/xe: Fix xe_reg_sr.h Prefer forward declarations over #include xe_reg_sr_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_reg_sr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index e3197c33afe2..51fbba423e27 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -6,8 +6,6 @@ #ifndef _XE_REG_SR_ #define _XE_REG_SR_ -#include "xe_reg_sr_types.h" - /* * Reg save/restore bookkeeping */ @@ -15,6 +13,8 @@ struct xe_device; struct xe_gt; struct xe_hw_engine; +struct xe_reg_sr; +struct xe_reg_sr_entry; struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); -- cgit From 61549a2ee5940af4d49ba227d7e7798ccf6f04a5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:47 -0700 Subject: drm/xe: Drop __engine_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not really used, it's just a copy of engine_mask, which already reads the fuses to mark engines as available/not-available. 
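For illustration only, an abridged before/after sketch of the change described above (comments are paraphrases, not the driver's exact layout): two copies of the same mask must be kept in sync by hand, while a single field seeded directly from the PCI descriptor cannot go stale.

        /* before: two masks to keep in sync */
        struct info {
                u64 engine_mask;        /* fused-off engines already cleared */
                u64 __engine_mask;      /* raw copy from xe_pci.c */
        };

        /* after: xe_pci.c writes the one authoritative mask directly */
        struct info {
                u64 engine_mask;
        };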
Reviewed-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 3 --- drivers/gpu/drm/xe/xe_gt_types.h | 6 ------ drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_pci.c | 6 +++--- 4 files changed, 5 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 36c7b1631fa6..3b3418eb7bc4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -518,9 +518,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) if (err) goto out_fw; - /* XXX: Fake that we pull the engine mask from hwconfig blob */ - gt->info.engine_mask = gt->info.__engine_mask; - out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); out: diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8dc203413a27..5a114fc9dde7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -114,12 +114,6 @@ struct xe_gt { u32 reference_clock; /** @info.engine_mask: mask of engines present on GT */ u64 engine_mask; - /** - * @info.__engine_mask: mask of engines present on GT read from - * xe_pci.c, used to fake reading the engine_mask from the - * hwconfig blob. - */ - u64 __engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; /** @info.id: Unique ID of this GT within the PCI Device */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 36db5ed1a572..2b12f3621f9e 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -936,8 +936,8 @@ static bool has_service_copy_support(struct xe_gt *gt) * all of the actual service copy engines (BCS1-BCS8) have been fused * off. */ - return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8, - XE_HW_ENGINE_BCS1); + return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_BCS1); } static u32 emit_clear_cmd_len(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 83e662a36dfa..fc29eb8e99c5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -663,9 +663,9 @@ static int xe_info_init(struct xe_device *xe, gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; - gt->info.__engine_mask = graphics_desc->hw_engine_mask; + gt->info.engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) - gt->info.__engine_mask |= media_desc->hw_engine_mask; + gt->info.engine_mask |= media_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 || !media_desc) continue; @@ -681,7 +681,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; - gt->info.__engine_mask = media_desc->hw_engine_mask; + gt->info.engine_mask = media_desc->hw_engine_mask; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; -- cgit From 402c014cbcc7c9ada6d62ee646b2b359d4793ce2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:48 -0700 Subject: drm/xe: Drop useless forcewake get/put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forcewake used to be needed in xe_gt_init_early() since it was calling xe_gt_topology_init(). 
That call was dropped in commit 4c47049d93b7 ("drm/xe/guc: Fix missing topology init"), but the forcewake calls were left behind. Remove them. Cc: Zhanjun Dong Reviewed-by: Michał Winiarski Reviewed-by: Zhanjun Dong Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3b3418eb7bc4..05b77214f996 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -318,14 +318,6 @@ int xe_gt_init_early(struct xe_gt *gt) return err; } - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; - xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt)); err = xe_wa_init(gt); -- cgit From 65c4de2a9148385114b3ff1121143ef1af805a1a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:49 -0700 Subject: drm/xe: Move xe_gt_init_early() where it belongs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Early shall be early enough, stop doing other things with gt before it. Now that xe_gt_init_early() doesn't need forcewake and doesn't depend on the fake engine_mask initialization, move it where it belongs: it doesn't need to be after hwconfig anymore. Reviewed-by: Michał Winiarski Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 4165e1347371..9c32bd157ecf 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -567,6 +567,12 @@ int xe_device_probe(struct xe_device *xe) xe_ttm_sys_mgr_init(xe); + for_each_gt(gt, xe, id) { + err = xe_gt_init_early(gt); + if (err) + return err; + } + for_each_gt(gt, xe, id) xe_force_wake_init_gt(gt, gt_to_fw(gt)); @@ -605,12 +611,6 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; - for_each_gt(gt, xe, id) { - err = xe_gt_init_early(gt); - if (err) - goto err_irq_shutdown; - } - err = xe_device_set_has_flat_ccs(xe); if (err) goto err_irq_shutdown; -- cgit From 45b9066ec351518657cd09599872f737ceb25f09 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:50 -0700 Subject: drm/xe: Move xe_force_wake_init_gt() inside gt initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_force_wake_init_gt() is a software-only initialization and doesn't need to be called from xe_device_probe(). Move it to initialize together with the gt.
Reviewed-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 3 --- drivers/gpu/drm/xe/xe_gt.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9c32bd157ecf..ad18c48cab99 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -573,9 +573,6 @@ int xe_device_probe(struct xe_device *xe) return err; } - for_each_gt(gt, xe, id) - xe_force_wake_init_gt(gt, gt_to_fw(gt)); - for_each_tile(tile, xe, id) { err = xe_ggtt_init_early(tile->mem.ggtt); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 05b77214f996..e05899691a8c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -328,6 +328,8 @@ int xe_gt_init_early(struct xe_gt *gt) xe_wa_process_oob(gt); xe_tuning_process_gt(gt); + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + return 0; } -- cgit From d1855d284e9f6580c0eaba66bfa04722f4d4dc9b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:51 -0700 Subject: drm/xe: Move sw-only pcode initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move it to xe_gt_init_early(), which initializes the sw-only part for each gt. Reviewed-by: Rodrigo Vivi Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 3 --- drivers/gpu/drm/xe/xe_gt.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ad18c48cab99..bedf55928aa4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -597,9 +597,6 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - for_each_gt(gt, xe, id) - xe_pcode_init(gt); - err = xe_display_init_noirq(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e05899691a8c..11870ad2caf6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -44,6 +44,7 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" +#include "xe_pcode.h" #include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" @@ -329,6 +330,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); + xe_pcode_init(gt); return 0; } -- cgit From c81858eb52266b3d6ba28ca4f62a198231a10cdc Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 8 May 2024 20:52:15 +0530 Subject: drm/xe: Change pcode timeout to 50msec while polling again Polling is initially attempted with preemption enabled for up to timeout_base_ms, and if it exceeds this timeframe, another attempt is made with preemption disabled, allowing an additional 50 ms before timing out.
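For illustration only, a hedged sketch of that two-phase pattern (poll_status() is a stand-in name, not a real driver function): the first bounded attempt stays preemptible, and only the retry pays for a preemption-disabled section.

        ret = poll_status(gt, request, timeout_base_ms * 1000 /* us */);
        if (ret == -ETIMEDOUT) {
                /* Retry atomically with the fixed 50 ms budget. */
                preempt_disable();
                ret = poll_status(gt, request, 50 * 1000 /* us */);
                preempt_enable();
        }

Keeping the first attempt preemptible avoids monopolizing the CPU in the common case, while the non-preemptible retry guards against the poll loop itself being starved.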
v2 - Rebase v3 - Move warnings to separate patch (Lucas) Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.") Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-2-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index c010ef16fbf5..a5e7da8cf944 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -191,7 +191,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - true, timeout_base_ms * 1000, true); + true, 50 * 1000, true); preempt_enable(); out: -- cgit From 4c0be90e6874b8af30541c37689780fc7c8276c9 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 8 May 2024 20:52:16 +0530 Subject: drm/xe: Fix the warning conditions The maximum timeout the display code uses in xe_pcode_request is 3 msec; add a warning in case the function is misused with higher timeouts. Add a warning if pcode_try_request is not passed a timeout parameter greater than 0. Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-3-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pcode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index a5e7da8cf944..9c4eefdf6642 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -10,6 +10,7 @@ #include +#include "xe_assert.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_mmio.h" @@ -124,6 +125,8 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, { int slept, wait = 10; + xe_gt_assert(gt, timeout_us > 0); + for (slept = 0; slept < timeout_us; slept += wait) { if (locked) *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, @@ -169,6 +172,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, u32 status; int ret; + xe_gt_assert(gt, timeout_base_ms <= 3); + mutex_lock(&gt->pcode.lock); ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, @@ -188,7 +193,6 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, */ drm_err(&gt_to_xe(gt)->drm, "PCODE timeout, retrying with preemption disabled\n"); - drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, true, 50 * 1000, true); -- cgit From 04f4a70a183a688a60fe3882d6e4236ea02cfc67 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 15 Apr 2024 12:04:53 -0700 Subject: drm/xe: Only use reserved BCS instances for usm migrate exec queue The GuC context scheduling queue is 2 entries deep, thus it is possible for a migration job to be stuck behind a fault if the migration exec queue shares engines with user jobs. This can deadlock as the migrate exec queue is required to service page faults. Avoid the deadlock by only using reserved BCS instances for the usm migrate exec queue.
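Paraphrasing the change below rather than adding new driver code, the logical mask for the migrate queue is now built solely from copy engines reserved for USM, so a faulting user job can never sit ahead of the page-fault service job in the two-entry GuC queue:

        u32 logical_mask = 0;

        for_each_hw_engine(hwe, gt, id) {
                if (hwe->class != XE_ENGINE_CLASS_COPY)
                        continue;
                /* Reserved instances never run user jobs. */
                if (xe_gt_is_usm_hwe(gt, hwe))
                        logical_mask |= BIT(hwe->logical_instance);
        }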
Fixes: a043fbab7af5 ("drm/xe/pvc: Use fast copy engines as migrate engine on PVC") Cc: Matt Roper Cc: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240415190453.696553-2-matthew.brost@intel.com Reviewed-by: Brian Welty --- drivers/gpu/drm/xe/xe_migrate.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2b12f3621f9e..bacb23de411b 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -34,7 +34,6 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_vm.h" -#include "xe_wa.h" /** * struct xe_migrate - migrate context. @@ -300,10 +299,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } /* - * Due to workaround 16017236439, odd instance hardware copy engines are - * faster than even instance ones. - * This function returns the mask involving all fast copy engines and the - * reserved copy engine to be used as logical mask for migrate engine. * Including the reserved copy engine is required to avoid deadlocks due to * migrate jobs servicing the faults gets stuck behind the job that faulted. */ @@ -317,8 +312,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) if (hwe->class != XE_ENGINE_CLASS_COPY) continue; - if (!XE_WA(gt, 16017236439) || - xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) + if (xe_gt_is_usm_hwe(gt, hwe)) logical_mask |= BIT(hwe->logical_instance); } @@ -369,6 +363,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (!hwe || !logical_mask) return ERR_PTR(-EINVAL); + /* + * XXX: Currently only reserving 1 (likely slow) BCS instance on + * PVC, may want to revisit if performance is needed. + */ m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | -- cgit From 3df01f5c72b0b4ecdca2b3da88a4b6fac809986b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 10 May 2024 22:38:09 +0200 Subject: drm/xe/uc: Reorder post hwconfig uC initialization step We want to move the GuC submission initialization to the post hwconfig step, but now this step is done too late as migration initialization uses an exec_queue that would crash due to unset exec_queue_ops. We can easily fix that with a small function reorder.
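A hedged sketch of the ordering constraint the reorder enforces (abridged; error handling and unrelated init steps are omitted, and that exec_queue_ops is the relevant dependency is an assumption here, the diff below is authoritative):

        err = xe_uc_init_post_hwconfig(&gt->uc);  /* installs exec_queue_ops */
        if (err)
                goto err_force_wake;

        /* only now may migrate init safely create its exec_queue */
        tile->migrate = xe_migrate_init(tile);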
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510203810.1952-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 11870ad2caf6..e69a03ddd255 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -430,6 +430,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + err = xe_uc_init_post_hwconfig(&gt->uc); + if (err) + goto err_force_wake; + if (!xe_gt_is_media_type(gt)) { /* * USM has its only SA pool to non-block behind user operations @@ -456,10 +460,6 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - err = xe_uc_init_post_hwconfig(&gt->uc); - if (err) - goto err_force_wake; - err = xe_uc_init_hw(&gt->uc); if (err) goto err_force_wake; -- cgit From 4071e0872fcad846381f86f5164236827f7e91c8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 10 May 2024 22:38:10 +0200 Subject: drm/xe/uc: Move GuC submission init to post hwconfig step We shouldn't need anything from the GuC submission code until we finish GuC initialization in post hwconfig step. While around, add a diagnostic message if we fail uC init. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510203810.1952-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 9 +++++++++ drivers/gpu/drm/xe/xe_uc.c | 10 +++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0c9938e0ab8c..b1bb94914028 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -21,6 +21,7 @@ #include "xe_gt_printk.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" +#include "xe_guc_db_mgr.h" #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" @@ -356,6 +357,14 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) guc_init_params_post_hwconfig(guc); + ret = xe_guc_submit_init(guc); + if (ret) + return ret; + + ret = xe_guc_db_mgr_init(&guc->dbm, ~0); + if (ret) + return ret; + ret = xe_guc_pc_init(&guc->pc); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 45035e38388b..0186eafc947d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -10,10 +10,9 @@ #include "xe_gsc.h" #include "xe_gsc_proxy.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc.h" -#include "xe_guc_db_mgr.h" #include "xe_guc_pc.h" -#include "xe_guc_submit.h" #include "xe_huc.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -58,13 +57,10 @@ int xe_uc_init(struct xe_uc *uc) if (ret) goto err; - ret = xe_guc_submit_init(&uc->guc); - if (ret) - goto err; - - ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0); + return 0; err: + xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; } -- cgit From 1564d411e17f51e2f64655b4e4da015be1ba7eaa Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 10 May 2024 12:45:38 -0700 Subject: drm/xe/xe_guc_submit: Fix exec queue stop race condition Reorder the xe_sched_tdr_queue_imm and set_exec_queue_banned calls in guc_exec_queue_stop.
This prevents a possible race condition between the two events in which it's possible for xe_sched_tdr_queue_imm to wake the ufence waiter before the exec queue is banned, causing the ufence waiter to miss the banned state. Suggested-by: Matthew Brost Signed-off-by: Jonathan Cavitt Reviewed-by: Matthew Brost Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510194540.3246991-1-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fde527d34f58..3a8e501f2bc2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1491,8 +1491,8 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) !xe_sched_job_completed(job)) || xe_sched_invalidate_job(job, 2)) { trace_xe_sched_job_ban(job); - xe_sched_tdr_queue_imm(&q->guc->sched); set_exec_queue_banned(q); + xe_sched_tdr_queue_imm(&q->guc->sched); } } } -- cgit From abdea2847acfe41313620a5359940522990018e3 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 10 May 2024 12:45:39 -0700 Subject: drm/xe/xe_guc_submit: Allow lr exec queues to be banned LR queues currently don't get banned during a GT/GuC reset because they lack a job. Though they don't have a job to detect the reset status of, it's still possible to tell when they should be banned by looking at the LRC: if the LRC head and tail don't match, then the exec queue should be banned and cleaned up. This also requires swapping the usage of xe_sched_tdr_queue_imm with xe_guc_exec_queue_trigger_cleanup, as the former is specific to non-lr exec queues. Suggested-by: Matthew Brost Signed-off-by: Jonathan Cavitt Reviewed-by: Matthew Brost Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510194540.3246991-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3a8e501f2bc2..bd507a916c1c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1485,15 +1485,23 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) */ if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { struct xe_sched_job *job = xe_sched_first_pending_job(sched); + bool ban = false; if (job) { if ((xe_sched_job_started(job) && !xe_sched_job_completed(job)) || xe_sched_invalidate_job(job, 2)) { trace_xe_sched_job_ban(job); - set_exec_queue_banned(q); - xe_sched_tdr_queue_imm(&q->guc->sched); + ban = true; } + } else if (xe_exec_queue_is_lr(q) && + (xe_lrc_ring_head(q->lrc) != xe_lrc_ring_tail(q->lrc))) { + ban = true; + } + + if (ban) { + set_exec_queue_banned(q); + xe_guc_exec_queue_trigger_cleanup(q); } } } -- cgit From b31cfb47b27ae02ea7fb3f956b99c79356730e2a Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 10 May 2024 12:45:40 -0700 Subject: drm/xe/xe_guc_submit: Declare reset if banned or killed or wedged Add an additional condition to the reset_status guc_exec_queue_op that returns true if the exec queue has been banned or killed or wedged. 
The reset_status op is only used for exiting any xe_wait_user_fence_ioctl that waits on an exec queue without timing out, so doing this will exit the ioctl early in cases where the exec queue can no longer function, such as after a GuC stop during a reset. Suggested-by: Matthew Brost Signed-off-by: Jonathan Cavitt Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510194540.3246991-3-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index bd507a916c1c..4efb88e3e056 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1432,7 +1432,7 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) { - return exec_queue_reset(q); + return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); } /* -- cgit From c8ff26b82c5b0f589516edcf7628704e3a6bc426 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 May 2024 16:23:25 -0700 Subject: drm/xe: Only zap PTEs as needed If PTEs are already invalidated no need to invalidate again. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240514232325.84508-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 87975e45622a..11dd0988ffda 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -812,8 +812,9 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) .tile = tile, }; struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - if (!(vma->tile_present & BIT(tile->id))) + if (!(pt_mask & BIT(tile->id))) return false; (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), -- cgit From 75fe5f347167aceb8b78b9f6ad0ba01a38a34e16 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 18:57:57 +0200 Subject: drm/xe/pf: Don't advertise support to enable VFs if not ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if we have not enabled SR-IOV support using the platform specific has_sriov flag, the hardware may still report SR-IOV capability and the PCI layer may wrongly advertise driver support to enable VFs. Explicitly reset the number of supported VFs to zero to avoid confusion. Applications may read the /sys/bus/pci/devices/.../sriov_totalvfs prior to enabling VFs using the sriov_numvfs to check if such an operation is possible. 
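For illustration only (hypothetical user space code, not part of this patch), such a check could look like:

    #include <stdio.h>

    /* Read .../sriov_totalvfs for a device; 0 means enabling VFs via
     * sriov_numvfs is not possible, -1 means the attribute could not
     * be read at all.
     */
    static int read_sriov_totalvfs(const char *sysfs_path)
    {
            FILE *f = fopen(sysfs_path, "r");
            int total = -1;

            if (f) {
                    if (fscanf(f, "%d", &total) != 1)
                            total = -1;
                    fclose(f);
            }
            return total;
    }

With this patch, such applications will see 0 on platforms where the driver does not support SR-IOV, instead of the raw hardware capability.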
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240507165757.2835-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 1c3fa84b6adb..a274a5fb1401 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -53,6 +53,7 @@ static bool test_is_vf(struct xe_device *xe) */ void xe_sriov_probe_early(struct xe_device *xe) { + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; bool has_sriov = xe->info.has_sriov; @@ -61,6 +62,16 @@ void xe_sriov_probe_early(struct xe_device *xe) mode = XE_SRIOV_MODE_VF; else if (xe_sriov_pf_readiness(xe)) mode = XE_SRIOV_MODE_PF; + } else if (pci_sriov_get_totalvfs(pdev)) { + /* + * Even if we have not enabled SR-IOV support using the + * platform specific has_sriov flag, the hardware may still + * report SR-IOV capability and the PCI layer may wrongly + * advertise driver support to enable VFs. Explicitly reset + * the number of supported VFs to zero to avoid confusion. + */ + drm_info(&xe->drm, "Support for SR-IOV is not available\n"); + pci_sriov_set_totalvfs(pdev, 0); } xe_assert(xe, !xe->sriov.__mode); -- cgit From 9aa8586063a465da986a39ef55e3e5c12140cde5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 6 May 2024 20:41:21 +0200 Subject: drm/xe/pf: Implement pci_driver.sriov_configure callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCI subsystem already exposes the "sriov_numvfs" attribute that users can use to enable or disable SR-IOV VFs. Add custom implementation of the .sriov_configure callback defined by the pci_driver to perform additional steps, including fair VFs provisioning with the resources, as required by our platforms. 
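For reference, the calling contract can be sketched as below (an approximation only; the real logic lives in the PCI core's sriov_numvfs_store()):

    /* Writing N to sriov_numvfs ends up invoking the bound driver's
     * callback, which returns the number of VFs actually enabled or
     * a negative errno.
     */
    static int sriov_configure_contract(struct pci_dev *pdev, int num_vfs)
    {
            if (!pdev->driver || !pdev->driver->sriov_configure)
                    return -ENOENT;

            return pdev->driver->sriov_configure(pdev, num_vfs);
    }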
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Cc: Badal Nilawar Cc: Rodrigo Vivi Reviewed-by: Piotr Piórkowski #v2 Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240506184121.2615-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_pci.c | 4 ++ drivers/gpu/drm/xe/xe_pci_sriov.c | 143 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pci_sriov.h | 13 ++++ 4 files changed, 161 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_pci_sriov.c create mode 100644 drivers/gpu/drm/xe/xe_pci_sriov.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index a67977edff5b..6acde66f0827 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -169,6 +169,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ + xe_pci_sriov.o \ xe_sriov_pf.o # include helpers for tests even when XE is built-in diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index fc29eb8e99c5..b1c8050b7bf5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -23,6 +23,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_pci_sriov.h" #include "xe_pci_types.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -960,6 +961,9 @@ static struct pci_driver xe_pci_driver = { .probe = xe_pci_probe, .remove = xe_pci_remove, .shutdown = xe_pci_shutdown, +#ifdef CONFIG_PCI_IOV + .sriov_configure = xe_pci_sriov_configure, +#endif #ifdef CONFIG_PM_SLEEP .driver.pm = &xe_pm_ops, #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c new file mode 100644 index 000000000000..06d0fceb5114 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_pci_sriov.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0, err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); + result = result ?: err; + } + + return result; +} + +static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + unsigned int n; + + for_each_gt(gt, xe, id) + for (n = 1; n <= num_vfs; n++) + xe_gt_sriov_pf_config_release(gt, n, true); +} + +static int pf_enable_vfs(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int total_vfs = xe_sriov_pf_get_totalvfs(xe); + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, num_vfs > 0); + xe_assert(xe, num_vfs <= total_vfs); + xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + + /* + * We must hold additional reference to the runtime PM to keep PF in D0 + * during VFs lifetime, as our VFs do not implement the PM capability. + * + * With PF being in D0 state, all VFs will also behave as in D0 state. + * This will also keep GuC alive with all VFs' configurations. + * + * We will release this additional PM reference in pf_disable_vfs(). 
+ */ + xe_pm_runtime_get_noresume(xe); + + err = pf_provision_vfs(xe, num_vfs); + if (err < 0) + goto failed; + + err = pci_enable_sriov(pdev, num_vfs); + if (err < 0) + goto failed; + + xe_sriov_info(xe, "Enabled %u of %u VF%s\n", + num_vfs, total_vfs, str_plural(total_vfs)); + return num_vfs; + +failed: + pf_unprovision_vfs(xe, num_vfs); + xe_pm_runtime_put(xe); + + xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", + num_vfs, str_plural(num_vfs), ERR_PTR(err)); + return err; +} + +static int pf_disable_vfs(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + u16 num_vfs = pci_num_vf(pdev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_sriov_dbg(xe, "disabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + + if (!num_vfs) + return 0; + + pci_disable_sriov(pdev); + + pf_unprovision_vfs(xe, num_vfs); + + /* not needed anymore - see pf_enable_vfs() */ + xe_pm_runtime_put(xe); + + xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs)); + return 0; +} + +/** + * xe_pci_sriov_configure - Configure SR-IOV (enable/disable VFs). + * @pdev: the &pci_dev + * @num_vfs: number of VFs to enable or zero to disable all VFs + * + * This is the Xe implementation of struct pci_driver.sriov_configure callback. + * + * This callback will be called by the PCI subsystem to enable or disable SR-IOV + * Virtual Functions (VFs) as requested by the used via the PCI sysfs interface. + * + * Return: number of configured VFs or a negative error code on failure. + */ +int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + int ret; + + if (!IS_SRIOV_PF(xe)) + return -ENODEV; + + if (num_vfs < 0) + return -EINVAL; + + if (num_vfs > xe_sriov_pf_get_totalvfs(xe)) + return -ERANGE; + + if (num_vfs && pci_num_vf(pdev)) + return -EBUSY; + + xe_pm_runtime_get(xe); + if (num_vfs > 0) + ret = pf_enable_vfs(xe, num_vfs); + else + ret = pf_disable_vfs(xe); + xe_pm_runtime_put(xe); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h new file mode 100644 index 000000000000..3b8bfbf7e1d9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_sriov.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PCI_SRIOV_H_ +#define _XE_PCI_SRIOV_H_ + +struct pci_dev; + +int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); + +#endif -- cgit From e6946ea8fcb5625c46754435fef5523f12659c11 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:08 +0200 Subject: drm/xe/guc: Add more KLV helper macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches we will want to generate some of the KLV keys from other macros. Add MAKE_GUC_KLV_{KEY|LEN} macros for that and make sure they will correctly expand provided TAG parameter. Also fix PREP_GUC_KLV_TAG to also work correctly within other macros. 
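For illustration, with made-up demo definitions (these macros exist only for this example):

    #define GUC_KLV_DEMO_KEY        0x1234
    #define GUC_KLV_DEMO_LEN        2u
    #define MY_TAG                  DEMO

    /* MAKE_GUC_KLV_KEY(MY_TAG) expands MY_TAG to DEMO before pasting
     * and yields GUC_KLV_DEMO_KEY (0x1234), because CONCATENATE()
     * macro-expands its arguments first. Direct pasting with
     * GUC_KLV_##TAG##_KEY would instead produce the undefined token
     * GUC_KLV_MY_TAG_KEY, which is why PREP_GUC_KLV_TAG is reworked
     * to go through these helpers.
     */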
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_helpers.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h index b835e0ebe6db..c676d21c173b 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h @@ -6,6 +6,7 @@ #ifndef _XE_GUC_KLV_HELPERS_H_ #define _XE_GUC_KLV_HELPERS_H_ +#include #include struct drm_printer; @@ -37,6 +38,18 @@ int xe_guc_klv_count(const u32 *klvs, u32 num_dwords); (FIELD_PREP_CONST(GUC_KLV_0_KEY, (key)) | \ FIELD_PREP_CONST(GUC_KLV_0_LEN, (len))) +/** + * MAKE_GUC_KLV_KEY - Prepare KLV KEY name based on unique KLV definition tag. + * @TAG: unique tag of the KLV definition + */ +#define MAKE_GUC_KLV_KEY(TAG) CONCATENATE(CONCATENATE(GUC_KLV_, TAG), _KEY) + +/** + * MAKE_GUC_KLV_LEN - Prepare KLV LEN name based on unique KLV definition tag. + * @TAG: unique tag of the KLV definition + */ +#define MAKE_GUC_KLV_LEN(TAG) CONCATENATE(CONCATENATE(GUC_KLV_, TAG), _LEN) + /** * PREP_GUC_KLV_TAG - Prepare KLV header value based on unique KLV definition tag. * @TAG: unique tag of the KLV definition @@ -46,6 +59,6 @@ int xe_guc_klv_count(const u32 *klvs, u32 num_dwords); * Return: value of the KLV header (u32). */ #define PREP_GUC_KLV_TAG(TAG) \ - PREP_GUC_KLV_CONST(GUC_KLV_##TAG##_KEY, GUC_KLV_##TAG##_LEN) + PREP_GUC_KLV_CONST(MAKE_GUC_KLV_KEY(TAG), MAKE_GUC_KLV_LEN(TAG)) #endif -- cgit From b1ce52fbf6ebfc3815773045856c695ce86ca679 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:09 +0200 Subject: drm/xe/guc: Introduce GuC KLV thresholds set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GuC firmware monitors VF's activity and notifies the PF driver once any configured threshold related to such activity is exceeded. The available thresholds are defined in the GuC ABI as part of the GuC VF Configuration KLVs. Threshold configurations performed by the PF driver and notifications sent by the GuC rely on the KLV keys, which are not zero-based and might not guarantee continuity. To simplify the driver code and eliminate the need to repeat very similar code for each threshold, introduce the threshold set macro that allows to generate required code based on unique threshold tag. 
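This is the classic X-macro pattern; a stripped-down sketch with demo names and just two entries shows how a single list can drive both an enum and a name table:

    #define DEMO_THRESHOLDS_SET(define) \
            define(CAT_ERR, cat_error_count) \
            define(ENGINE_RESET, engine_reset_count)

    enum demo_threshold_index {
    #define demo_enum(TAG, NAME) DEMO_THRESHOLD_INDEX_##TAG,
            DEMO_THRESHOLDS_SET(demo_enum)
    #undef demo_enum
            DEMO_NUM_THRESHOLDS
    };

    static const char * const demo_threshold_names[] = {
    #define demo_name(TAG, NAME) #NAME,
            DEMO_THRESHOLDS_SET(demo_name)
    #undef demo_name
    };

Adding a new entry to the list updates every generated user at once, which is the property the thresholds set introduced below relies on.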
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h | 64 ++++++++++++++++++++ .../gpu/drm/xe/xe_guc_klv_thresholds_set_types.h | 68 ++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h create mode 100644 drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h new file mode 100644 index 000000000000..da0fedbbdbaf --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_KLV_THRESHOLDS_SET_H_ +#define _XE_GUC_KLV_THRESHOLDS_SET_H_ + +#include "abi/guc_klvs_abi.h" +#include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set_types.h" + +/** + * MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY - Prepare the name of the KLV key constant. + * @TAG: unique tag of the GuC threshold KLV key. + */ +#define MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG) \ + MAKE_GUC_KLV_KEY(CONCATENATE(VF_CFG_THRESHOLD_, TAG)) + +/** + * xe_guc_klv_threshold_key_to_index - Find index of the tracked GuC threshold. + * @key: GuC threshold KLV key. + * + * This translation is automatically generated using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + * Return: index of the GuC threshold KLV or -1 if not found. + */ +static inline int xe_guc_klv_threshold_key_to_index(u32 key) +{ + switch (key) { +#define define_xe_guc_klv_threshold_key_to_index_case(TAG, ...) \ + \ + case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \ + return MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG); + + /* private: auto-generated case statements */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_xe_guc_klv_threshold_key_to_index_case) + } + return -1; +#undef define_xe_guc_klv_threshold_key_to_index_case +} + +/** + * xe_guc_klv_threshold_index_to_key - Get tracked GuC threshold KLV key. + * @index: GuC threshold KLV index. + * + * This translation is automatically generated using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + * Return: key of the GuC threshold KLV or 0 on malformed index. + */ +static inline u32 xe_guc_klv_threshold_index_to_key(enum xe_guc_klv_threshold_index index) +{ + switch (index) { +#define define_xe_guc_klv_threshold_index_to_key_case(TAG, ...) \ + \ + case MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG): \ + return MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG); + + /* private: auto-generated case statements */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_xe_guc_klv_threshold_index_to_key_case) + } + return 0; /* unreachable */ +#undef define_xe_guc_klv_threshold_index_to_key_case +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h new file mode 100644 index 000000000000..0a028c94756d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_KLV_THRESHOLDS_SET_TYPES_H_ +#define _XE_GUC_KLV_THRESHOLDS_SET_TYPES_H_ + +#include "xe_args.h" + +/** + * MAKE_XE_GUC_KLV_THRESHOLDS_SET - Generate various GuC thresholds definitions. + * @define: name of the inner macro to expand. 
+ * + * The GuC firmware is able to monitor VF's adverse activity and will notify the + * PF driver once any threshold is exceeded. + * + * This super macro allows various conversions between the GuC adverse event + * threshold KLV definitions and the driver code without repeating similar code + * or risking missing some cases. + * + * For each GuC threshold definition, the inner macro &define will be provided + * with the &TAG, that corresponds to the GuC threshold KLV key name defined by + * ABI and the associated &NAME, that may be used in code or debugfs/sysfs:: + * + * define(TAG, NAME) + */ +#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define) \ + define(CAT_ERR, cat_error_count) \ + define(ENGINE_RESET, engine_reset_count) \ + define(PAGE_FAULT, page_fault_count) \ + define(H2G_STORM, guc_time_us) \ + define(IRQ_STORM, irq_time_us) \ + define(DOORBELL_STORM, doorbell_time_us) \ + /* end */ + +/** + * XE_GUC_KLV_NUM_THRESHOLDS - Number of GuC thresholds KLVs. + * + * Calculated automatically using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + */ +#define XE_GUC_KLV_NUM_THRESHOLDS \ + (CALL_ARGS(COUNT_ARGS, MAKE_XE_GUC_KLV_THRESHOLDS_SET(ARGS_SEP_COMMA)) - 1) + +/** + * MAKE_XE_GUC_KLV_THRESHOLD_INDEX - Create enumerator name. + * @TAG: unique TAG of the enum xe_guc_klv_threshold_index. + */ +#define MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG) \ + CONCATENATE(XE_GUC_KLV_THRESHOLD_INDEX_, TAG) + +/** + * enum xe_guc_klv_threshold_index - Index of the tracked GuC threshold. + * + * This enum is automatically generated using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + * All these generated enumerators will only be used by the also generated code. + */ +enum xe_guc_klv_threshold_index { +#define define_xe_guc_klv_threshold_index_enum(TAG, ...) \ + \ + MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG), + + /* private: auto-generated enum definitions */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_xe_guc_klv_threshold_index_enum) +#undef define_xe_guc_klv_threshold_index_enum +}; + +#endif -- cgit From 7aefee83fcdfe5a6a443b87650f3b6cb5721d3ad Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:10 +0200 Subject: drm/xe/guc: Add support for threshold KLVs in to_string() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use MAKE_XE_GUC_KLV_THRESHOLDS_SET to generate missing conversion of threshold KLV keys to string. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c index ceca949932a0..9d99fe266d97 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -8,6 +8,7 @@ #include "abi/guc_klvs_abi.h" #include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set.h" #define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) @@ -48,6 +49,17 @@ const char *xe_guc_klv_key_to_string(u16 key) return "begin_db_id"; case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: return "begin_ctx_id"; + + /* VF CFG threshold keys */ +#define define_threshold_key_to_string_case(TAG, NAME, ...) 
\ + \ + case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \ + return #NAME; + + /* private: auto-generated case statements */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_threshold_key_to_string_case) +#undef define_threshold_key_to_string_case + default: return "(unknown)"; } -- cgit From 629df234bfe73dacb4bb0daa4bc2c14824dba159 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:11 +0200 Subject: drm/xe/pf: Introduce functions to configure VF thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GuC firmware monitors VF's activity and notifies the PF driver once any configured threshold related to such activity is exceeded. Add functions to allow configuration of these thresholds per VF. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 87 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 6 ++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 4 ++ 3 files changed, 97 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 7eac01e04cc5..f678cd1ad9c5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -25,6 +25,7 @@ #include "xe_guc_fwif.h" #include "xe_guc_id_mgr.h" #include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set.h" #include "xe_guc_submit.h" #include "xe_lmtt.h" #include "xe_map.h" @@ -208,6 +209,15 @@ static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); } +static int pf_push_vf_cfg_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value) +{ + u32 key = xe_guc_klv_threshold_index_to_key(index); + + xe_gt_assert(gt, key); + return pf_push_vf_cfg_u32(gt, vfid, key, value); +} + static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned int vfid) { xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); @@ -1748,6 +1758,83 @@ static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *c config->preempt_timeout = 0; } +static int pf_provision_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_threshold(gt, vfid, index, value); + if (unlikely(err)) + return err; + + config->thresholds[index] = value; + + return 0; +} + +static int pf_get_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->thresholds[index]; +} + +static const char *threshold_unit(u32 threshold) +{ + return threshold ? "" : "(disabled)"; +} + +/** + * xe_gt_sriov_pf_config_set_threshold - Configure threshold for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @index: the threshold index + * @value: requested value (0 means disabled) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value) +{ + u32 key = xe_guc_klv_threshold_index_to_key(index); + const char *name = xe_guc_klv_key_to_string(key); + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_threshold(gt, vfid, index, value); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, value, + xe_gt_sriov_pf_config_get_threshold(gt, vfid, index), + name, threshold_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_threshold - Get VF's threshold. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @index: the threshold index + * + * This function can only be called on PF. + * + * Return: value of VF's (or PF's) threshold. + */ +u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index) +{ + u32 value; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + value = pf_get_threshold(gt, vfid, index); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return value; +} + static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 5e6b36f00b5b..e8238c1ad06a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -8,6 +8,7 @@ #include +enum xe_guc_klv_threshold_index; struct drm_printer; struct xe_gt; @@ -43,6 +44,11 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index); +int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value); + int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force); int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index d3745c355957..7bc66656fcc7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -8,6 +8,8 @@ #include +#include "xe_guc_klv_thresholds_set_types.h" + struct xe_bo; /** @@ -32,6 +34,8 @@ struct xe_gt_sriov_config { u32 exec_quantum; /** @preempt_timeout: preemption timeout in microseconds. */ u32 preempt_timeout; + /** @thresholds: GuC thresholds for adverse events notifications. */ + u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS]; }; /** -- cgit From c4f5ded082bb9433b180dbfbb8352f92e319149b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:12 +0200 Subject: drm/xe/pf: Allow configuration of VF thresholds over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial values of all thresholds used by the GuC to monitor VF's activity is zero (disabled) and we need to explicitly configure them per each VF. Expose additional attributes over debugfs. Definitions of all attributes are generated so we will not need to make any changes if new thresholds would be added to the set. 
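For one entry of the set, say CAT_ERR/cat_error_count, the generators added below expand to roughly:

    static int cat_error_count_set(void *data, u64 val)
    {
            return set_threshold(data, val, XE_GUC_KLV_THRESHOLD_INDEX_CAT_ERR);
    }

    static int cat_error_count_get(void *data, u64 *val)
    {
            return get_threshold(data, val, XE_GUC_KLV_THRESHOLD_INDEX_CAT_ERR);
    }

    DEFINE_DEBUGFS_ATTRIBUTE(cat_error_count_fops, cat_error_count_get,
                             cat_error_count_set, "%llu\n");

with the resulting file registered as "threshold_cat_error_count" in the pf/ and vfN/ debugfs directories.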
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 72 +++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 5102035faa7e..eb71c2009c34 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -197,6 +197,71 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── threshold_cat_error_count + * │   │   ├── threshold_doorbell_time_us + * │   │   ├── threshold_engine_reset_count + * │   │   ├── threshold_guc_time_us + * │   │   ├── threshold_irq_time_us + * │   │   ├── threshold_page_fault_count + * │   ├── vf1 + * │   │   ├── threshold_cat_error_count + * │   │   ├── threshold_doorbell_time_us + * │   │   ├── threshold_engine_reset_count + * │   │   ├── threshold_guc_time_us + * │   │   ├── threshold_irq_time_us + * │   │   ├── threshold_page_fault_count + */ + +static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index index) +{ + struct xe_gt *gt = extract_gt(data); + unsigned int vfid = extract_vfid(data); + struct xe_device *xe = gt_to_xe(gt); + int err; + + if (val > (u32)~0ull) + return -EOVERFLOW; + + xe_pm_runtime_get(xe); + err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val); + xe_pm_runtime_put(xe); + + return err; +} + +static int get_threshold(void *data, u64 *val, enum xe_guc_klv_threshold_index index) +{ + struct xe_gt *gt = extract_gt(data); + unsigned int vfid = extract_vfid(data); + + *val = xe_gt_sriov_pf_config_get_threshold(gt, vfid, index); + return 0; +} + +#define DEFINE_SRIOV_GT_THRESHOLD_DEBUGFS_ATTRIBUTE(THRESHOLD, INDEX) \ + \ +static int THRESHOLD##_set(void *data, u64 val) \ +{ \ + return set_threshold(data, val, INDEX); \ +} \ + \ +static int THRESHOLD##_get(void *data, u64 *val) \ +{ \ + return get_threshold(data, val, INDEX); \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(THRESHOLD##_fops, THRESHOLD##_get, THRESHOLD##_set, "%llu\n") + +/* generate all threshold attributes */ +#define define_threshold_attribute(TAG, NAME, ...) \ + DEFINE_SRIOV_GT_THRESHOLD_DEBUGFS_ATTRIBUTE(NAME, MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)); +MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_threshold_attribute) +#undef define_threshold_attribute + static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) { xe_gt_assert(gt, gt == extract_gt(parent)); @@ -217,6 +282,13 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &exec_quantum_fops); debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, &preempt_timeout_fops); + + /* register all threshold attributes */ +#define register_threshold_attribute(TAG, NAME, ...) 
\ + debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \ + &NAME##_fops); + MAKE_XE_GUC_KLV_THRESHOLDS_SET(register_threshold_attribute) +#undef register_threshold_attribute } /* -- cgit From d5e12fffcc01b3a22157a9cd4a7474ee6355182e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:13 +0200 Subject: drm/xe/guc: Add GUC2PF_ADVERSE_EVENT to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When thresholds used to monitor VFs activities are configured, then GuC may send GUC2PF_ADVERSE_EVENT messages informing the PF driver about exceeded thresholds. Add necessary definitions to our GuC firmware ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index c1ad09b36453..f1aa7f88e217 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -171,6 +171,36 @@ #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN +/** + * DOC: GUC2PF_ADVERSE_EVENT + * + * This message is used by the GuC to notify PF about adverse events. + * + * This G2H message must be sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_GUC2PF_ADVERSE_EVENT` = 0x5104 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **THRESHOLD** - key of the exceeded threshold | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_GUC2PF_ADVERSE_EVENT 0x5104 + +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD GUC_HXG_EVENT_MSG_n_DATAn + /** * DOC: GUC2PF_VF_STATE_NOTIFY * -- cgit From 335d62ade5feaa46082f8da755ffdc569ae51768 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:14 +0200 Subject: drm/xe/pf: Track adverse events notifications from GuC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When thresholds used to monitor VFs activities are configured, then GuC may send GUC2PF_ADVERSE_EVENT messages informing the PF driver about exceeded thresholds. Start handling such messages. 
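Per the GUC2PF_ADVERSE_EVENT layout added in the previous patch, the payload carries only the VF identifier and the key of the exceeded threshold; a minimal decode sketch (origin/MBZ/length validation omitted here) is:

    static void demo_decode_adverse_event(const u32 *msg, u32 *vfid, u32 *key)
    {
            *vfid = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID, msg[1]);
            *key = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD, msg[2]);
    }

The key is then translated back to a tracked threshold index so that the matching per-VF event counter can be incremented.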
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-8-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c | 147 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h | 27 ++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h | 22 ++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 5 + drivers/gpu/drm/xe/xe_guc_ct.c | 4 + 6 files changed, 206 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 6acde66f0827..8fe7bb80501f 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -164,6 +164,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ xe_gt_sriov_pf_debugfs.o \ + xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ xe_lmtt.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c new file mode 100644 index 000000000000..7d532bded02a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_messages_abi.h" + +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_monitor.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set.h" + +/** + * xe_gt_sriov_pf_monitor_flr - Cleanup VF data after VF FLR. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * On FLR this function will reset all event data related to the VF. + * This function is for PF only. + */ +void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid) +{ + int e; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + for (e = 0; e < XE_GUC_KLV_NUM_THRESHOLDS; e++) + gt->sriov.pf.vfs[vfid].monitor.guc.events[e] = 0; +} + +static void pf_update_event_counter(struct xe_gt *gt, u32 vfid, + enum xe_guc_klv_threshold_index e) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, e < XE_GUC_KLV_NUM_THRESHOLDS); + + gt->sriov.pf.vfs[vfid].monitor.guc.events[e]++; +} + +static int pf_handle_vf_threshold_event(struct xe_gt *gt, u32 vfid, u32 threshold) +{ + char origin[8]; + int e; + + e = xe_guc_klv_threshold_key_to_index(threshold); + xe_sriov_function_name(vfid, origin, sizeof(origin)); + + /* was there a new KEY added that we missed? */ + if (unlikely(e < 0)) { + xe_gt_sriov_notice(gt, "unknown threshold key %#x reported for %s\n", + threshold, origin); + return -ENOTCONN; + } + + xe_gt_sriov_dbg(gt, "%s exceeded threshold %u %s\n", + origin, xe_gt_sriov_pf_config_get_threshold(gt, vfid, e), + xe_guc_klv_key_to_string(threshold)); + + pf_update_event_counter(gt, vfid, e); + + return 0; +} + +/** + * xe_gt_sriov_pf_monitor_process_guc2pf - Handle adverse event notification from the GuC. + * @gt: the &xe_gt + * @msg: G2H event message + * @len: length of the message + * + * This function is intended for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 vfid; + u32 threshold; + + xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + GUC_ACTION_GUC2PF_ADVERSE_EVENT); + + if (unlikely(!IS_SRIOV_PF(xe))) + return -EPROTO; + + if (unlikely(FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_0_MBZ, msg[0]))) + return -EPFNOSUPPORT; + + if (unlikely(len < GUC2PF_ADVERSE_EVENT_EVENT_MSG_LEN)) + return -EPROTO; + + vfid = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID, msg[1]); + threshold = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD, msg[2]); + + if (unlikely(vfid > xe_gt_sriov_pf_get_totalvfs(gt))) + return -EINVAL; + + return pf_handle_vf_threshold_event(gt, vfid, threshold); +} + +/** + * xe_gt_sriov_pf_monitor_print_events - Print adverse events counters. + * @gt: the &xe_gt to print events from + * @p: the &drm_printer + * + * Print adverse events counters for all VFs. + * VFs with no events are not printed. + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt); + const struct xe_gt_sriov_monitor *data; + int e; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + for (n = 1; n <= total_vfs; n++) { + data = >->sriov.pf.vfs[n].monitor; + + for (e = 0; e < XE_GUC_KLV_NUM_THRESHOLDS; e++) + if (data->guc.events[e]) + break; + + /* skip empty unless in debug mode */ + if (e >= XE_GUC_KLV_NUM_THRESHOLDS && + !IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) + continue; + +#define __format(...) "%s:%u " +#define __value(TAG, NAME, ...) , #NAME, data->guc.events[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)] + + drm_printf(p, "VF%u:\t" MAKE_XE_GUC_KLV_THRESHOLDS_SET(__format) "\n", + n MAKE_XE_GUC_KLV_THRESHOLDS_SET(__value)); + +#undef __format +#undef __value + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h new file mode 100644 index 000000000000..7ca9351a271b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_MONITOR_H_ +#define _XE_GT_SRIOV_PF_MONITOR_H_ + +#include +#include + +struct drm_printer; +struct xe_gt; + +void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid); +void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); +#else +static inline int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h new file mode 100644 index 000000000000..e27c0308c5db --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_MONITOR_TYPES_H_ +#define _XE_GT_SRIOV_PF_MONITOR_TYPES_H_ + +#include "xe_guc_klv_thresholds_set_types.h" + +/** + * struct xe_gt_sriov_monitor - GT level per-VF monitoring data. 
+ */ +struct xe_gt_sriov_monitor { + /** @guc: monitoring data related to the GuC. */ + struct { + /** @guc.events: number of adverse events reported by the GuC. */ + unsigned int events[XE_GUC_KLV_NUM_THRESHOLDS]; + } guc; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 880754f3e215..40cbaea3ef44 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -9,6 +9,7 @@ #include #include "xe_gt_sriov_pf_config_types.h" +#include "xe_gt_sriov_pf_monitor_types.h" #include "xe_gt_sriov_pf_policy_types.h" #include "xe_gt_sriov_pf_service_types.h" @@ -18,6 +19,10 @@ struct xe_gt_sriov_metadata { /** @config: per-VF provisioning data. */ struct xe_gt_sriov_config config; + + /** @monitor: per-VF monitoring data. */ + struct xe_gt_sriov_monitor monitor; + /** @version: negotiated VF/PF ABI version */ struct xe_gt_sriov_pf_service_version version; }; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 0151d29b3c58..c1f258348f5c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -22,6 +22,7 @@ #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_relay.h" @@ -1071,6 +1072,9 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY: ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); break; + case GUC_ACTION_GUC2PF_ADVERSE_EVENT: + ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); + break; default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } -- cgit From 1c99d3d3edab25617afbb1592564d3ecc233bc5d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:15 +0200 Subject: drm/xe/pf: Expose PF monitor details via debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debug purposes we might want to view statistics maintained by the PF driver about VFs activity. 
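Given the __format/__value generators in the monitor code, each VF with recorded events is printed on a single line, for example (illustrative counter values):

    VF1:    cat_error_count:0 engine_reset_count:2 page_fault_count:0 guc_time_us:0 irq_time_us:0 doorbell_time_us:0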
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-9-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index eb71c2009c34..2290ddaf9594 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" @@ -55,6 +56,7 @@ static unsigned int extract_vfid(struct dentry *d) * │   │   ├── doorbells_provisioned * │   │   ├── runtime_registers * │   │   ├── negotiated_versions + * │   │   ├── adverse_events */ static const struct drm_info_list pf_info[] = { @@ -88,6 +90,11 @@ static const struct drm_info_list pf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_service_print_version, }, + { + "adverse_events", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_monitor_print_events, + }, }; /* -- cgit From e158cf936114661044dface6da794437a91b53c4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:41 +0200 Subject: drm/xe/guc: Add VF2GUC_MATCH_VERSION to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches we will add a version negotiation between the VF driver and the GuC firmware. Add necessary definitions to our GuC firmware ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 67 ++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index f1aa7f88e217..2642cd337a1f 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -243,6 +243,73 @@ #define GUC_PF_NOTIFY_VF_PAUSE_DONE 3u #define GUC_PF_NOTIFY_VF_FIXUP_DONE 4u +/** + * DOC: VF2GUC_MATCH_VERSION + * + * This action is used to match VF interface version used by VF and GuC. + * + * This message must be sent as `MMIO HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_MATCH_VERSION` = 0x5500 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:24 | **BRANCH** - branch ID of the VF interface | + * | | | (use BRANCH_ANY to request latest version supported by GuC) | + * | +-------+--------------------------------------------------------------+ + * | | 23:16 | **MAJOR** - major version of the VF interface | + * | | | (use MAJOR_ANY to request latest version supported by GuC) | + * | +-------+--------------------------------------------------------------+ + * | | 15:8 | **MINOR** - minor version of the VF interface | + * | | | (use MINOR_ANY to request latest version supported by GuC) | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **MBZ** | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:24 | **BRANCH** - branch ID of the VF interface | + * | +-------+--------------------------------------------------------------+ + * | | 23:16 | **MAJOR** - major version of the VF interface | + * | +-------+--------------------------------------------------------------+ + * | | 15:8 | **MINOR** - minor version of the VF interface | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **PATCH** - patch version of the VF interface | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_MATCH_VERSION 0x5500u + +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH (0xffu << 24) +#define GUC_VERSION_BRANCH_ANY 0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR (0xffu << 16) +#define GUC_VERSION_MAJOR_ANY 0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR (0xffu << 8) +#define GUC_VERSION_MINOR_ANY 0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MBZ (0xffu << 0) + +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_LEN (GUC_HXG_RESPONSE_MSG_MIN_LEN + 1u) +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH (0xffu << 24) +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR (0xffu << 16) +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR (0xffu << 8) +#define 
VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH (0xffu << 0) + /** * DOC: PF2GUC_UPDATE_VGT_POLICY * -- cgit From 769551c45c2b66b4d0bbe5a78aab4156f85c6331 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:42 +0200 Subject: drm/xe/guc: Add VF2GUC_VF_RESET to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The version negotiation between the VF driver and the GuC firmware must start with explicit soft reset of the GuC state initiated by the VF driver. Add VF2GUC action definitions to the ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 2642cd337a1f..23e21c3f033f 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -464,4 +464,41 @@ #define GUC_PF_TRIGGER_VF_FLR_START 4u #define GUC_PF_TRIGGER_VF_FLR_FINISH 5u +/** + * DOC: VF2GUC_VF_RESET + * + * This action is used by VF to reset GuC's VF state. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_VF_RESET` = 0x5507 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_VF_RESET 0x5507u + +#define VF2GUC_VF_RESET_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_VF_RESET_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 + +#define VF2GUC_VF_RESET_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + #endif -- cgit From c454f1a6b994e44e338ac837981441a298c941b8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:43 +0200 Subject: drm/xe/guc: Add VF2GUC_QUERY_SINGLE_KLV to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches we will add support to the VF driver to read its configuration from the GuC using special H2G actions. Add necessary definitions to our GuC firmware ABI header. 
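A hypothetical VF-side helper built on these definitions (reusing the xe_guc_mmio_send_recv() pattern that the VF code later in this series uses; MBZ checks omitted for brevity) might look like:

    static int demo_query_single_klv64(struct xe_guc *guc, u16 key, u64 *value)
    {
            u32 request[VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN] = {
                    FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
                    FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
                    FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
                               GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV),
                    FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, key),
            };
            u32 response[GUC_MAX_MMIO_MSG_LEN];
            u32 length;
            int ret;

            ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response);
            if (ret < 0)
                    return ret;

            length = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH, response[0]);
            if (length > 2)
                    return -EOVERFLOW;      /* value wider than 64 bits */

            *value = length == 2 ? (u64)response[2] << 32 | response[1] :
                     length == 1 ? response[1] : 0;
            return 0;
    }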
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 23e21c3f033f..181180f5945c 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -501,4 +501,60 @@ #define VF2GUC_VF_RESET_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +/** + * DOC: VF2GUC_QUERY_SINGLE_KLV + * + * This action is used by VF to query value of the single KLV data. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV` = 0x5509 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **KEY** - key for which value is requested | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LENGTH** - length of data in dwords | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE32** - bits 31:0 of value if **LENGTH** >= 1 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **VALUE64** - bits 63:32 of value if **LENGTH** >= 2 | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **VALUE96** - bits 95:64 of value if **LENGTH** >= 3 | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV 0x5509u + +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_MBZ (0xffffu << 16) +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY (0xffffu << 0) + +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MIN_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MAX_LEN (GUC_HXG_RESPONSE_MSG_MIN_LEN + 3u) +#define 
VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_MBZ (0xfffu << 16) +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH (0xffffu << 0) +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_1_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96 GUC_HXG_REQUEST_MSG_n_DATAn + #endif -- cgit From f2345ed5374ef964ff97e13e82f53b07c827b373 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:44 +0200 Subject: drm/xe/vf: Add support for VF to query its configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VF driver doesn't know which GuC firmware was loaded by the PF driver and must perform GuC ABI version handshake prior to sending any other H2G actions to the GuC to submit workloads. The VF driver also doesn't have access to the fuse registers and must rely on the runtime info, which includes values of the fuse registers, that the PF driver is exposing to the VFs. Add functions to cover that functionality. We will use these functions in upcoming patches. Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 747 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 23 + drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 82 ++++ drivers/gpu/drm/xe/xe_gt_types.h | 3 + 5 files changed, 856 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 8fe7bb80501f..e9ba6c753779 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -155,6 +155,7 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ + xe_gt_sriov_vf.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c new file mode 100644 index 000000000000..378dde5ad4f9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -0,0 +1,747 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include +#include + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_communication_mmio_abi.h" +#include "abi/guc_klvs_abi.h" +#include "abi/guc_relay_actions_abi.h" + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_vf.h" +#include "xe_gt_sriov_vf_types.h" +#include "xe_guc.h" +#include "xe_guc_hxg_helpers.h" +#include "xe_guc_relay.h" +#include "xe_sriov.h" + +#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + +static int guc_action_vf_reset(struct xe_guc *guc) +{ + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_VF_RESET), + }; + int ret; + + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int vf_reset_guc_state(struct xe_gt *gt) +{ + struct xe_guc *guc = >->uc.guc; + int err; + + err = guc_action_vf_reset(guc); + if (unlikely(err)) + xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); + return err; +} + +static int guc_action_match_version(struct xe_guc *guc, + u32 wanted_branch, u32 wanted_major, u32 wanted_minor, + u32 *branch, u32 *major, u32 *minor, u32 *patch) +{ + u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_VF2GUC_MATCH_VERSION), + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor), + }; + u32 response[GUC_MAX_MMIO_MSG_LEN]; + int ret; + + BUILD_BUG_ON(VF2GUC_MATCH_VERSION_RESPONSE_MSG_LEN > GUC_MAX_MMIO_MSG_LEN); + + ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response); + if (unlikely(ret < 0)) + return ret; + + if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) + return -EPROTO; + + *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); + *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); + *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); + *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); + + return 0; +} + +static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +{ + struct xe_device *xe = gt_to_xe(gt); + + switch (xe->info.platform) { + case XE_TIGERLAKE ... XE_PVC: + /* 1.1 this is current baseline for Xe driver */ + *branch = 0; + *major = 1; + *minor = 1; + break; + default: + /* 1.2 has support for the GMD_ID KLV */ + *branch = 0; + *major = 1; + *minor = 2; + break; + } +} + +static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +{ + /* for now it's the same as minimum */ + return vf_minimum_guc_version(gt, branch, major, minor); +} + +static int vf_handshake_with_guc(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; + struct xe_guc *guc = >->uc.guc; + u32 wanted_branch, wanted_major, wanted_minor; + u32 branch, major, minor, patch; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + /* select wanted version - prefer previous (if any) */ + if (guc_version->major || guc_version->minor) { + wanted_branch = guc_version->branch; + wanted_major = guc_version->major; + wanted_minor = guc_version->minor; + } else { + vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); + xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY); + } + + err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor, + &branch, &major, &minor, &patch); + if (unlikely(err)) + goto fail; + + /* we don't support interface version change */ + if ((guc_version->major || guc_version->minor) && + (guc_version->branch != branch || guc_version->major != major || + guc_version->minor != minor)) { + xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", + branch, major, minor, patch); + xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + err = -EREMCHG; + goto fail; + } + + /* illegal */ + 
if (major > wanted_major) { + err = -EPROTO; + goto unsupported; + } + + /* there's no fallback on major version. */ + if (major != wanted_major) { + err = -ENOPKG; + goto unsupported; + } + + /* check against minimum version supported by us */ + vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); + xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY); + if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) { + err = -ENOKEY; + goto unsupported; + } + + xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", + branch, major, minor, patch); + + guc_version->branch = branch; + guc_version->major = major; + guc_version->minor = minor; + guc_version->patch = patch; + return 0; + +unsupported: + xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", + branch, major, minor, patch, ERR_PTR(err)); +fail: + xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", + wanted_major, wanted_minor, ERR_PTR(err)); + + /* try again with *any* just to query which version is supported */ + if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY, + GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY, + &branch, &major, &minor, &patch)) + xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", + branch, major, minor, patch); + return err; +} + +/** + * xe_gt_sriov_vf_bootstrap - Query and setup GuC ABI interface version. + * @gt: the &xe_gt + * + * This function is for VF use only. + * It requires functional `GuC MMIO based communication`_. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) +{ + int err; + + err = vf_reset_guc_state(gt); + if (unlikely(err)) + return err; + + err = vf_handshake_with_guc(gt); + if (unlikely(err)) + return err; + + return 0; +} + +static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, + u32 *value, u32 value_len) +{ + u32 request[VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV), + FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, key), + }; + u32 response[GUC_MAX_MMIO_MSG_LEN]; + u32 length; + int ret; + + BUILD_BUG_ON(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MAX_LEN > GUC_MAX_MMIO_MSG_LEN); + ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response); + if (unlikely(ret < 0)) + return ret; + + if (unlikely(FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_MBZ, response[0]))) + return -EPROTO; + + length = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH, response[0]); + if (unlikely(length > value_len)) + return -EOVERFLOW; + if (unlikely(length < value_len)) + return -ENODATA; + + switch (value_len) { + default: + xe_gt_WARN_ON(guc_to_gt(guc), value_len > 3); + fallthrough; + case 3: + value[2] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96, response[3]); + fallthrough; + case 2: + value[1] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64, response[2]); + fallthrough; + case 1: + value[0] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_1_VALUE32, response[1]); + fallthrough; + case 0: + break; + } + + return 0; +} + +static int guc_action_query_single_klv32(struct xe_guc *guc, u32 key, u32 *value32) +{ + return guc_action_query_single_klv(guc, key, value32, hxg_sizeof(u32)); +} + +static int guc_action_query_single_klv64(struct xe_guc *guc, u32 key, u64 *value64) +{ + u32 value[2]; + int 
err; + + err = guc_action_query_single_klv(guc, key, value, hxg_sizeof(value)); + if (unlikely(err)) + return err; + + *value64 = make_u64_from_u32(value[1], value[0]); + return 0; +} + +static int vf_get_ggtt_info(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_guc *guc = >->uc.guc; + u64 start, size; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); + if (unlikely(err)) + return err; + + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_SIZE_KEY, &size); + if (unlikely(err)) + return err; + + if (config->ggtt_size && config->ggtt_size != size) { + xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", + size / SZ_1K, config->ggtt_size / SZ_1K); + return -EREMCHG; + } + + xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", + start, start + size - 1, size / SZ_1K); + + config->ggtt_base = start; + config->ggtt_size = size; + + return config->ggtt_size ? 0 : -ENODATA; +} + +static int vf_get_lmem_info(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_guc *guc = >->uc.guc; + char size_str[10]; + u64 size; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, &size); + if (unlikely(err)) + return err; + + if (config->lmem_size && config->lmem_size != size) { + xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n", + size / SZ_1M, config->lmem_size / SZ_1M); + return -EREMCHG; + } + + string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str)); + xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str); + + config->lmem_size = size; + + return config->lmem_size ? 0 : -ENODATA; +} + +static int vf_get_submission_cfg(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_guc *guc = >->uc.guc; + u32 num_ctxs, num_dbs; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY, &num_ctxs); + if (unlikely(err)) + return err; + + err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY, &num_dbs); + if (unlikely(err)) + return err; + + if (config->num_ctxs && config->num_ctxs != num_ctxs) { + xe_gt_sriov_err(gt, "Unexpected CTXs reassignment: %u != %u\n", + num_ctxs, config->num_ctxs); + return -EREMCHG; + } + if (config->num_dbs && config->num_dbs != num_dbs) { + xe_gt_sriov_err(gt, "Unexpected DBs reassignment: %u != %u\n", + num_dbs, config->num_dbs); + return -EREMCHG; + } + + xe_gt_sriov_dbg_verbose(gt, "CTXs %u DBs %u\n", num_ctxs, num_dbs); + + config->num_ctxs = num_ctxs; + config->num_dbs = num_dbs; + + return config->num_ctxs ? 0 : -ENODATA; +} + +/** + * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_vf_query_config(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + err = vf_get_ggtt_info(gt); + if (unlikely(err)) + return err; + + if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + err = vf_get_lmem_info(gt); + if (unlikely(err)) + return err; + } + + err = vf_get_submission_cfg(gt); + if (unlikely(err)) + return err; + + return 0; +} + +static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) +{ + u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VF2PF_HANDSHAKE), + FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, *major) | + FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, *minor), + }; + u32 response[VF2PF_HANDSHAKE_RESPONSE_MSG_LEN]; + int ret; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + ret = xe_guc_relay_send_to_pf(>->uc.guc.relay, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + if (unlikely(ret < 0)) + return ret; + + if (unlikely(ret != VF2PF_HANDSHAKE_RESPONSE_MSG_LEN)) + return -EPROTO; + + if (unlikely(FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ, response[0]))) + return -EPROTO; + + *major = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, response[1]); + *minor = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, response[1]); + + return 0; +} + +static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + gt->sriov.vf.pf_version.major = major; + gt->sriov.vf.pf_version.minor = minor; +} + +static void vf_disconnect_pf(struct xe_gt *gt) +{ + vf_connect_pf(gt, 0, 0); +} + +static int vf_handshake_with_pf(struct xe_gt *gt) +{ + u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; + u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; + u32 major = major_wanted, minor = minor_wanted; + int err; + + err = relay_action_handshake(gt, &major, &minor); + if (unlikely(err)) + goto failed; + + if (!major && !minor) { + err = -ENODATA; + goto failed; + } + + xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); + vf_connect_pf(gt, major, minor); + return 0; + +failed: + xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", + major, minor, ERR_PTR(err)); + vf_disconnect_pf(gt); + return err; +} + +/** + * xe_gt_sriov_vf_connect - Establish connection with the PF driver. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_vf_connect(struct xe_gt *gt) +{ + int err; + + err = vf_handshake_with_pf(gt); + if (unlikely(err)) + goto failed; + + return 0; + +failed: + xe_gt_sriov_err(gt, "Failed to get version info (%pe)\n", ERR_PTR(err)); + return err; +} + +static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + return major == gt->sriov.vf.pf_version.major && + minor <= gt->sriov.vf.pf_version.minor; +} + +static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) +{ + struct vf_runtime_reg *regs = gt->sriov.vf.runtime.regs; + unsigned int regs_size = round_up(num_regs, 4); + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + if (regs) { + if (num_regs <= gt->sriov.vf.runtime.regs_size) { + memset(regs, 0, num_regs * sizeof(*regs)); + gt->sriov.vf.runtime.num_regs = num_regs; + return 0; + } + + drmm_kfree(&xe->drm, regs); + gt->sriov.vf.runtime.regs = NULL; + gt->sriov.vf.runtime.num_regs = 0; + gt->sriov.vf.runtime.regs_size = 0; + } + + regs = drmm_kcalloc(&xe->drm, regs_size, sizeof(*regs), GFP_KERNEL); + if (unlikely(!regs)) + return -ENOMEM; + + gt->sriov.vf.runtime.regs = regs; + gt->sriov.vf.runtime.num_regs = num_regs; + gt->sriov.vf.runtime.regs_size = regs_size; + return 0; +} + +static int vf_query_runtime_info(struct xe_gt *gt) +{ + u32 request[VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN]; + u32 response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 32]; /* up to 16 regs */ + u32 limit = (ARRAY_SIZE(response) - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2; + u32 count, remaining, num, i; + u32 start = 0; + int ret; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, limit); + + /* this is part of the 1.0 PF/VF ABI */ + if (!vf_is_negotiated(gt, 1, 0)) + return -ENOPKG; + + request[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME) | + FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, limit); + +repeat: + request[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, start); + ret = xe_guc_relay_send_to_pf(>->uc.guc.relay, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + if (unlikely(ret < 0)) + goto failed; + + if (unlikely(ret < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) { + ret = -EPROTO; + goto failed; + } + if (unlikely((ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) % 2)) { + ret = -EPROTO; + goto failed; + } + + num = (ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2; + count = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, response[0]); + remaining = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, response[1]); + + xe_gt_sriov_dbg_verbose(gt, "count=%u num=%u ret=%d start=%u remaining=%u\n", + count, num, ret, start, remaining); + + if (unlikely(count != num)) { + ret = -EPROTO; + goto failed; + } + + if (start == 0) { + ret = vf_prepare_runtime_info(gt, num + remaining); + if (unlikely(ret < 0)) + goto failed; + } else if (unlikely(start + num > gt->sriov.vf.runtime.num_regs)) { + ret = -EPROTO; + goto failed; + } + + for (i = 0; i < num; ++i) { + struct vf_runtime_reg *reg = >->sriov.vf.runtime.regs[start + i]; + + reg->offset = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i]; + reg->value = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i + 1]; + } + + if (remaining) { + start += num; + goto repeat; + } + + return 0; + +failed: + 
vf_prepare_runtime_info(gt, 0); + return ret; +} + +static void vf_show_runtime_info(struct xe_gt *gt) +{ + struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; + unsigned int size = gt->sriov.vf.runtime.num_regs; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + for (; size--; vf_regs++) + xe_gt_sriov_dbg(gt, "runtime(%#x) = %#x\n", + vf_regs->offset, vf_regs->value); +} + +/** + * xe_gt_sriov_vf_query_runtime - Query SR-IOV runtime data. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt) +{ + int err; + + err = vf_query_runtime_info(gt); + if (unlikely(err)) + goto failed; + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + vf_show_runtime_info(gt); + + return 0; + +failed: + xe_gt_sriov_err(gt, "Failed to get runtime info (%pe)\n", + ERR_PTR(err)); + return err; +} + +/** + * xe_gt_sriov_vf_print_config - Print VF self config. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_device *xe = gt_to_xe(gt); + char buf[10]; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + drm_printf(p, "GGTT range:\t%#llx-%#llx\n", + config->ggtt_base, + config->ggtt_base + config->ggtt_size - 1); + + string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); + + if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); + } + + drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); + drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs); +} + +/** + * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) +{ + struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; + unsigned int size = gt->sriov.vf.runtime.num_regs; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + for (; size--; vf_regs++) + drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value); +} + +/** + * xe_gt_sriov_vf_print_version - Print VF ABI versions. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for VF use only. 
+ */ +void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; + struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; + u32 branch, major, minor; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + drm_printf(p, "GuC ABI:\n"); + + vf_minimum_guc_version(gt, &branch, &major, &minor); + drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor); + + vf_wanted_guc_version(gt, &branch, &major, &minor); + drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor); + + drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + + drm_printf(p, "PF ABI:\n"); + + drm_printf(p, "\tbase:\t%u.%u\n", + GUC_RELAY_VERSION_BASE_MAJOR, GUC_RELAY_VERSION_BASE_MINOR); + drm_printf(p, "\twanted:\t%u.%u\n", + GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR); + drm_printf(p, "\thandshake:\t%u.%u\n", + pf_version->major, pf_version->minor); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h new file mode 100644 index 000000000000..997cb7541036 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_VF_H_ +#define _XE_GT_SRIOV_VF_H_ + +#include + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); +int xe_gt_sriov_vf_query_config(struct xe_gt *gt); +int xe_gt_sriov_vf_connect(struct xe_gt *gt); +int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); + +void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h new file mode 100644 index 000000000000..519492f4b7d0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_VF_TYPES_H_ +#define _XE_GT_SRIOV_VF_TYPES_H_ + +#include + +/** + * struct xe_gt_sriov_vf_guc_version - GuC ABI version details. + */ +struct xe_gt_sriov_vf_guc_version { + /** @branch: branch version. */ + u8 branch; + /** @major: major version. */ + u8 major; + /** @minor: minor version. */ + u8 minor; + /** @patch: patch version. */ + u8 patch; +}; + +/** + * struct xe_gt_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_gt_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_gt_sriov_vf_selfconfig - VF configuration data. + */ +struct xe_gt_sriov_vf_selfconfig { + /** @ggtt_base: assigned base offset of the GGTT region. */ + u64 ggtt_base; + /** @ggtt_size: assigned size of the GGTT region. */ + u64 ggtt_size; + /** @lmem_size: assigned size of the LMEM. */ + u64 lmem_size; + /** @num_ctxs: assigned number of GuC submission context IDs. */ + u16 num_ctxs; + /** @num_dbs: assigned number of GuC doorbells IDs. */ + u16 num_dbs; +}; + +/** + * struct xe_gt_sriov_vf_runtime - VF runtime data. + */ +struct xe_gt_sriov_vf_runtime { + /** @regs_size: size of runtime register array. */ + u32 regs_size; + /** @num_regs: number of runtime registers in the array. 
*/ + u32 num_regs; + /** @regs: pointer to array of register offset/value pairs. */ + struct vf_runtime_reg { + /** @regs.offset: register offset. */ + u32 offset; + /** @regs.value: register value. */ + u32 value; + } *regs; +}; + +/** + * struct xe_gt_sriov_vf - GT level VF virtualization data. + */ +struct xe_gt_sriov_vf { + /** @guc_version: negotiated GuC ABI version. */ + struct xe_gt_sriov_vf_guc_version guc_version; + /** @self_config: resource configurations. */ + struct xe_gt_sriov_vf_selfconfig self_config; + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_gt_sriov_vf_relay_version pf_version; + /** @runtime: runtime data retrieved from the PF. */ + struct xe_gt_sriov_vf_runtime runtime; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 5a114fc9dde7..475fb58882f1 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -9,6 +9,7 @@ #include "xe_force_wake_types.h" #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" +#include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_reg_sr_types.h" @@ -143,6 +144,8 @@ struct xe_gt { union { /** @sriov.pf: PF data. Valid only if driver is running as PF */ struct xe_gt_sriov_pf pf; + /** @sriov.vf: VF data. Valid only if driver is running as VF */ + struct xe_gt_sriov_vf vf; } sriov; /** -- cgit From 25275c8a4f1be38494caae25cfbac116280a5351 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:45 +0200 Subject: drm/xe/vf: Custom hardware config load step if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VF drivers may immediately communicate with the GuC to obtain the hardware config since the firmware shall already be running. With the GuC communication established, VFs can also obtain the values of the runtime registers (fuses) from the PF driver. 
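The configuration queries above are built on the VF2GUC_QUERY_SINGLE_KLV layout documented at the start of this series. As a minimal sketch (illustration only, not code from the patches) of the encode/decode, assuming the ABI macros from guc_actions_sriov_abi.h and <linux/bitfield.h>:

#include <linux/bitfield.h>
#include <linux/errno.h>

#include "abi/guc_actions_sriov_abi.h"

/* Sketch: encode a VF2GUC_QUERY_SINGLE_KLV request for one key. */
static void example_encode_query_single_klv(u32 *req, u32 klv_key)
{
	req[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV);
	req[1] = FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, klv_key);
}

/*
 * Sketch: decode a 64-bit value; LENGTH comes from response dword 0,
 * the low/high halves from dwords 1 and 2 (VALUE32/VALUE64 above).
 */
static int example_decode_query_single_klv64(const u32 *resp, u64 *value)
{
	u32 len = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH,
			    resp[0]);

	if (len != 2)
		return -EPROTO;

	*value = ((u64)resp[2] << 32) | resp[1];
	return 0;
}

This mirrors what guc_action_query_single_klv64() in the patch above does via xe_guc_mmio_send_recv().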
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index b1bb94914028..e52b544ac690 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -19,6 +19,7 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_vf.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" @@ -547,6 +548,38 @@ out: return 0 /* FIXME: ret, don't want to stop load currently */; } +static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = xe_gt_sriov_vf_bootstrap(gt); + if (ret) + return ret; + + ret = xe_gt_sriov_vf_query_config(gt); + if (ret) + return ret; + + ret = xe_guc_hwconfig_init(guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(guc); + if (ret) + return ret; + + ret = xe_gt_sriov_vf_connect(gt); + if (ret) + return ret; + + ret = xe_gt_sriov_vf_query_runtime(gt); + if (ret) + return ret; + + return 0; +} + /** * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table * @guc: The GuC object @@ -562,6 +595,9 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) { int ret; + if (IS_SRIOV_VF(guc_to_xe(guc))) + return vf_guc_min_load_for_hwconfig(guc); + xe_guc_ads_populate_minimal(&guc->ads); /* Raise GT freq to speed up HuC/GuC load */ -- cgit From 63d8cb8fe3ddf74627003f99ad085887baf91e60 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:46 +0200 Subject: drm/xe/vf: Expose SR-IOV VF attributes to GT debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debug purposes we might want to view actual VF configuration (including GGTT range, LMEM size, number of GuC contexts IDs or doorbells) and the negotiated ABI versions (with GuC and PF). 
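As a usage sketch (not part of the patch), the new attributes are plain text files readable from userspace; the paths assume card 0 and gt0, matching the layout comment in the patch below:

#include <stdio.h>

int main(void)
{
	static const char * const files[] = {
		"/sys/kernel/debug/dri/0/gt0/vf/self_config",
		"/sys/kernel/debug/dri/0/gt0/vf/abi_versions",
		"/sys/kernel/debug/dri/0/gt0/vf/runtime_regs", /* debug builds */
	};
	char line[256];
	unsigned int i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f = fopen(files[i], "r");

		if (!f)
			continue; /* not running as a VF, or not exposed */

		printf("== %s ==\n", files[i]);
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}

	return 0;
}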
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 3 ++ drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c | 72 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h | 14 ++++++ 4 files changed, 90 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e9ba6c753779..71b5c35ee4c2 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -156,6 +156,7 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ + xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c5e562e143fd..66f897a9b6ca 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -16,6 +16,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" +#include "xe_gt_sriov_vf_debugfs.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -306,4 +307,6 @@ void xe_gt_debugfs_register(struct xe_gt *gt) if (IS_SRIOV_PF(xe)) xe_gt_sriov_pf_debugfs_register(gt, root); + else if (IS_SRIOV_VF(xe)) + xe_gt_sriov_vf_debugfs_register(gt, root); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c new file mode 100644 index 000000000000..f3ddcbefc6bc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include + +#include "xe_gt_debugfs.h" +#include "xe_gt_sriov_vf.h" +#include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_types.h" +#include "xe_sriov.h" + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf + * │   │   ├── self_config + * │   │   ├── abi_versions + * │   │   ├── runtime_regs + */ + +static const struct drm_info_list vf_info[] = { + { + "self_config", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_vf_print_config, + }, + { + "abi_versions", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_vf_print_version, + }, +#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV) + { + "runtime_regs", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_vf_print_runtime, + }, +#endif +}; + +/** + * xe_gt_sriov_vf_debugfs_register - Register SR-IOV VF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @root: the &dentry that represents the GT directory + * + * Register SR-IOV VF entries that are GT related and must be shown under GT debugfs. 
+ */ +void xe_gt_sriov_vf_debugfs_register(struct xe_gt *gt, struct dentry *root) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = xe->drm.primary; + struct dentry *vfdentry; + + xe_assert(xe, IS_SRIOV_VF(xe)); + xe_assert(xe, root->d_inode->i_private == gt); + + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf + */ + vfdentry = debugfs_create_dir("vf", root); + if (IS_ERR(vfdentry)) + return; + vfdentry->d_inode->i_private = gt; + + drm_debugfs_create_files(vf_info, ARRAY_SIZE(vf_info), vfdentry, minor); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h new file mode 100644 index 000000000000..b2cff7ef5c78 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_VF_DEBUGFS_H_ +#define _XE_GT_SRIOV_VF_DEBUGFS_H_ + +struct xe_gt; +struct dentry; + +void xe_gt_sriov_vf_debugfs_register(struct xe_gt *gt, struct dentry *root); + +#endif -- cgit From 844f3228d225d25af8a21a7e1554d78c20823a37 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 10 May 2024 08:01:08 -0700 Subject: drm/xe: Replace RING_START_UDW by u64 RING_START MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other u64 registers are printed in a single line so RING_START needs to follow that too. As there is no upstream decoder tool parsing RING_START this will not break any decoder application. Cc: Niranjana Vishwanathapura Cc: Matt Roper Signed-off-by: José Roberto de Souza Reviewed-by: Niranjana Vishwanathapura Link: https://patchwork.freedesktop.org/patch/msgid/20240510150108.80679-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 10 +++++----- drivers/gpu/drm/xe/xe_hw_engine_types.h | 4 +--- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 45f582a7caaa..e19af179af33 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -908,11 +908,13 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { + val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + snapshot->reg.ring_start |= val << 32; + } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); - snapshot->reg.ring_start_udw = - hw_engine_mmio_read32(hwe, RING_START_UDW(0)); } snapshot->reg.ring_head = @@ -1003,9 +1005,7 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->reg.ring_execlist_status); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", snapshot->reg.ring_execlist_sq_contents); - drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); - drm_printf(p, "\tRING_START_UDW: 0x%08x\n", - snapshot->reg.ring_start_udw); + drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start); drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h 
b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 5f4b67acba99..b2f64b92a636 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -188,9 +188,7 @@ struct xe_hw_engine_snapshot { /** @reg.ring_hws_pga: RING_HWS_PGA */ u32 ring_hws_pga; /** @reg.ring_start: RING_START */ - u32 ring_start; - /** @reg.ring_start_udw: RING_START_UDW */ - u32 ring_start_udw; + u64 ring_start; /** @reg.ring_head: RING_HEAD */ u32 ring_head; /** @reg.ring_tail: RING_TAIL */ -- cgit From ab689514b6ac518ef6e88afa245b834b0dae15a5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:03 -0700 Subject: drm/xe: Promote xe_hw_engine_class_to_str() Move it out of the sysfs compilation unit so it can be re-used in other places. Reviewed-by: Nirmoy Das Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine.h | 2 ++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 18 ------------------ 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index e19af179af33..b71e90c555fa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1099,3 +1099,21 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && hwe->instance == gt->usm.reserved_bcs_instance; } + +const char *xe_hw_engine_class_to_str(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return "rcs"; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return "vcs"; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return "vecs"; + case XE_ENGINE_CLASS_COPY: + return "bcs"; + case XE_ENGINE_CLASS_COMPUTE: + return "ccs"; + default: + return NULL; + } +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 71968ee2f600..843de159e47c 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -67,4 +67,6 @@ static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) return hwe->name; } +const char *xe_hw_engine_class_to_str(enum xe_engine_class class); + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 844ec68cbbb8..efce6c7dd2a2 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -618,24 +618,6 @@ static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } -static const char *xe_hw_engine_class_to_str(enum xe_engine_class class) -{ - switch (class) { - case XE_ENGINE_CLASS_RENDER: - return "rcs"; - case XE_ENGINE_CLASS_VIDEO_DECODE: - return "vcs"; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - return "vecs"; - case XE_ENGINE_CLASS_COPY: - return "bcs"; - case XE_ENGINE_CLASS_COMPUTE: - return "ccs"; - default: - return NULL; - } -} - /** * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT. * @gt: Xe GT. -- cgit From bd49e50d81b543e678965118a86958d87c045c73 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:04 -0700 Subject: drm/xe: Add XE_ENGINE_CLASS_OTHER to str conversion XE_ENGINE_CLASS_OTHER was missing from the str conversion. Add it and remove the default handling so it's protected by -Wswitch. 
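To illustrate the -Wswitch point (a generic sketch, not code from this patch): when a switch over an enum has no default label, gcc and clang warn about any enumerator that lacks a case, so future additions cannot be silently missed:

/* Sketch: exhaustive switch kept warning-clean without a default. */
enum example_class { EXAMPLE_A, EXAMPLE_B, EXAMPLE_MAX };

static const char *example_to_str(enum example_class c)
{
	switch (c) {
	case EXAMPLE_A:
		return "a";
	case EXAMPLE_B:
		return "b";
	case EXAMPLE_MAX:
		break;
	/*
	 * No default: adding EXAMPLE_C to the enum without a case here
	 * triggers "enumeration value 'EXAMPLE_C' not handled in switch".
	 */
	}

	return NULL;
}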
Currently the only user is xe_hw_engine_class_sysfs_init(), which already skips XE_ENGINE_CLASS_OTHER, so there's no change in behavior. Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b71e90c555fa..942fca8f1eb9 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1111,9 +1111,13 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class) return "vecs"; case XE_ENGINE_CLASS_COPY: return "bcs"; + case XE_ENGINE_CLASS_OTHER: + return "other"; case XE_ENGINE_CLASS_COMPUTE: return "ccs"; - default: - return NULL; + case XE_ENGINE_CLASS_MAX: + break; } + + return NULL; } -- cgit From 9b090d57746d965684f53a1aefcb363bab653ad3 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 17 May 2024 13:43:05 -0700 Subject: drm/xe/lrc: Add helper to capture context timestamp Add a helper to capture CTX_TIMESTAMP from the context image so it can be used to calculate the runtime. v2: Add kernel-doc to clarify expectation from caller Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 1 + drivers/gpu/drm/xe/xe_lrc.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_lrc_types.h | 3 +++ 4 files changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index e6ca8bbda8f4..045dfd09db99 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,6 +11,7 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_TIMESTAMP (0x22 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9b0a4078add3..f679cb9aaea7 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -844,6 +844,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; + lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -898,6 +899,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1576,3 +1579,12 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) xe_bo_put(snapshot->lrc_bo); kfree(snapshot); } + +u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +{ + *old_ts = lrc->ctx_timestamp; + + lrc->ctx_timestamp = xe_lrc_read_ctx_reg(lrc, CTX_TIMESTAMP); + + return lrc->ctx_timestamp; +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e0e841963c23..b9da1031083b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -66,4 +66,18 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void 
xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); +/** + * xe_lrc_update_timestamp - readout LRC timestamp and update cached value + * @lrc: logical ring context for this exec queue + * @old_ts: pointer where to save the previous timestamp + * + * Read the current timestamp for this LRC and update the cached value. The + * previous cached value is also returned in @old_ts so the caller can calculate + * the delta between 2 updates. Note that this is not intended to be called from + * any place, but just by the paths updating the drm client utilization. + * + * Returns the current LRC timestamp + */ +u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); + #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index cdbf03faef15..0fa055da6b27 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -45,6 +45,9 @@ struct xe_lrc { /** @fence_ctx: context for hw fence */ struct xe_hw_fence_ctx fence_ctx; + + /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ + u32 ctx_timestamp; }; struct xe_lrc_snapshot; -- cgit From f2f6b667c67daee6fe2c51b5cec3bb0f1b4c1ce0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:06 -0700 Subject: drm/xe: Add helper to capture engine timestamp Just like CTX_TIMESTAMP is used to calculate runtime, add a helper to get the timestamp for the engine so it can be used to calculate the "engine time" with the same unit as the runtime is recorded. Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 5 +++++ drivers/gpu/drm/xe/xe_hw_engine.h | 1 + 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 942fca8f1eb9..de1aefaa2335 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1121,3 +1121,8 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class) return NULL; } + +u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe) +{ + return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base)); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 843de159e47c..7f2d27c0ba1a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -68,5 +68,6 @@ static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) } const char *xe_hw_engine_class_to_str(enum xe_engine_class class); +u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); #endif -- cgit From 6109f24f87d75122cf6de50901115cbee4285ce2 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 17 May 2024 13:43:07 -0700 Subject: drm/xe: Add helper to accumulate exec queue runtime Add a helper to accumulate per-client runtime of all its exec queues. This is called every time a sched job is finished. v2: - Use guc_exec_queue_free_job() and execlist_job_free() to accumulate runtime when job is finished since xe_sched_job_completed() is not a notification that job finished. 
- Stop trying to update runtime from xe_exec_queue_fini() - that is redundant and may happen after xef is closed, leading to a use-after-free - Do not special case the first timestamp read: the default LRC sets CTX_TIMESTAMP to zero, so even the first sample should be a valid one. - Handle the parallel submission case by multiplying the runtime by width. v3: Update comments Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_exec_queue.c | 37 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 1 + drivers/gpu/drm/xe/xe_execlist.c | 1 + drivers/gpu/drm/xe/xe_guc_submit.c | 2 ++ 5 files changed, 44 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0af739981ebf..13da7a079c5f 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -559,6 +559,9 @@ struct xe_file { struct mutex lock; } exec_queue; + /** @runtime: hw engine class runtime in ticks for this drm client */ + u64 runtime[XE_ENGINE_CLASS_MAX]; + /** @client: drm client */ struct xe_drm_client *client; }; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 395de93579fa..fa6dc996eca8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -769,6 +769,43 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) q->lrc[0].fence_ctx.next_seqno - 1; } +/** + * xe_exec_queue_update_runtime() - Update runtime for this exec queue from hw + * @q: The exec queue + * + * Update the timestamp saved by HW for this exec queue and save runtime + * calculated by using the delta from last update. On multi-lrc case, only the + * first is considered. + */ +void xe_exec_queue_update_runtime(struct xe_exec_queue *q) +{ + struct xe_file *xef; + struct xe_lrc *lrc; + u32 old_ts, new_ts; + + /* + * Jobs that are run during driver load may use an exec_queue, but are + * not associated with a user xe file, so avoid accumulating busyness + * for kernel specific work. + */ + if (!q->vm || !q->vm->xef) + return; + + xef = q->vm->xef; + + /* + * Only sample the first LRC. For parallel submission, all of them are + * scheduled together and we compensate that below by multiplying by + * width - this may introduce errors if that premise is not true and + * they don't exit 100% aligned. On the other hand, looping through + * the LRCs and reading them in different time could also introduce + * errors. 
+ */ + lrc = &q->lrc[0]; + new_ts = xe_lrc_update_timestamp(lrc, &old_ts); + xef->runtime[q->class] += (new_ts - old_ts) * q->width; +} + void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 48f6da53a292..e0f07d28ee1a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -75,5 +75,6 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); +void xe_exec_queue_update_runtime(struct xe_exec_queue *q); #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index e9dee1e14fef..bd7f27efe0e0 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -306,6 +306,7 @@ static void execlist_job_free(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); + xe_exec_queue_update_runtime(job->q); xe_sched_job_put(job); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 4efb88e3e056..ad2b8067d071 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -749,6 +749,8 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); + xe_exec_queue_update_runtime(job->q); + trace_xe_sched_job_free(job); xe_sched_job_put(job); } -- cgit From baa14865529bf1f3c12dc6145bd9109ef289e038 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:08 -0700 Subject: drm/xe: Cache data about user-visible engines gt->info.engine_mask used to indicate the available engines, but that is not always true anymore: some engines are reserved to kernel and some may be exposed as a single engine (e.g. with ccs_mode). Runtime changes only happen when no clients exist, so it's safe to cache the list of engines in the gt and update that when it's needed. This will help implementing per client engine utilization so this (mostly constant) information doesn't need to be re-calculated on every query. 
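In code terms, a hedged sketch of how a query path can consume the cached data (the structure names follow the patch below; this is not code from the series):

/* Sketch: per-class capacity becomes a table lookup, not an engine walk. */
static u8 example_user_engine_capacity(struct xe_gt *gt,
				       enum xe_engine_class class)
{
	/*
	 * gt->user_engines is refreshed by xe_gt_record_user_engines()
	 * whenever the exposed engine set changes (e.g. via ccs_mode).
	 */
	return gt->user_engines.instances_per_class[class];
}

static bool example_user_engine_visible(struct xe_gt *gt,
					enum xe_hw_engine_id id)
{
	return gt->user_engines.mask & BIT_ULL(id);
}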
Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt.h | 13 +++++++++++++ drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 1 + drivers/gpu/drm/xe/xe_gt_types.h | 21 ++++++++++++++++++++- 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e69a03ddd255..5194a3d38e76 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -560,9 +560,32 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + xe_gt_record_user_engines(gt); + return drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); } +void xe_gt_record_user_engines(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + gt->user_engines.mask = 0; + memset(gt->user_engines.instances_per_class, 0, + sizeof(gt->user_engines.instances_per_class)); + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + gt->user_engines.mask |= BIT_ULL(id); + gt->user_engines.instances_per_class[hwe->class]++; + } + + xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask) + == gt->info.engine_mask); +} + static int do_gt_reset(struct xe_gt *gt) { int err; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 8474c50b1b30..1d010bf4a756 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -38,6 +38,19 @@ int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); + +/** + * xe_gt_record_user_engines - save data related to engines available to + * usersapce + * @gt: GT structure + * + * Walk the available HW engines from gt->info.engine_mask and calculate data + * related to those engines that may be used by userspace. To be used whenever + * available engines change in runtime (e.g. with ccs_mode) or during + * initialization + */ +void xe_gt_record_user_engines(struct xe_gt *gt); + void xe_gt_suspend_prepare(struct xe_gt *gt); int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index a34c9a24dafc..c36218f4f6c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -134,6 +134,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, if (gt->ccs_mode != num_engines) { xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; + xe_gt_record_user_engines(gt); xe_gt_reset_async(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 475fb58882f1..10a9a9529377 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -113,7 +113,11 @@ struct xe_gt { enum xe_gt_type type; /** @info.reference_clock: clock frequency */ u32 reference_clock; - /** @info.engine_mask: mask of engines present on GT */ + /** + * @info.engine_mask: mask of engines present on GT. Some of + * them may be reserved in runtime and not available for user. 
+ * See @user_engines.mask + */ u64 engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; @@ -368,6 +372,21 @@ struct xe_gt { /** @wa_active.oob: bitmap with active OOB workaroudns */ unsigned long *oob; } wa_active; + + /** @user_engines: engines present in GT and available to userspace */ + struct { + /** + * @user_engines.mask: like @info->engine_mask, but take in + * consideration only engines available to userspace + */ + u64 mask; + + /** + * @user_engines.instances_per_class: aggregate per class the + * number of engines available to userspace + */ + u8 instances_per_class[XE_ENGINE_CLASS_MAX]; + } user_engines; }; #endif -- cgit From 6aa18d7436b0c11f7e62fd6cdb707eaeab1dc473 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:09 -0700 Subject: drm/xe: Add helper to return any available hw engine Get the first available engine from a gt, which helps in the case any engine serves as a context, like when reading RING_TIMESTAMP. Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-8-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_gt.h | 7 +++++++ 2 files changed, 18 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5194a3d38e76..3432fef56486 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -833,3 +833,14 @@ struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, return NULL; } + +struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + return hwe; + + return NULL; +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 1d010bf4a756..9073ac68a777 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -67,6 +67,13 @@ void xe_gt_remove(struct xe_gt *gt); struct xe_hw_engine * xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class); +/** + * xe_gt_any_hw_engine - scan the list of engines and return the + * first available + * @gt: GT structure + */ +struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt); + struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, enum xe_engine_class class, u16 instance, -- cgit From 188ced1e0ff892f0948f20480e2e0122380ae46d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:10 -0700 Subject: drm/xe/client: Print runtime to fdinfo Print the accumulated runtime for client when printing fdinfo. Each time a query is done it first does 2 things: 1) loop through all the exec queues for the current client and accumulate the runtime, per engine class. CTX_TIMESTAMP is used for that, being read from the context image. 2) Read a "GPU timestamp" that can be used for considering "how much GPU time has passed" and that has the same unit/refclock as the one recording the runtime. RING_TIMESTAMP is used for that via MMIO. Since for all current platforms RING_TIMESTAMP follows the same refclock, just read it once, using any first engine available. 
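A hedged sketch of that single read, using the helpers added earlier in this series (error handling trimmed; the fdinfo code in this patch does effectively the same):

static u64 example_read_gpu_timestamp(struct xe_device *xe)
{
	struct xe_hw_engine *hwe;
	struct xe_gt *gt;
	unsigned long gt_id;
	u64 ts = 0;

	for_each_gt(gt, xe, gt_id) {
		hwe = xe_gt_any_hw_engine(gt);
		if (!hwe)
			continue;

		xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		ts = xe_hw_engine_read_timestamp(hwe);
		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
		break;
	}

	return ts;
}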
This is exported to userspace as 2 numbers in fdinfo: drm-cycles-: drm-total-cycles-: Userspace is expected to collect at least 2 samples, which allows to know the client engine busyness as per: RUNTIME1 - RUNTIME0 busyness = --------------------- T1 - T0 Since drm-cycles- always starts at 0, it's also possible to know if and engine was ever used by a client. It's expected that userspace will read any 2 samples every few seconds. Given the update frequency of the counters involved and that CTX_TIMESTAMP is 32-bits, the counter for each exec_queue can wrap around (assuming 100% utilization) after ~200s. The wraparound is not perceived by userspace since it's just accumulated for all the exec_queues in a 64-bit counter) but the measurement will not be accurate if the samples are too far apart. This could be mitigated by adding a workqueue to accumulate the counters every so often, but it's additional complexity for something that is done already by userspace every few seconds in tools like gputop (from igt), htop, nvtop, etc, with none of them really defaulting to 1 sample per minute or more. Reviewed-by: Umesh Nerlige Ramappa Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- Documentation/gpu/drm-usage-stats.rst | 21 ++++- Documentation/gpu/xe/index.rst | 1 + Documentation/gpu/xe/xe-drm-usage-stats.rst | 10 +++ drivers/gpu/drm/xe/xe_drm_client.c | 121 +++++++++++++++++++++++++++- 4 files changed, 150 insertions(+), 3 deletions(-) create mode 100644 Documentation/gpu/xe/xe-drm-usage-stats.rst (limited to 'drivers/gpu') diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6dc299343b48..a80f95ca1b2f 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -112,6 +112,19 @@ larger value within a reasonable period. Upon observing a value lower than what was previously read, userspace is expected to stay with that larger previous value until a monotonic update is seen. +- drm-total-cycles-: + +Engine identifier string must be the same as the one specified in the +drm-cycles- tag and shall contain the total number cycles for the given +engine. + +This is a timestamp in GPU unspecified unit that matches the update rate +of drm-cycles-. For drivers that implement this interface, the engine +utilization can be calculated entirely on the GPU clock domain, without +considering the CPU sleep time between 2 samples. + +A driver may implement either this key or drm-maxfreq-, but not both. + - drm-maxfreq-: [Hz|MHz|KHz] Engine identifier string must be the same as the one specified in the @@ -121,6 +134,9 @@ percentage utilization of the engine, whereas drm-engine- only reflects time active without considering what frequency the engine is operating as a percentage of its maximum frequency. +A driver may implement either this key or drm-total-cycles-, but not +both. + Memory ^^^^^^ @@ -168,5 +184,6 @@ be documented above and where possible, aligned with other drivers. Driver specific implementations ------------------------------- -:ref:`i915-usage-stats` -:ref:`panfrost-usage-stats` +* :ref:`i915-usage-stats` +* :ref:`panfrost-usage-stats` +* :ref:`xe-usage-stats` diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index c224ecaee81e..3f07aa3b5432 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -23,3 +23,4 @@ DG2, etc is provided to prototype the driver. 
xe_firmware xe_tile xe_debugging + xe-drm-usage-stats.rst diff --git a/Documentation/gpu/xe/xe-drm-usage-stats.rst b/Documentation/gpu/xe/xe-drm-usage-stats.rst new file mode 100644 index 000000000000..482d503ae68a --- /dev/null +++ b/Documentation/gpu/xe/xe-drm-usage-stats.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. _xe-usage-stats: + +======================================== +Xe DRM client usage stats implementation +======================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_drm_client.c + :doc: DRM Client usage stats diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 08f0b7c95901..af404c9e5cc0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -2,6 +2,7 @@ /* * Copyright © 2023 Intel Corporation */ +#include "xe_drm_client.h" #include <drm/drm_print.h> #include <drm/xe_drm.h> @@ -12,9 +13,66 @@ #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" -#include "xe_drm_client.h" +#include "xe_exec_queue.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_pm.h" #include "xe_trace.h" +/** + * DOC: DRM Client usage stats + * + * The drm/xe driver implements the DRM client usage stats specification as + * documented in :ref:`drm-client-usage-stats`. + * + * Example of the output showing the implemented key value pairs and the + * entirety of the currently possible format options: + * + * :: + * + * pos: 0 + * flags: 0100002 + * mnt_id: 26 + * ino: 685 + * drm-driver: xe + * drm-client-id: 3 + * drm-pdev: 0000:03:00.0 + * drm-total-system: 0 + * drm-shared-system: 0 + * drm-active-system: 0 + * drm-resident-system: 0 + * drm-purgeable-system: 0 + * drm-total-gtt: 192 KiB + * drm-shared-gtt: 0 + * drm-active-gtt: 0 + * drm-resident-gtt: 192 KiB + * drm-total-vram0: 23992 KiB + * drm-shared-vram0: 16 MiB + * drm-active-vram0: 0 + * drm-resident-vram0: 23992 KiB + * drm-total-stolen: 0 + * drm-shared-stolen: 0 + * drm-active-stolen: 0 + * drm-resident-stolen: 0 + * drm-cycles-rcs: 28257900 + * drm-total-cycles-rcs: 7655183225 + * drm-cycles-bcs: 0 + * drm-total-cycles-bcs: 7655183225 + * drm-cycles-vcs: 0 + * drm-total-cycles-vcs: 7655183225 + * drm-engine-capacity-vcs: 2 + * drm-cycles-vecs: 0 + * drm-total-cycles-vecs: 7655183225 + * drm-engine-capacity-vecs: 2 + * drm-cycles-ccs: 0 + * drm-total-cycles-ccs: 7655183225 + * drm-engine-capacity-ccs: 4 + * + * Possible `drm-cycles-` key names are: `rcs`, `ccs`, `bcs`, `vcs`, `vecs` and + * "other".
+ */ + /** * xe_drm_client_alloc() - Allocate drm client * @void: No arg @@ -179,6 +237,66 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) } } +static void show_runtime(struct drm_printer *p, struct drm_file *file) +{ + unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; + struct xe_file *xef = file->driver_priv; + struct xe_device *xe = xef->xe; + struct xe_gt *gt; + struct xe_hw_engine *hwe; + struct xe_exec_queue *q; + u64 gpu_timestamp; + + xe_pm_runtime_get(xe); + + /* Accumulate all the exec queues from this client */ + mutex_lock(&xef->exec_queue.lock); + xa_for_each(&xef->exec_queue.xa, i, q) + xe_exec_queue_update_runtime(q); + mutex_unlock(&xef->exec_queue.lock); + + /* Get the total GPU cycles */ + for_each_gt(gt, xe, gt_id) { + hwe = xe_gt_any_hw_engine(gt); + if (!hwe) + continue; + + xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + break; + } + + xe_pm_runtime_put(xe); + + if (unlikely(!hwe)) + return; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { + const char *class_name; + + for_each_gt(gt, xe, gt_id) + capacity[class] += gt->user_engines.instances_per_class[class]; + + /* + * Engines may be fused off or not exposed to userspace. Don't + * return anything if this entire class is not available + */ + if (!capacity[class]) + continue; + + class_name = xe_hw_engine_class_to_str(class); + drm_printf(p, "drm-cycles-%s:\t%llu\n", + class_name, xef->runtime[class]); + drm_printf(p, "drm-total-cycles-%s:\t%llu\n", + class_name, gpu_timestamp); + + if (capacity[class] > 1) + drm_printf(p, "drm-engine-capacity-%s:\t%lu\n", + class_name, capacity[class]); + } +} + /** * xe_drm_client_fdinfo() - Callback for fdinfo interface * @p: The drm_printer ptr @@ -192,5 +310,6 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) { show_meminfo(p, file); + show_runtime(p, file); } #endif -- cgit
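The busyness computation described in the commit message above can be illustrated with a minimal userspace sketch. This is not part of the patch: the fdinfo path is a made-up example, read_u64_key() is an assumed helper, and error handling is trimmed.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed helper: scan a fdinfo file for "<key>:" and return its value */
static unsigned long long read_u64_key(const char *path, const char *key)
{
	char line[256];
	unsigned long long val = 0;
	FILE *f = fopen(path, "r");

	if (!f)
		return 0;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, key, strlen(key)))
			sscanf(line + strlen(key), ": %llu", &val);
	fclose(f);
	return val;
}

int main(void)
{
	/* Hypothetical fdinfo path of the client being monitored */
	const char *path = "/proc/1234/fdinfo/5";
	unsigned long long c0, c1, t0, t1;

	c0 = read_u64_key(path, "drm-cycles-rcs");
	t0 = read_u64_key(path, "drm-total-cycles-rcs");
	sleep(2); /* any two samples a few seconds apart will do */
	c1 = read_u64_key(path, "drm-cycles-rcs");
	t1 = read_u64_key(path, "drm-total-cycles-rcs");

	if (t1 > t0) /* busyness = (RUNTIME1 - RUNTIME0) / (T1 - T0) */
		printf("rcs busyness: %.1f%%\n",
		       100.0 * (double)(c1 - c0) / (double)(t1 - t0));
	return 0;
}

Sampling every few seconds, as the commit message suggests, also keeps the drm-cycles- delta well within one 32-bit CTX_TIMESTAMP wrap period (~200s at full utilization).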
From 995f7dafd110eecbeef1e02846d897d64839d838 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 16 Apr 2024 14:50:37 +0000 Subject: drm/xe/uapi: Expose the L3 bank mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L3 bank mask is already generated and stored internally with the rest of the GT topology. In user space, the compute runtime now needs this information to be added to the device properties, so the topology mask query is extended to provide a new mask which represents the L3 banks enabled on the GT. The changes in the compute runtime are ready and approved, see link below. A userspace sketch of consuming the new mask follows this patch. v2: Rewrite commit message and add a link to the compute runtime PR (Francois Dugast) Cc: Matt Roper Cc: Robert Krzemien Cc: Mateusz Jablonski Link: https://github.com/intel/compute-runtime/pull/722 Signed-off-by: Francois Dugast Acked-by: Mateusz Jablonski Reviewed-by: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240416145037.7-2-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_query.c | 9 ++++++++- include/uapi/drm/xe_drm.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 29f847debb5c..995effcb904b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -455,9 +455,10 @@ static int query_hwconfig(struct xe_device *xe, static size_t calc_topo_query_size(struct xe_device *xe) { return xe->info.gt_count * - (3 * sizeof(struct drm_xe_query_topology_mask) + + (4 * sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); } @@ -511,6 +512,12 @@ static int query_gt_topology(struct xe_device *xe, if (err) return err; + topo.type = DRM_XE_TOPO_L3_BANK; + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, + sizeof(gt->fuse_topo.l3_bank_mask)); + if (err) + return err; + topo.type = DRM_XE_TOPO_EU_PER_DSS; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1446c3bae515..d7b0903c22b2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -508,6 +508,7 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: @@ -520,6 +521,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_GEOMETRY 1 #define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 /** @type: type of mask */ __u16 type; -- cgit
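For reference, a hedged sketch of how userspace might consume the new DRM_XE_TOPO_L3_BANK entries using the standard two-call query pattern. The open DRM fd and the uapi include path are assumptions (the path depends on the libdrm installation), and error handling is trimmed.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static void print_l3_bank_masks(int fd)
{
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_GT_TOPOLOGY,
	};
	uint8_t *buf, *p;

	/* First call with size == 0 asks the kernel for the buffer size */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return;

	buf = calloc(1, query.size);
	query.data = (uintptr_t)buf;
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) {
		free(buf);
		return;
	}

	/* The buffer is a sequence of variable-size topology mask entries */
	for (p = buf; p < buf + query.size;) {
		struct drm_xe_query_topology_mask *topo = (void *)p;

		if (topo->type == DRM_XE_TOPO_L3_BANK) {
			printf("gt%u L3 bank mask:", topo->gt_id);
			for (uint32_t i = 0; i < topo->num_bytes; i++)
				printf(" %02x", topo->mask[i]);
			printf("\n");
		}
		p += sizeof(*topo) + topo->num_bytes;
	}
	free(buf);
}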
From 735940f99966a5d510c43d05c932da536b33715a Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 21 May 2024 12:36:23 +0200 Subject: drm/xe: Add warn when level cannot be zero At xe_pt_zap_ptes_entry() and xe_pt_stage_unbind_entry(), the level cannot be 0. Therefore, add an independent check for the level. Since the level cannot be zero at this point, there is no need to check for `is_compact`, so remove that instead. Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240521103623.11645-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 11dd0988ffda..cd60c009b679 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -763,7 +763,7 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, pgoff_t end_offset; XE_WARN_ON(!*child); - XE_WARN_ON(!level && xe_child->is_compact); + XE_WARN_ON(!level); /* * Note that we're called from an entry callback, and we're dealing @@ -1445,7 +1445,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); XE_WARN_ON(!*child); - XE_WARN_ON(!level && xe_child->is_compact); + XE_WARN_ON(!level); xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); -- cgit
From 01d71dff61c7e1efae1d7f11b71dfa4549c172bb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 21 May 2024 12:27:15 +0200 Subject: drm/xe/tests: Use uninterruptible VM lock An interruptible lock can return an error and needs a return value check. This test should finish quickly enough, so use an uninterruptible lock instead. Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240521102715.22700-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index b6e7f80c3774..962f6438e219 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -344,7 +344,7 @@ static int migrate_test_run_device(struct xe_device *xe) struct xe_migrate *m = tile->migrate; kunit_info(test, "Testing tile id %d.\n", id); - xe_vm_lock(m->q->vm, true); + xe_vm_lock(m->q->vm, false); xe_migrate_sanity_test(m, test); xe_vm_unlock(m->q->vm); } -- cgit
From bdc9abed51b52965557f9c46d541b5ca3fc66da3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:25 +0200 Subject: drm/xe: Fix xe_uc.h Prefer forward declaration over #include xe_uc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_uc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 5dfa7725483d..11856f24e6f9 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -6,7 +6,7 @@ #ifndef _XE_UC_H_ #define _XE_UC_H_ -#include "xe_uc_types.h" +struct xe_uc; int xe_uc_init(struct xe_uc *uc); int xe_uc_init_hwconfig(struct xe_uc *uc); -- cgit
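This fix and the three that follow apply the same header hygiene pattern. A generic sketch with hypothetical xe_foo names, not taken from the tree: when a header only passes pointers around, a forward declaration of the opaque struct replaces the *_types.h include and spares every includer a rebuild when the type layout changes.

/* xe_foo.h: hypothetical header following the same pattern */
#ifndef _XE_FOO_H_
#define _XE_FOO_H_

/*
 * No #include "xe_foo_types.h" here: the functions below only take
 * pointers, so an opaque forward declaration is sufficient.
 */
struct xe_foo;

int xe_foo_init(struct xe_foo *foo);
void xe_foo_sanitize(struct xe_foo *foo);

#endif /* _XE_FOO_H_ */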
From 2291c091107d0635f10269098152900c0a12fd00 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:26 +0200 Subject: drm/xe: Fix xe_gsc.h Prefer forward declaration over #include xe_gsc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gsc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index dd16e9b8b894..1c7a623faf11 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -6,8 +6,9 @@ #ifndef _XE_GSC_H_ #define _XE_GSC_H_ -#include "xe_gsc_types.h" +#include <linux/types.h> +struct xe_gsc; struct xe_gt; struct xe_hw_engine; -- cgit
From de1429a99fd37f706e6bdbf5e9ad318e1523442c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:27 +0200 Subject: drm/xe: Fix xe_huc.h Prefer forward declaration over #include xe_huc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_huc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h index 3ab56cc14b00..fa1c45e70443 100644 --- a/drivers/gpu/drm/xe/xe_huc.h +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -6,9 +6,10 @@ #ifndef _XE_HUC_H_ #define _XE_HUC_H_ -#include "xe_huc_types.h" +#include <linux/types.h> struct drm_printer; +struct xe_huc; enum xe_huc_auth_types { XE_HUC_AUTH_VIA_GUC = 0, -- cgit
From a6bc7cda37d1ad52cdc59a8d4c4d654836f8c238 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:28 +0200 Subject: drm/xe: Fix xe_guc_pc.h Prefer forward declaration over #include xe_guc_pc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index d3680d89490e..532cac985a6d 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -6,7 +6,9 @@ #ifndef _XE_GUC_PC_H_ #define _XE_GUC_PC_H_ -#include "xe_guc_pc_types.h" +#include <linux/types.h> + +struct xe_guc_pc; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); @@ -27,4 +29,5 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); + #endif /* _XE_GUC_PC_H_ */ -- cgit
From 31a278b5a11e6785db7f4976419d2b284591720e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 20 May 2024 20:18:12 +0200 Subject: drm/i915/display: Add missing include to intel_vga.c This compilation unit uses the udelay() function without including its header file. Fix that to break the dependency on other code. Signed-off-by: Michal Wajdeczko Cc: Jani Nikula Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240520181814.2392-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/display/intel_vga.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 4b98833bfa8c..0b5916c15307 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/delay.h> #include <linux/vgaarb.h> #include <video/vga.h>
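The rule this last fix enforces can be shown with a small hypothetical fragment, not from the patch: a compilation unit that calls udelay() should include <linux/delay.h> itself rather than rely on another header pulling it in transitively.

#include <linux/delay.h> /* udelay(); do not depend on transitive includes */

/* Hypothetical helper; the delay value is illustrative only */
static void example_vga_settle(void)
{
	udelay(100);
}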