From 869e54d4d54b1b0ee16c98c2149f5785eea08c02 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 22 Apr 2024 16:14:54 -0400 Subject: drm/xe: make xe_pm_runtime_lockdep_map a static struct Fix the new sparse warning: >> drivers/gpu/drm/xe/xe_pm.c:72:20: sparse: sparse: symbol 'xe_pm_runtime_lockdep_map' was not declared. Should it be static? Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404191329.EZzOTzwK-lkp@intel.com/ Reviewed-by: Gustavo Sousa Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240422201454.699089-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 37fbeda12d3b..c1831106ea4b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -69,7 +69,7 @@ */ #ifdef CONFIG_LOCKDEP -struct lockdep_map xe_pm_runtime_lockdep_map = { +static struct lockdep_map xe_pm_runtime_lockdep_map = { .name = "xe_pm_runtime_lockdep_map" }; #endif -- cgit From 06e7139a034f26804904368fe4af2ceb70724756 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 23 Apr 2024 14:11:14 +0200 Subject: drm/xe: Fix unexpected backmerge results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent backmerge from drm-next to drm-xe-next brought with it some silent unexpected results. One code snippet was added twice and a partial revert had merge errors. Fix that up to reinstate the affected code as it was before the backmerge. v2: - Commit log message rewording (Lucas DeMarchi) Fixes: 79790b6818e9 ("Merge drm/drm-next into drm-xe-next") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240423121114.39325-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 13 +++++++------ drivers/gpu/drm/xe/xe_vm_types.h | 4 ++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 85d6f359142d..7ae2b0300db6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -863,11 +863,6 @@ static void xe_vma_destroy_late(struct xe_vma *vma) vma->ufence = NULL; } - if (vma->ufence) { - xe_sync_ufence_put(vma->ufence); - vma->ufence = NULL; - } - if (xe_vma_is_userptr(vma)) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); struct xe_userptr *userptr = &uvma->userptr; @@ -2100,6 +2095,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2294,6 +2293,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ? 
@@ -2438,7 +2439,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - !xe_vm_in_fault_mode(vm), + op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 7570c2c6c463..72a100671e5d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -269,6 +269,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + /** @immediate: Immediate bind */ + bool immediate; + /** @read_only: Read only */ + bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ -- cgit From 8f21f82d8b7652e11e6800612e34547bffdc7fd2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:32 +0200 Subject: drm/xe/guc: Add GuC Relay ABI version 1.0 definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This initial GuC Relay ABI specification includes messages for ABI version negotiation and to query values of runtime/fuse registers. We will start handling those messages on the PF driver soon. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h | 170 ++++++++++++++++++++++++- 1 file changed, 169 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h index 747e428de421..6c2834613081 100644 --- a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h @@ -1,11 +1,179 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright © 2023 Intel Corporation + * Copyright © 2023-2024 Intel Corporation */ #ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_ #define _ABI_GUC_RELAY_ACTIONS_ABI_H_ +#include "abi/guc_relay_communication_abi.h" + +/** + * DOC: GuC Relay VF/PF ABI Version + * + * The _`GUC_RELAY_VERSION_BASE` defines minimum VF/PF ABI version that + * drivers must support. Currently this is version 1.0. + * + * The _`GUC_RELAY_VERSION_LATEST` defines latest VF/PF ABI version that + * drivers may use. Currently this is version 1.0. + * + * Some platforms may require different base VF/PF ABI version. + * No supported VF/PF ABI version can be 0.0. + */ + +#define GUC_RELAY_VERSION_BASE_MAJOR 1 +#define GUC_RELAY_VERSION_BASE_MINOR 0 + +#define GUC_RELAY_VERSION_LATEST_MAJOR 1 +#define GUC_RELAY_VERSION_LATEST_MINOR 0 + +/** + * DOC: GuC Relay Actions + * + * The following actions are supported from VF/PF ABI version 1.0: + * + * * `VF2PF_HANDSHAKE`_ + * * `VF2PF_QUERY_RUNTIME`_ + */ + +/** + * DOC: VF2PF_HANDSHAKE + * + * This `Relay Message`_ is used by the VF to establish ABI version with the PF. + * + * Prior to exchanging any other messages, both VF driver and PF driver must + * negotiate the VF/PF ABI version that will be used in their communication. + * + * The VF driver shall use @MAJOR and @MINOR fields to pass requested ABI version. + * The VF driver may use special version 0.0 (both @MAJOR and @MINOR set to 0) + * to request latest (or any) ABI version that is supported by the PF driver. + * + * This message definition shall be supported by all future ABI versions. 
+ * This message definition shall not be changed by future ABI versions. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_HANDSHAKE` = 0x0001 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **MAJOR** - requested major version of the VFPF interface | + * | | | (use MAJOR_ANY to request latest version supported by PF) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **MINOR** - requested minor version of the VFPF interface | + * | | | (use MINOR_ANY to request latest version supported by PF) | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | **MAJOR** - agreed major version of the VFPF interface | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **MINOR** - agreed minor version of the VFPF interface | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VF2PF_HANDSHAKE 0x0001u + +#define VF2PF_HANDSHAKE_REQUEST_MSG_LEN 2u +#define VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR (0xffffu << 16) +#define VF2PF_HANDSHAKE_MAJOR_ANY 0 +#define VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR (0xffffu << 0) +#define VF2PF_HANDSHAKE_MINOR_ANY 0 + +#define VF2PF_HANDSHAKE_RESPONSE_MSG_LEN 2u +#define VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR (0xffffu << 16) +#define VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR (0xffffu << 0) + +/** + * DOC: VF2PF_QUERY_RUNTIME + * + * This `Relay Message`_ is used by the VF to query values of runtime registers. + * + * On some platforms, VF drivers may not have access to some fuse registers + * (referred to here as 'runtime registers') and therefore VF drivers need to ask + * the PF driver to obtain their values. + * + * However, the list of such registers, and their values, is fully owned and + * maintained by the PF driver and the VF driver may only initiate the query + * sequence and indicate in the @START field the starting index of the next + * requested register from this predefined list. + * + * In the response, the PF driver will return, for each entry, a tuple of the 32-bit + * register offset and the 32-bit value of that register (respectively @REG_OFFSET and @REG_VALUE).
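+ * + * For example (illustrative indices only): a query that starts at @START = 4 and is limited to two entries would return the (@REG_OFFSET, @REG_VALUE) tuples of registers 4 and 5 from the PF's predefined list, with @REMAINING reporting how many entries are left past index 5.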
+ * + * The VF driver can use @LIMIT field to limit number of returned register tuples. + * If @LIMIT is unset then PF decides about number of returned register tuples. + * + * This message definition is supported from ABI version 1.0. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = **LIMIT** - limit number of returned entries | + * | | | (use zero to not enforce any limits on the response) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME` = 0x0101 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **START** - index of the first requested entry | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = **COUNT** - number of entries included in response | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **REMAINING** - number of remaining entries | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **REG_OFFSET** - offset of register[START] | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | DATA3 = **REG_VALUE** - value of register[START] | + * +---+-------+--------------------------------------------------------------+ + * | | | | + * +---+-------+--------------------------------------------------------------+ + * |n-1| 31:0 | REG_OFFSET - offset of register[START + x] | + * +---+-------+--------------------------------------------------------------+ + * | n | 31:0 | REG_VALUE - value of register[START + x] | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME 0x0101u + +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN 2u +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2PF_QUERY_RUNTIME_NO_LIMIT 0u +#define VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START GUC_HXG_REQUEST_MSG_n_DATAn + +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN (GUC_HXG_MSG_MIN_LEN + 1u) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN \ + (VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + VF2PF_QUERY_RUNTIME_MAX_COUNT * 2) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2PF_QUERY_RUNTIME_MIN_COUNT 0 +#define VF2PF_QUERY_RUNTIME_MAX_COUNT \ + ((GUC_RELAY_MSG_MAX_LEN - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2) +#define VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING GUC_HXG_RESPONSE_MSG_n_DATAn +#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_OFFSETx 
GUC_HXG_RESPONSE_MSG_n_DATAn +#define VF2PF_QUERY_RUNTIME_RESPONSE_DATAn_REG_VALUEx GUC_HXG_RESPONSE_MSG_n_DATAn + /** * DOC: GuC Relay Debug Actions * -- cgit From 1cb4db30cf685709584743d8bf8a0db2eac620c9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:33 +0200 Subject: drm/xe: Add helper to calculate adjusted register offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our MMIO accessing functions automatically adjust addresses for the media registers based on mmio.adj_limit and mmio.adj_offset logic. Move it to the separate helper to avoid code duplication and to allow using it by the upcoming changes to PF driver code. Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_mmio.c | 38 +++++++++++++++----------------------- drivers/gpu/drm/xe/xe_mmio.h | 7 +++++++ 2 files changed, 22 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 334637511e75..2b18e8149ec3 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -423,41 +423,33 @@ int xe_mmio_init(struct xe_device *xe) u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); + return readl((reg.ext ? 
tile->mmio_ext.regs : tile->mmio.regs) + addr); } u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) @@ -486,10 +478,9 @@ bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg) { - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; + u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); - return range && reg.addr >= range->start && reg.addr <= range->end; + return range && addr >= range->start && addr <= range->end; } /** @@ -519,10 +510,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; u32 ldw, udw, oldudw, retries; - if (reg.addr < gt->mmio.adj_limit) { - reg.addr += gt->mmio.adj_offset; - reg_udw.addr += gt->mmio.adj_offset; - } + reg.addr = xe_mmio_adjusted_addr(gt, reg.addr); + reg_udw.addr = xe_mmio_adjusted_addr(gt, reg_udw.addr); + + /* we shouldn't adjust just one register address */ + xe_gt_assert(gt, reg_udw.addr == reg.addr + 0x4); oldudw = xe_mmio_read32(gt, reg_udw); for (retries = 5; retries; --retries) { diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index a3cd7b3036c7..445ec6a0753e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -36,4 +36,11 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, u32 *out_val, bool atomic); +static inline u32 xe_mmio_adjusted_addr(const struct xe_gt *gt, u32 addr) +{ + if (addr < gt->mmio.adj_limit) + addr += gt->mmio.adj_offset; + return addr; +} + #endif -- cgit From dec793860d5137c58c633712554abfed71642a88 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:34 +0200 Subject: drm/xe: Add few more GT register definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we are not using these registers right now, they are part of some runtime register lists that the PF driver shares with VFs on some legacy platforms that we might want to support as SDV. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 94445810ccc9..6eea7a459c68 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -173,8 +173,11 @@ #define MAX_MSLICES 4 #define MEML3_EN_MASK REG_GENMASK(3, 0) +#define MIRROR_FUSE1 XE_REG(0x911c) + #define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ #define XELP_EU_MASK REG_GENMASK(7, 0) +#define XELP_GT_SLICE_ENABLE XE_REG(0x9138) #define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) #define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) -- cgit From 98e62805921cebcd2fcac3692037ca2ebef63b4a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:35 +0200 Subject: drm/xe/pf: Add SR-IOV GuC Relay PF services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a mechanism that allows a VF driver to communicate with the PF driver; now add the PF-side handlers for the VF2PF requests defined in version 1.0 of the VF/PF GuC Relay ABI specification.
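For illustration (outcomes per the negotiation rules implemented below): a VF that requests version 0.0, meaning "any", will be granted the PF's latest supported ABI version, currently 1.0, while a request for a version older than the supported base version will be rejected.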
The VF2PF_HANDSHAKE request must be used by the VF driver to negotiate the ABI version prior to sending any other request. We will reset any negotiated version later during FLR. The outcome of the VF2PF_QUERY_RUNTIME requests depends on the actual platform: for legacy platforms used as SDV it is provided as-is, while for the latest platforms it is preliminary and might change. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 546 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h | 36 ++ drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h | 52 +++ drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 5 + drivers/gpu/drm/xe/xe_guc_relay.c | 8 +- 6 files changed, 646 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 8bc62bfbc679..4fba50036539 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -164,6 +164,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ xe_gt_sriov_pf_policy.o \ + xe_gt_sriov_pf_service.o \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c new file mode 100644 index 000000000000..a253f196261d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_relay_actions_abi.h" + +#include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" +#include "regs/xe_regs.h" + +#include "xe_mmio.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_service.h" +#include "xe_gt_sriov_pf_service_types.h" +#include "xe_guc_ct.h" +#include "xe_guc_hxg_helpers.h" + +static void pf_init_versions(struct xe_gt *gt) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + /* base versions may differ between platforms */ + gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure.
*/ +static int pf_negotiate_version(struct xe_gt *gt, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; + struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; + + xe_gt_assert(gt, base.major); + xe_gt_assert(gt, base.major <= latest.major); + xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_gt_assert(gt, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + xe_gt_assert(gt, major || minor); + + gt->sriov.pf.vfs[vfid].version.major = major; + gt->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_gt *gt, u32 vfid) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + gt->sriov.pf.vfs[vfid].version.major = 0; + gt->sriov.pf.vfs[vfid].version.minor = 0; +} + +static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) +{ + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + return major == gt->sriov.pf.vfs[vfid].version.major && + minor <= gt->sriov.pf.vfs[vfid].version.minor; +} + +static const struct xe_reg tgl_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_SLICE_ENABLE, /* _MMIO(0x9138) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ats_m_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg pvc_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + CTC_MODE, /* _MMIO(0xA26C) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ver_1270_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ 
+ MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg ver_2000_runtime_regs[] = { + RPM_CONFIG0, /* _MMIO(0x0d00) */ + XEHP_FUSE4, /* _MMIO(0x9114) */ + MIRROR_FUSE3, /* _MMIO(0x9118) */ + MIRROR_FUSE1, /* _MMIO(0x911c) */ + XELP_EU_ENABLE, /* _MMIO(0x9134) */ + XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */ + GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */ + XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */ + XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */ + XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ + XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ + XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + CTC_MODE, /* _MMIO(0xa26c) */ + HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */ + TIMESTAMP_OVERRIDE, /* _MMIO(0x44074) */ +}; + +static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count) +{ + const struct xe_reg *regs; + + if (GRAPHICS_VERx100(xe) >= 2000) { + *count = ARRAY_SIZE(ver_2000_runtime_regs); + regs = ver_2000_runtime_regs; + } else if (GRAPHICS_VERx100(xe) >= 1270) { + *count = ARRAY_SIZE(ver_1270_runtime_regs); + regs = ver_1270_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1260) { + *count = ARRAY_SIZE(pvc_runtime_regs); + regs = pvc_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1255) { + *count = ARRAY_SIZE(ats_m_runtime_regs); + regs = ats_m_runtime_regs; + } else if (GRAPHICS_VERx100(xe) == 1200) { + *count = ARRAY_SIZE(tgl_runtime_regs); + regs = tgl_runtime_regs; + } else { + regs = ERR_PTR(-ENOPKG); + *count = 0; + } + + return regs; +} + +static int pf_alloc_runtime_info(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.size); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.regs); + xe_gt_assert(gt, !gt->sriov.pf.service.runtime.values); + + regs = pick_runtime_regs(xe, &size); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + if (unlikely(!size)) + return 0; + + values = drmm_kcalloc(&xe->drm, size, sizeof(u32), GFP_KERNEL); + if (!values) + return -ENOMEM; + + gt->sriov.pf.service.runtime.size = size; + gt->sriov.pf.service.runtime.regs = regs; + gt->sriov.pf.service.runtime.values = values; + + return 0; +} + +static void read_many(struct xe_gt *gt, unsigned int count, + const struct xe_reg *regs, u32 *values) +{ + while (count--) + *values++ = xe_mmio_read32(gt, *regs++); +} + +static void pf_prepare_runtime_info(struct xe_gt *gt) +{ + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + if (!gt->sriov.pf.service.runtime.size) + return; + + size = gt->sriov.pf.service.runtime.size; + regs = gt->sriov.pf.service.runtime.regs; + values = gt->sriov.pf.service.runtime.values; + + read_many(gt, size, regs, values); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_info_printer(gt); + + xe_gt_sriov_pf_service_print_runtime(gt, &p); + } +} + +/** + * xe_gt_sriov_pf_service_init - Early initialization of the GT SR-IOV PF services. 
+ * @gt: the &xe_gt to initialize + * + * Performs early initialization of the GT SR-IOV PF services, including preparation + * of the runtime info that will be shared with VFs. + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_service_init(struct xe_gt *gt) +{ + int err; + + pf_init_versions(gt); + + err = pf_alloc_runtime_info(gt); + if (unlikely(err)) + goto failed; + + return 0; +failed: + xe_gt_sriov_err(gt, "Failed to initialize service (%pe)\n", ERR_PTR(err)); + return err; +} + +/** + * xe_gt_sriov_pf_service_update - Update PF SR-IOV services. + * @gt: the &xe_gt to update + * + * Updates runtime data shared with VFs. + * + * This function can be called more than once. + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_service_update(struct xe_gt *gt) +{ + pf_prepare_runtime_info(gt); +} + +/** + * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) +{ + pf_disconnect(gt, vfid); +} + +/* Return: 0 on success or a negative error code on failure. */ +static int pf_process_handshake(struct xe_gt *gt, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(gt, vfid); + } else { + xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(gt, vfid, *major, *minor); + } + + return 0; +} + +/* Return: length of the response message or a negative error code on failure. */ +static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, + const u32 *request, u32 len, u32 *response, u32 size) +{ + u32 wanted_major, wanted_minor; + u32 major, minor; + u32 mbz; + int err; + + if (unlikely(len != VF2PF_HANDSHAKE_REQUEST_MSG_LEN)) + return -EMSGSIZE; + + mbz = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_0_MBZ, request[0]); + if (unlikely(mbz)) + return -EPFNOSUPPORT; + + wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); + wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); + + err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + if (err < 0) + return err; + + xe_gt_assert(gt, major || minor); + xe_gt_assert(gt, size >= VF2PF_HANDSHAKE_RESPONSE_MSG_LEN); + + response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, 0); + response[1] = FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, major) | + FIELD_PREP(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, minor); + + return VF2PF_HANDSHAKE_RESPONSE_MSG_LEN; +} + +struct reg_data { + u32 offset; + u32 value; +} __packed; +static_assert(hxg_sizeof(struct reg_data) == 2); + +/* Return: number of entries copied or negative error code on failure. 
*/ +static int pf_service_runtime_query(struct xe_gt *gt, u32 start, u32 limit, + struct reg_data *data, u32 *remaining) +{ + struct xe_gt_sriov_pf_service_runtime_regs *runtime; + unsigned int count, i; + u32 addr; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + runtime = >->sriov.pf.service.runtime; + + if (start > runtime->size) + return -ERANGE; + + count = min_t(u32, runtime->size - start, limit); + + for (i = 0; i < count; ++i, ++data) { + addr = runtime->regs[start + i].addr; + data->offset = xe_mmio_adjusted_addr(gt, addr); + data->value = runtime->values[start + i]; + } + + *remaining = runtime->size - start - count; + return count; +} + +/* Return: length of the response message or a negative error code on failure. */ +static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, u32 *response, u32 resp_size) +{ + const u32 chunk_size = hxg_sizeof(struct reg_data); + struct reg_data *reg_data_buf; + u32 limit, start, max_chunks; + u32 remaining = 0; + int ret; + + if (!pf_is_negotiated(gt, origin, 1, 0)) + return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) + return -EMSGSIZE; + if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) + return -EPROTO; + if (unlikely(resp_size < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) + return -EINVAL; + + limit = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, msg[0]); + start = FIELD_GET(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, msg[1]); + + resp_size = min_t(u32, resp_size, VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MAX_LEN); + max_chunks = (resp_size - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / chunk_size; + limit = limit == VF2PF_QUERY_RUNTIME_NO_LIMIT ? max_chunks : min_t(u32, max_chunks, limit); + reg_data_buf = (void *)(response + VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN); + + ret = pf_service_runtime_query(gt, start, limit, reg_data_buf, &remaining); + if (ret < 0) + return ret; + + response[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, ret); + response[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, remaining); + + return VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + ret * hxg_sizeof(struct reg_data); +} + +/** + * xe_gt_sriov_pf_service_process_request - Service GT level SR-IOV request message from the VF. + * @gt: the &xe_gt that provides the service + * @origin: VF number that is requesting the service + * @msg: request message + * @msg_len: length of the request message (in dwords) + * @response: placeholder for the response message + * @resp_size: length of the response message buffer (in dwords) + * + * This function processes `Relay Message`_ request from the VF. + * + * Return: length of the response message or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size) +{ + u32 action, data __maybe_unused; + int ret; + + xe_gt_assert(gt, msg_len >= GUC_HXG_MSG_MIN_LEN); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_REQUEST); + + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + data = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + xe_gt_sriov_dbg_verbose(gt, "service action %#x:%u from VF%u\n", + action, data, origin); + + switch (action) { + case GUC_RELAY_ACTION_VF2PF_HANDSHAKE: + ret = pf_process_handshake_msg(gt, origin, msg, msg_len, response, resp_size); + break; + case GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME: + ret = pf_process_runtime_query_msg(gt, origin, msg, msg_len, response, resp_size); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +/** + * xe_gt_sriov_pf_service_print_runtime - Print PF runtime data shared with VFs. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for PF use only. + */ +int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p) +{ + const struct xe_reg *regs; + unsigned int size; + u32 *values; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + size = gt->sriov.pf.service.runtime.size; + regs = gt->sriov.pf.service.runtime.regs; + values = gt->sriov.pf.service.runtime.values; + + for (; size--; regs++, values++) { + drm_printf(p, "reg[%#x] = %#x\n", + xe_mmio_adjusted_addr(gt, regs->addr), *values); + } + + return 0; +} + +/** + * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for PF use only. + */ +int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_gt_sriov_pf_service_version *version; + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + for (n = 1; n <= total_vfs; n++) { + version = >->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h new file mode 100644 index 000000000000..56aaadf0360d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_SERVICE_H_ +#define _XE_GT_SRIOV_PF_SERVICE_H_ + +#include +#include + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_pf_service_init(struct xe_gt *gt); +void xe_gt_sriov_pf_service_update(struct xe_gt *gt); +void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); + +int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size); +#else +static inline int +xe_gt_sriov_pf_service_process_request(struct xe_gt *gt, u32 origin, + const u32 *msg, u32 msg_len, + u32 *response, u32 resp_size) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h new file mode 
100644 index 000000000000..ad6dd75f0056 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_GT_SRIOV_PF_SERVICE_TYPES_H_ + +#include + +struct xe_reg; + +/** + * struct xe_gt_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_gt_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_gt_sriov_pf_service_runtime_regs - Runtime data shared with VFs. + * @regs: pointer to static array with register offsets. + * @values: pointer to array with captured register values. + * @size: size of the regs and value arrays. + */ +struct xe_gt_sriov_pf_service_runtime_regs { + const struct xe_reg *regs; + u32 *values; + u32 size; +}; + +/** + * struct xe_gt_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + * @runtime: runtime data shared with VFs. + */ +struct xe_gt_sriov_pf_service { + struct { + struct xe_gt_sriov_pf_service_version base; + struct xe_gt_sriov_pf_service_version latest; + } version; + struct xe_gt_sriov_pf_service_runtime_regs runtime; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index faf9ee8266ce..880754f3e215 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -10,6 +10,7 @@ #include "xe_gt_sriov_pf_config_types.h" #include "xe_gt_sriov_pf_policy_types.h" +#include "xe_gt_sriov_pf_service_types.h" /** * struct xe_gt_sriov_metadata - GT level per-VF metadata. @@ -17,15 +18,19 @@ struct xe_gt_sriov_metadata { /** @config: per-VF provisioning data. */ struct xe_gt_sriov_config config; + /** @version: negotiated VF/PF ABI version */ + struct xe_gt_sriov_pf_service_version version; }; /** * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @service: service data. * @policy: policy data. * @spare: PF-only provisioning configuration. * @vfs: metadata for all VFs. 
*/ struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_service service; struct xe_gt_sriov_pf_policy policy; struct xe_gt_sriov_spare_config spare; struct xe_gt_sriov_metadata *vfs; diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c index c0a2d8d5d3b3..c3bbaf474f9a 100644 --- a/drivers/gpu/drm/xe/xe_guc_relay.c +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -19,6 +19,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" @@ -664,6 +665,7 @@ static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin, static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, const u32 *msg, u32 len, u32 *response, u32 size) { + struct xe_gt *gt = relay_to_gt(relay); u32 type; int ret; @@ -674,8 +676,10 @@ static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); - /* XXX: PF services will be added later */ - ret = -EOPNOTSUPP; + if (IS_SRIOV_PF(relay_to_xe(relay))) + ret = xe_gt_sriov_pf_service_process_request(gt, origin, msg, len, response, size); + else + ret = -EOPNOTSUPP; if (type == GUC_HXG_TYPE_EVENT) relay_assert(relay, ret <= 0); -- cgit From 11294bf38fa2f71619ebb5c7baa3bbe380cbcf0c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 20:04:36 +0200 Subject: drm/xe/kunit: Add PF service tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start with basic tests for VF/PF ABI version negotiation. As we treat all platforms in the same way, we can run the tests on one platform. More tests will likely come later. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423180436.2089-6-michal.wajdeczko@intel.com --- .../gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c | 232 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 4 + 2 files changed, 236 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c new file mode 100644 index 000000000000..b683585db852 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + struct xe_gt *gt; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + gt = xe_device_get_gt(xe, 0); + pf_init_versions(gt); + + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.latest.major); + if (gt->sriov.pf.service.version.base.major == 
gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, + gt->sriov.pf.service.version.latest.minor); + + test->priv = gt; + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); + if (major == gt->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (!gt->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major, + gt->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + 
gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (!gt->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major, + gt->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_gt *gt = test->priv; + u32 major, minor; + + if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(gt, + gt->sriov.pf.service.version.latest.major - 1, + gt->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index a253f196261d..0e23b7ea4f3e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -544,3 +544,7 @@ int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p return 0; } + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_gt_sriov_pf_service_test.c" +#endif -- cgit From e42a51fb9c0f386d3ebb115d081896d41eb844af Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 15:12:42 +0200 Subject: drm/xe/pf: Expose SR-IOV VFs configuration over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to configure VF resources and to print actual provisioning details. Expose this functionality in debugfs to allow experiment with different settings or inspect details in case of unexpected issues with the provisioning. 
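For example, provisioning VF1 from a shell could look like this (the GT number and quota value below are hypothetical):

  # echo 16 > /sys/kernel/debug/dri/0/gt0/vf1/contexts_quota
  # cat /sys/kernel/debug/dri/0/gt0/pf/contexts_provisioned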
As debugfs attributes are per-VF, we use parent d_inode->i_private to store VFID, similarly how we did for per-GT attributes. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423131244.2045-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 5 + drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 203 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h | 18 +++ 4 files changed, 227 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 4fba50036539..efd660553673 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -163,6 +163,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ + xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ xe_lmtt.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index ff7f4cf52fa9..599aed47f2ba 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -13,6 +13,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" +#include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -21,6 +22,7 @@ #include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" +#include "xe_sriov.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -288,4 +290,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) root, minor); xe_uc_debugfs_register(>->uc, root); + + if (IS_SRIOV_PF(xe)) + xe_gt_sriov_pf_debugfs_register(gt, root); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c new file mode 100644 index 000000000000..32ce98698690 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include +#include + +#include "xe_bo.h" +#include "xe_debugfs.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_debugfs.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_debugfs.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_pm.h" + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 # d_inode->i_private = gt + * │   ├── pf # d_inode->i_private = gt + * │   ├── vf1 # d_inode->i_private = VFID(1) + * :   : + * │   ├── vfN # d_inode->i_private = VFID(N) + */ + +static void *extract_priv(struct dentry *d) +{ + return d->d_inode->i_private; +} + +static struct xe_gt *extract_gt(struct dentry *d) +{ + return extract_priv(d->d_parent); +} + +static unsigned int extract_vfid(struct dentry *d) +{ + return extract_priv(d) == extract_gt(d) ? 
PFID : (uintptr_t)extract_priv(d); +} + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── ggtt_available + * │   │   ├── ggtt_provisioned + * │   │   ├── contexts_provisioned + * │   │   ├── doorbells_provisioned + */ + +static const struct drm_info_list pf_info[] = { + { + "ggtt_available", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_available_ggtt, + }, + { + "ggtt_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ggtt, + }, + { + "contexts_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_ctxs, + }, + { + "doorbells_provisioned", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_config_print_dbs, + }, +}; + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── ggtt_spare + * │   │   ├── lmem_spare + * │   │   ├── doorbells_spare + * │   │   ├── contexts_spare + * │   │   ├── exec_quantum_ms + * │   │   ├── preempt_timeout_us + * │   ├── vf1 + * │   │   ├── ggtt_quota + * │   │   ├── lmem_quota + * │   │   ├── doorbells_quota + * │   │   ├── contexts_quota + * │   │   ├── exec_quantum_ms + * │   │   ├── preempt_timeout_us + */ + +#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ + \ +static int CONFIG##_set(void *data, u64 val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + unsigned int vfid = extract_vfid(data); \ + struct xe_device *xe = gt_to_xe(gt); \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int CONFIG##_get(void *data, u64 *val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + unsigned int vfid = extract_vfid(data); \ + \ + *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT) + +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, u64, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); + +static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) +{ + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, vfid == extract_vfid(parent)); + + if (!xe_gt_is_media_type(gt)) { + debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", + 0644, parent, parent, &ggtt_fops); + if (IS_DGFX(gt_to_xe(gt))) + debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare", + 0644, parent, parent, &lmem_fops); + } + debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare", + 0644, parent, parent, &dbs_fops); + debugfs_create_file_unsafe(vfid ? "contexts_quota" : "contexts_spare", + 0644, parent, parent, &ctxs_fops); + debugfs_create_file_unsafe("exec_quantum_ms", 0644, parent, parent, + &exec_quantum_fops); + debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, + &preempt_timeout_fops); +} + +/** + * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @root: the &dentry that represents the GT directory + * + * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs. 
+ */ +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = xe->drm.primary; + int n, totalvfs = xe_sriov_pf_get_totalvfs(xe); + struct dentry *pfdentry; + struct dentry *vfdentry; + char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */ + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + xe_gt_assert(gt, root->d_inode->i_private == gt); + + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + */ + pfdentry = debugfs_create_dir("pf", root); + if (IS_ERR(pfdentry)) + return; + pfdentry->d_inode->i_private = gt; + + drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + pf_add_config_attrs(gt, pfdentry, PFID); + + for (n = 1; n <= totalvfs; n++) { + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf1 + * │   ├── vf2 + */ + snprintf(buf, sizeof(buf), "vf%u", n); + vfdentry = debugfs_create_dir(buf, root); + if (IS_ERR(vfdentry)) + break; + vfdentry->d_inode->i_private = (void *)(uintptr_t)n; + + pf_add_config_attrs(gt, vfdentry, VFID(n)); + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h new file mode 100644 index 000000000000..038cc8ddc244 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_DEBUGFS_H_ +#define _XE_GT_SRIOV_PF_DEBUGFS_H_ + +struct xe_gt; +struct dentry; + +#ifdef CONFIG_PCI_IOV +void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root); +#else +static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { } +#endif + +#endif -- cgit From b00240b6a28a36986c4021daabaecc81c708c01c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 15:12:43 +0200 Subject: drm/xe/pf: Expose SR-IOV VF control commands over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to control the VF. Allow to control the VF using debugfs. 
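For illustration only (not part of this patch), driving the new node needs
nothing beyond plain file I/O. The debugfs path below assumes DRM card 0,
gt0 and VF1:

  /* Illustrative userspace sketch: ask the PF to pause VF1 through the
   * new debugfs node. The dri/0, gt0 and vf1 path components are
   * assumptions; adjust them for the actual topology.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          const char *path = "/sys/kernel/debug/dri/0/gt0/vf1/control";
          int fd = open(path, O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "pause", strlen("pause")) < 0)
                  perror("write");
          close(fd);
          return 0;
  }

Reading the same file back returns the accepted commands (stop, pause,
resume), matching control_read() below.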
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423131244.2045-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 79 +++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 32ce98698690..8909bb950a8b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -14,6 +14,7 @@ #include "xe_gt.h" #include "xe_gt_debugfs.h" #include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_pm.h" @@ -153,6 +154,83 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &preempt_timeout_fops); } +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf1 + * │   │   ├── control { stop, pause, resume } + */ + +static const struct { + const char *cmd; + int (*fn)(struct xe_gt *gt, unsigned int vfid); +} control_cmds[] = { + { "stop", xe_gt_sriov_pf_control_stop_vf }, + { "pause", xe_gt_sriov_pf_control_pause_vf }, + { "resume", xe_gt_sriov_pf_control_resume_vf }, +}; + +static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) +{ + struct dentry *dent = file_dentry(file); + struct dentry *parent = dent->d_parent; + struct xe_gt *gt = extract_gt(parent); + struct xe_device *xe = gt_to_xe(gt); + unsigned int vfid = extract_vfid(parent); + int ret = -EINVAL; + char cmd[32]; + size_t n; + + xe_gt_assert(gt, vfid); + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + if (*pos) + return -ESPIPE; + + if (count > sizeof(cmd) - 1) + return -EINVAL; + + ret = simple_write_to_buffer(cmd, sizeof(cmd) - 1, pos, buf, count); + if (ret < 0) + return ret; + cmd[ret] = '\0'; + + for (n = 0; n < ARRAY_SIZE(control_cmds); n++) { + xe_gt_assert(gt, sizeof(cmd) > strlen(control_cmds[n].cmd)); + + if (sysfs_streq(cmd, control_cmds[n].cmd)) { + xe_pm_runtime_get(xe); + ret = control_cmds[n].fn ? (*control_cmds[n].fn)(gt, vfid) : 0; + xe_pm_runtime_put(xe); + break; + } + } + + return (ret < 0) ? ret : count; +} + +static ssize_t control_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + char help[128]; + size_t n; + + help[0] = '\0'; + for (n = 0; n < ARRAY_SIZE(control_cmds); n++) { + strlcat(help, control_cmds[n].cmd, sizeof(help)); + strlcat(help, "\n", sizeof(help)); + } + + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); +} + +static const struct file_operations control_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = control_write, + .read = control_read, + .llseek = default_llseek, +}; + /** * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs. * @gt: the &xe_gt to register @@ -199,5 +277,6 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) vfdentry->d_inode->i_private = (void *)(uintptr_t)n; pf_add_config_attrs(gt, vfdentry, VFID(n)); + debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops); } } -- cgit From 2cab6319b41023e4ad7b1c4604b9aa994fa2d4d0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 23 Apr 2024 15:12:44 +0200 Subject: drm/xe/pf: Expose SR-IOV policy settings over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to configure SR-IOV policies. 
Allow to tweak those policy settings over debugfs. Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240423131244.2045-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 8909bb950a8b..ab1a26fce3aa 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" #include "xe_pm.h" /* @@ -76,6 +77,57 @@ static const struct drm_info_list pf_info[] = { }, }; +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── reset_engine + * │   │   ├── sample_period + * │   │   ├── sched_if_idle + */ + +#define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \ + \ +static int POLICY##_set(void *data, u64 val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + struct xe_device *xe = gt_to_xe(gt); \ + int err; \ + \ + if (val > (TYPE)~0ull) \ + return -EOVERFLOW; \ + \ + xe_pm_runtime_get(xe); \ + err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \ + xe_pm_runtime_put(xe); \ + \ + return err; \ +} \ + \ +static int POLICY##_get(void *data, u64 *val) \ +{ \ + struct xe_gt *gt = extract_gt(data); \ + \ + *val = xe_gt_sriov_pf_policy_get_##POLICY(gt); \ + return 0; \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(POLICY##_fops, POLICY##_get, POLICY##_set, FORMAT) + +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(reset_engine, bool, "%llu\n"); +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sched_if_idle, bool, "%llu\n"); +DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(sample_period, u32, "%llu\n"); + +static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) +{ + xe_gt_assert(gt, gt == extract_gt(parent)); + xe_gt_assert(gt, PFID == extract_vfid(parent)); + + debugfs_create_file_unsafe("reset_engine", 0644, parent, parent, &reset_engine_fops); + debugfs_create_file_unsafe("sched_if_idle", 0644, parent, parent, &sched_if_idle_fops); + debugfs_create_file_unsafe("sample_period_ms", 0644, parent, parent, &sample_period_fops); +} + /* * /sys/kernel/debug/dri/0/ * ├── gt0 @@ -261,6 +313,7 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); + pf_add_policy_attrs(gt, pfdentry); pf_add_config_attrs(gt, pfdentry, PFID); for (n = 1; n <= totalvfs; n++) { -- cgit From 5a8c292f74c2b0dc84653c7b59323368a849a3ad Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 14:35:42 +0200 Subject: drm/xe/guc: Update VF configuration KLVs definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC firmware specification says that maximum value for the execution quantum KLV is 100s and anything exceeding that will be clamped. The same limitation applies to the preemption timeout KLV. 
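To make the units concrete, here is the arithmetic implied by the new
*_MAX_VALUE definitions, as a standalone sketch (not driver code; only the
two macro values are taken from this patch):

  /* Standalone sketch: the exec quantum maximum is in milliseconds, the
   * preemption timeout maximum in microseconds, and both express the
   * same 100 s ceiling that the firmware clamps to.
   */
  #include <assert.h>
  #include <stdint.h>

  #define GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE    100000u    /* ms */
  #define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE 100000000u /* us */

  static uint32_t clamp_u32(uint32_t val, uint32_t max)
  {
          return val > max ? max : val;
  }

  int main(void)
  {
          /* Both maxima are the same 100 s limit in their own unit. */
          assert(GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE / 1000u == 100u);
          assert(GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE / 1000000u == 100u);

          /* A 250 s request would be silently reduced to 100 s. */
          assert(clamp_u32(250000u, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE) == 100000u);
          return 0;
  }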
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419123543.270-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 511cf974d585..e53ffaee2fcd 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -201,7 +201,11 @@ enum { * it to take effect. Such cases might typically happen on a 1PF+1VF * Virtualization config enabled for heavier workloads like AI/ML. * + * The max value for this KLV is 100 seconds, anything exceeding that + * will be clamped to the max. + * * :0: infinite exec quantum (default) + * :100000: maximum exec quantum (100000ms == 100s) * * _`GUC_KLV_VF_CFG_PREEMPT_TIMEOUT` : 0x8A02 * This config sets the VF-preemption-timeout in microseconds. @@ -219,7 +223,11 @@ enum { * on a 1PF+1VF Virtualization config enabled for heavier workloads like * AI/ML. * + * The max value for this KLV is 100 seconds, anything exceeding that + * will be clamped to the max. + * * :0: no preemption timeout (default) + * :100000000: maximum preemption timeout (100000000us == 100s) * * _`GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR` : 0x8A03 * This config sets threshold for CAT errors caused by the VF. @@ -291,9 +299,11 @@ enum { #define GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY 0x8a01 #define GUC_KLV_VF_CFG_EXEC_QUANTUM_LEN 1u +#define GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE 100000u -#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 -#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY 0x8a02 +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_LEN 1u +#define GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE 100000000u #define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_KEY 0x8a03 #define GUC_KLV_VF_CFG_THRESHOLD_CAT_ERR_LEN 1u -- cgit From 49f853c78e688780cacb9712be4136869f3e34fe Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 14:35:43 +0200 Subject: drm/xe/pf: Clamp maximum execution quantum to 100s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuC is silently clamping values of the execution quantum and preemption timeout KLVs to 100s. Perform explicit clamping on the driver side as later there is no way to read back values used by the firmware and we shouldn't mislead the user about actual values being used when we print them in dmesg or debugfs. 
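Note the interface change this implies: the push helpers now take the value
by pointer, so the caller ends up caching what was actually sent rather than
what was requested. A minimal sketch of that pattern (names are illustrative,
not the driver's):

  /* Illustrative sketch: clamp through the pointer so the caller stores
   * exactly the value pushed to the firmware.
   */
  #include <stdint.h>
  #include <stdio.h>

  #define EXEC_QUANTUM_MAX_MS 100000u

  static int push_exec_quantum(uint32_t *ms)
  {
          if (*ms > EXEC_QUANTUM_MAX_MS)
                  *ms = EXEC_QUANTUM_MAX_MS; /* mirror the GuC clamp */
          /* ...send *ms to the firmware here... */
          return 0;
  }

  int main(void)
  {
          uint32_t quantum = 250000u; /* 250 s, over the 100 s limit */

          push_exec_quantum(&quantum);
          printf("stored exec quantum: %u ms\n", quantum); /* prints 100000 */
          return 0;
  }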
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419123543.270-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 79116ad58620..7eac01e04cc5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -187,14 +187,20 @@ static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u3 return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); } -static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum) +static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 *exec_quantum) { - return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum); + /* GuC will silently clamp values exceeding max */ + *exec_quantum = min_t(u32, *exec_quantum, GUC_KLV_VF_CFG_EXEC_QUANTUM_MAX_VALUE); + + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, *exec_quantum); } -static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout) +static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 *preempt_timeout) { - return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout); + /* GuC will silently clamp values exceeding max */ + *preempt_timeout = min_t(u32, *preempt_timeout, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_MAX_VALUE); + + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout); } static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1604,7 +1610,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; - err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum); + err = pf_push_vf_cfg_exec_quantum(gt, vfid, &exec_quantum); if (unlikely(err)) return err; @@ -1674,7 +1680,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); int err; - err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout); + err = pf_push_vf_cfg_preempt_timeout(gt, vfid, &preempt_timeout); if (unlikely(err)) return err; -- cgit From f332625733b967afca3ccbc32553e3003d847b44 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 24 Apr 2024 07:03:01 -0700 Subject: drm/xe: Store xe_hw_engine in xe_hw_engine_snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A future patch will require gt and xe device structs, so here replacing class by hwe. 
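The shape of the refactor can be shown in isolation: instead of copying a
single field out of the engine, the snapshot keeps a back-pointer, which
makes the GT (and from it the device) reachable. Stand-in types, a sketch
only:

  /* Stand-in types sketching the back-pointer refactor; the implicit
   * assumption is that the pointed-to engine outlives the snapshot.
   */
  struct gt;

  struct hw_engine {
          int class;
          struct gt *gt;
  };

  struct snapshot_old {
          int class;              /* one copied field */
  };

  struct snapshot_new {
          struct hw_engine *hwe;  /* everything reachable */
  };

  static struct gt *snapshot_gt(const struct snapshot_new *s)
  {
          return s->hwe->gt;      /* gt, and via gt the xe device */
  }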
Cc: Rodrigo Vivi Cc: Matt Roper Cc: Zhanjun Dong Cc: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240424140319.61651-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 6 +++--- drivers/gpu/drm/xe/xe_hw_engine_types.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 455f375c1cbd..c84dbe8a8ed1 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -791,7 +791,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return NULL; snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); - snapshot->class = hwe->class; + snapshot->hwe = hwe; snapshot->logical_instance = hwe->logical_instance; snapshot->forcewake.domain = hwe->domain; snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), @@ -842,7 +842,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); - if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); return snapshot; @@ -887,7 +887,7 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); - if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index d7f828c76cc5..27deaa31efd3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -158,8 +158,8 @@ struct xe_hw_engine { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; - /** @class: class of this hw engine */ - enum xe_engine_class class; + /** @hwe: hw engine */ + struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ u16 logical_instance; /** @forcewake: Force Wake information snapshot */ -- cgit From 082a634f608200d569412114fc0ee4d8c9f0f2aa Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 24 Apr 2024 07:03:02 -0700 Subject: drm/xe: Add helpers to loop over geometry and compute DSS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some DSS can only be available for geometry while others can only be available for compute. So here adding helpers to loop only available DSS for given usage. User of this helper will come in the next patch. 
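As the sketch below shows, the helpers boil down to testing a bit in the
geometry or compute fuse mask; plain 64-bit masks stand in for the driver's
xe_dss_mask_t and the fuse values are made up:

  /* Standalone sketch: a DSS is usable for geometry or compute iff its
   * bit is set in the corresponding fuse mask.
   */
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define MAX_DSS 64u

  static bool has_dss(uint64_t mask, unsigned int dss)
  {
          return mask & (UINT64_C(1) << dss);
  }

  int main(void)
  {
          uint64_t g_dss_mask = 0x0f; /* example fuse values only */
          uint64_t c_dss_mask = 0xf0;

          for (unsigned int dss = 0; dss < MAX_DSS; dss++) {
                  if (has_dss(g_dss_mask, dss))
                          printf("DSS %u: geometry\n", dss);
                  if (has_dss(c_dss_mask, dss))
                          printf("DSS %u: compute\n", dss);
          }
          return 0;
  }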
v2:
- drop has_dss()

Cc: Rodrigo Vivi
Cc: Matt Roper
Cc: Zhanjun Dong
Cc: Himal Prasad Ghimiray
Reviewed-by: Rodrigo Vivi
Signed-off-by: José Roberto de Souza
Link: https://patchwork.freedesktop.org/patch/msgid/20240424140319.61651-2-jose.souza@intel.com
---
 drivers/gpu/drm/xe/xe_gt_mcr.h      | 24 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_topology.c | 10 ++++++++++
 drivers/gpu/drm/xe/xe_gt_topology.h |  3 +++
 3 files changed, 37 insertions(+)
(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index a7f4ab1aa584..e7d03e001a49 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -40,4 +40,28 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group,
 	for_each_dss((dss), (gt)) \
 		for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true))

+/*
+ * Loop over each DSS available for geometry and determine the group and
+ * instance IDs that should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_geometry_dss(dss, gt, group, instance) \
+	for_each_dss_steering(dss, gt, group, instance) \
+		if (xe_gt_has_geometry_dss(gt, dss))
+
+/*
+ * Loop over each DSS available for compute and determine the group and
+ * instance IDs that should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_compute_dss(dss, gt, group, instance) \
+	for_each_dss_steering(dss, gt, group, instance) \
+		if (xe_gt_has_compute_dss(gt, dss))
+
 #endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 3733e7a6860d..af841d801a8f 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -278,3 +278,13 @@ bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)

 	return quad_first < (quad + 1) * dss_per_quad;
 }
+
+bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
+{
+	return test_bit(dss, gt->fuse_topo.g_dss_mask);
+}
+
+bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
+{
+	return test_bit(dss, gt->fuse_topo.c_dss_mask);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index b3e357777a6e..746b325bbf6e 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -33,4 +33,7 @@ bool xe_dss_mask_empty(const xe_dss_mask_t mask);

 bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);

+bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss);
+bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss);
+
 #endif /* _XE_GT_TOPOLOGY_H_ */
--
cgit

From c8d4524ecc79f8b5a3bf58c6bd4438127c54a4cd Mon Sep 17 00:00:00 2001
From: José Roberto de Souza
Date: Wed, 24 Apr 2024 07:03:03 -0700
Subject: drm/xe: Add INSTDONE registers to devcoredump
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These registers contain important information that can help with
debugging GPU hangs.

While at it, also fix the double line break at the end of the engine
registers for CCS engines.
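With this in place, the new values appear as INSTDONE lines in the
devcoredump text. A hypothetical userspace filter (the devcd1 instance name
is an assumption; it varies per dump):

  /* Hypothetical userspace sketch: print only the INSTDONE lines of a
   * devcoredump.
   */
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          FILE *f = fopen("/sys/class/devcoredump/devcd1/data", "r");
          char line[512];

          if (!f) {
                  perror("fopen");
                  return 1;
          }
          while (fgets(line, sizeof(line), f))
                  if (strstr(line, "INSTDONE"))
                          fputs(line, stdout);
          fclose(f);
          return 0;
  }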
v2: - print other INSTDONE registers v3: - add for_each_geometry/compute_dss() v4: - print one slice_common_instdone per glice in DG2+ v5: - rename registers prefix from DG2 to XEHPG (Zhanjun) Cc: Rodrigo Vivi Cc: Matt Roper Cc: Zhanjun Dong Cc: Himal Prasad Ghimiray Reviewed-by: Rodrigo Vivi Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240424140319.61651-3-jose.souza@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_gt_regs.h | 13 ++++ drivers/gpu/drm/xe/xe_hw_engine.c | 128 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 16 ++++ 4 files changed, 158 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index af71b87d8030..97d2aed63e01 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -65,6 +65,7 @@ #define RING_ACTHD_UDW(base) XE_REG((base) + 0x5c) #define RING_DMA_FADD_UDW(base) XE_REG((base) + 0x60) #define RING_IPEHR(base) XE_REG((base) + 0x68) +#define RING_INSTDONE(base) XE_REG((base) + 0x6c) #define RING_ACTHD(base) XE_REG((base) + 0x74) #define RING_DMA_FADD(base) XE_REG((base) + 0x78) #define RING_HWS_PGA(base) XE_REG((base) + 0x80) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 6eea7a459c68..83847f2da72a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -98,6 +98,8 @@ #define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) #define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) +#define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c) + #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) @@ -115,6 +117,14 @@ #define FLSH_IGNORES_PSD REG_BIT(10) #define FD_END_COLLECT REG_BIT(5) +#define SC_INSTDONE XE_REG(0x7100) +#define SC_INSTDONE_EXTRA XE_REG(0x7104) +#define SC_INSTDONE_EXTRA2 XE_REG(0x7108) + +#define XEHPG_SC_INSTDONE XE_REG_MCR(0x7100) +#define XEHPG_SC_INSTDONE_EXTRA XE_REG_MCR(0x7104) +#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) + #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) @@ -345,6 +355,9 @@ #define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) #define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) +#define SAMPLER_INSTDONE XE_REG_MCR(0xe160) +#define ROW_INSTDONE XE_REG_MCR(0xe164) + #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c84dbe8a8ed1..4cc757457e01 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -18,6 +18,7 @@ #include "xe_gt.h" #include "xe_gt_ccs_mode.h" #include "xe_gt_printk.h" +#include "xe_gt_mcr.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -766,6 +767,57 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) xe_hw_fence_irq_run(hwe->fence_irq); } +static bool +is_slice_common_per_gslice(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) >= 1255; +} + +static void +xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, + struct xe_hw_engine_snapshot *snapshot) +{ + struct xe_gt *gt = hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + unsigned int dss; + u16 group, 
instance; + + snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + if (is_slice_common_per_gslice(xe) == false) { + snapshot->reg.instdone.slice_common[0] = + xe_mmio_read32(gt, SC_INSTDONE); + snapshot->reg.instdone.slice_common_extra[0] = + xe_mmio_read32(gt, SC_INSTDONE_EXTRA); + snapshot->reg.instdone.slice_common_extra2[0] = + xe_mmio_read32(gt, SC_INSTDONE_EXTRA2); + } else { + for_each_geometry_dss(dss, gt, group, instance) { + snapshot->reg.instdone.slice_common[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance); + snapshot->reg.instdone.slice_common_extra[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance); + snapshot->reg.instdone.slice_common_extra2[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance); + } + } + + for_each_geometry_dss(dss, gt, group, instance) { + snapshot->reg.instdone.sampler[dss] = + xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance); + snapshot->reg.instdone.row[dss] = + xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance); + + if (GRAPHICS_VERx100(xe) >= 1255) + snapshot->reg.instdone.geom_svg[dss] = + xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT, + group, instance); + } +} + /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. @@ -780,6 +832,7 @@ struct xe_hw_engine_snapshot * xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { struct xe_hw_engine_snapshot *snapshot; + size_t len; u64 val; if (!xe_hw_engine_is_valid(hwe)) @@ -790,6 +843,28 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (!snapshot) return NULL; + /* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it + * includes xe_hw_engine_types.h the length of this 3 registers can't be + * set in struct xe_hw_engine_snapshot, so here doing additional + * allocations. 
+ */ + len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32)); + snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC); + snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC); + if (!snapshot->reg.instdone.slice_common || + !snapshot->reg.instdone.slice_common_extra || + !snapshot->reg.instdone.slice_common_extra2 || + !snapshot->reg.instdone.sampler || + !snapshot->reg.instdone.row || + !snapshot->reg.instdone.geom_svg) { + xe_hw_engine_snapshot_free(snapshot); + return NULL; + } + snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); snapshot->hwe = hwe; snapshot->logical_instance = hwe->logical_instance; @@ -841,6 +916,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE); @@ -848,6 +924,49 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; } +static void +xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p) +{ + struct xe_gt *gt = snapshot->hwe->gt; + struct xe_device *xe = gt_to_xe(gt); + u16 group, instance; + unsigned int dss; + + drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring); + + if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) + return; + + if (is_slice_common_per_gslice(xe) == false) { + drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common[0]); + drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common_extra[0]); + drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n", + snapshot->reg.instdone.slice_common_extra2[0]); + } else { + for_each_geometry_dss(dss, gt, group, instance) { + drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common[dss]); + drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common_extra[dss]); + drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.slice_common_extra2[dss]); + } + } + + for_each_geometry_dss(dss, gt, group, instance) { + drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.sampler[dss]); + drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss, + snapshot->reg.instdone.row[dss]); + + if (GRAPHICS_VERx100(xe) >= 1255) + drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n", + dss, snapshot->reg.instdone.geom_svg[dss]); + } +} + /** * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. * @snapshot: Xe HW Engine snapshot object. 
@@ -887,9 +1006,12 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
+	xe_hw_engine_snapshot_instdone_print(snapshot, p);
+
 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
 		drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode);
+	drm_puts(p, "\n");
 }

 /**
@@ -904,6 +1026,12 @@ void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
 	if (!snapshot)
 		return;

+	kfree(snapshot->reg.instdone.slice_common);
+	kfree(snapshot->reg.instdone.slice_common_extra);
+	kfree(snapshot->reg.instdone.slice_common_extra2);
+	kfree(snapshot->reg.instdone.sampler);
+	kfree(snapshot->reg.instdone.row);
+	kfree(snapshot->reg.instdone.geom_svg);
 	kfree(snapshot->name);
 	kfree(snapshot);
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 27deaa31efd3..9f9755e31b9f 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -211,6 +211,22 @@ struct xe_hw_engine_snapshot {
 		u32 ipehr;
 		/** @reg.rcu_mode: RCU_MODE */
 		u32 rcu_mode;
+		struct {
+			/** @reg.instdone.ring: RING_INSTDONE */
+			u32 ring;
+			/** @reg.instdone.slice_common: SC_INSTDONE */
+			u32 *slice_common;
+			/** @reg.instdone.slice_common_extra: SC_INSTDONE_EXTRA */
+			u32 *slice_common_extra;
+			/** @reg.instdone.slice_common_extra2: SC_INSTDONE_EXTRA2 */
+			u32 *slice_common_extra2;
+			/** @reg.instdone.sampler: SAMPLER_INSTDONE */
+			u32 *sampler;
+			/** @reg.instdone.row: ROW_INSTDONE */
+			u32 *row;
+			/** @reg.instdone.geom_svg: INSTDONE_GEOM_SVGUNIT */
+			u32 *geom_svg;
+		} instdone;
 	} reg;
 };
--
cgit

From fb74b205cdd26357469cab8957f5935f10b810e2 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Tue, 23 Apr 2024 18:18:14 -0400
Subject: drm/xe: Introduce a simple wedged state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a very simple 'wedged' state where any attempt to access the
GPU is entirely blocked.

In some critical cases, like on gt_reset failure, we need to block any
other attempt to use the GPU. Otherwise we are at risk of reaching
cases that would force us to reboot the machine.

So, when these cases are identified, we corner and block any GPU
access. No IOCTL and not even another GT reset should be attempted.

The 'wedged' state in Xe is an end state with no way back.
Only a device "re-probe" (unbind + bind) can restore the GPU access.

v2:
- s/wedged/busted (Lucas)
- use unbind+bind instead of module reload (Lucas)
- added more info on unbind operations and instruction on bug report
- only print the message once.

v3:
- s/busted/wedged (Ashutosh, Tvrtko, Thomas)
- don't assume user has sudo and tee available (Lucas)

v4:
- remove unnecessary cases around ct communication or migration.
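The core of the mechanism is a declare-once flag, sketched below with C11
atomics standing in for the kernel's atomic_t:

  /* Sketch of the declare-once pattern: the atomic exchange guarantees
   * the CRITICAL banner prints a single time even if several failure
   * paths wedge the device concurrently.
   */
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static atomic_int wedged;

  static bool device_wedged(void)
  {
          return atomic_load(&wedged);
  }

  static void declare_wedged(void)
  {
          if (!atomic_exchange(&wedged, 1))
                  fprintf(stderr, "CRITICAL: device is now wedged\n");
  }

  int main(void)
  {
          declare_wedged();
          declare_wedged(); /* no second banner */
          return device_wedged() ? 0 : 1;
  }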
Cc: Ashutosh Dixit Cc: Tvrtko Ursulin Cc: Thomas Hellström Cc: Lucas De Marchi Cc: Anshuman Gupta Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi #v2 Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 6 ++++++ drivers/gpu/drm/xe/xe_device.h | 20 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_gt.c | 5 ++++- drivers/gpu/drm/xe/xe_guc_pc.c | 3 +++ 5 files changed, 36 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 55bbc8b8df15..76a7b37a4a53 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -137,6 +137,9 @@ static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct xe_device *xe = to_xe_device(file_priv->minor->dev); long ret; + if (xe_device_wedged(xe)) + return -ECANCELED; + ret = xe_pm_runtime_get_ioctl(xe); if (ret >= 0) ret = drm_ioctl(file, cmd, arg); @@ -152,6 +155,9 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo struct xe_device *xe = to_xe_device(file_priv->minor->dev); long ret; + if (xe_device_wedged(xe)) + return -ECANCELED; + ret = xe_pm_runtime_get_ioctl(xe); if (ret >= 0) ret = drm_compat_ioctl(file, cmd, arg); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 36d4434ebccc..d2e4249d37ce 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -167,4 +167,24 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); +static inline bool xe_device_wedged(struct xe_device *xe) +{ + return atomic_read(&xe->wedged); +} + +static inline void xe_device_declare_wedged(struct xe_device *xe) +{ + if (!atomic_xchg(&xe->wedged, 1)) { + xe->needs_flr_on_fini = true; + drm_err(&xe->drm, + "CRITICAL: Xe has declared device %s as wedged.\n" + "IOCTLs and executions are blocked until device is probed again with unbind and bind operations:\n" + "echo '%s' > /sys/bus/pci/drivers/xe/unbind\n" + "echo '%s' > /sys/bus/pci/drivers/xe/bind\n" + "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", + dev_name(xe->drm.dev), dev_name(xe->drm.dev), + dev_name(xe->drm.dev)); + } +} + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8a9f12a8d7c1..91c720d6ad29 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -459,6 +459,9 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; + /** @wedged: Xe device faced a critical error and is now blocked. 
*/ + atomic_t wedged; + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 491d0413de15..e922e77f5010 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -633,6 +633,9 @@ static int gt_reset(struct xe_gt *gt) { int err; + if (xe_device_wedged(gt_to_xe(gt))) + return -ECANCELED; + /* We only support GT resets with GuC submission */ if (!xe_device_uc_enabled(gt_to_xe(gt))) return -ENODEV; @@ -685,7 +688,7 @@ err_msg: err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - gt_to_xe(gt)->needs_flr_on_fini = true; + xe_device_declare_wedged(gt_to_xe(gt)); return err; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 509649d0e65e..8fc757900ed1 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -902,6 +902,9 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) return; } + if (xe_device_wedged(xe)) + return; + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); -- cgit From 692818678e80e5999ee1975953f7c6f82cb4a2be Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 23 Apr 2024 18:18:15 -0400 Subject: drm/xe: declare wedged upon GuC load failure Let's block the device upon any GuC load failure. But let's continue with the probe so guc logs can be read from the debugfs. v2: - s/wedged/busted - do not block probe or we lose guc_logs in debugfs (Matt) v3: - s/busted/wedged v4: Do not change __xe_guc_upload return. (Himal) Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 240e7a4bbff1..17438d5b18a4 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -451,7 +451,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) return 0; } -static int guc_wait_ucode(struct xe_guc *guc) +static void guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); u32 status; @@ -479,30 +479,26 @@ static int guc_wait_ucode(struct xe_guc *guc) 200000, &status, false); if (ret) { - xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status); - xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - xe_gt_info(gt, "GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } + xe_gt_err(gt, "GuC load failed: status = 0x%08X\n", status); + xe_gt_err(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) + xe_gt_err(gt, "GuC firmware signature verification failed\n"); if 
(REG_FIELD_GET(GS_UKERNEL_MASK, status) == - XE_GUC_LOAD_STATUS_EXCEPTION) { - xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n", - xe_mmio_read32(gt, SOFT_SCRATCH(13))); - ret = -ENXIO; - } + XE_GUC_LOAD_STATUS_EXCEPTION) + xe_gt_err(gt, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(gt, SOFT_SCRATCH(13))); + + xe_device_declare_wedged(gt_to_xe(gt)); } else { xe_gt_dbg(gt, "GuC successfully loaded\n"); } - - return ret; } static int __xe_guc_upload(struct xe_guc *guc) @@ -532,9 +528,7 @@ static int __xe_guc_upload(struct xe_guc *guc) goto out; /* Wait for authentication */ - ret = guc_wait_ucode(guc); - if (ret) - goto out; + guc_wait_ucode(guc); xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); return 0; -- cgit From 8ed9aaae39f39130b7a3eb2726be05d7f64b344c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 23 Apr 2024 18:18:16 -0400 Subject: drm/xe: Force wedged state and block GT reset upon any GPU hang In many validation situations when debugging GPU Hangs, it is useful to preserve the GT situation from the moment that the timeout occurred. This patch introduces a module parameter that could be used on situations like this. If xe.wedged module parameter is set to 2, Xe will be declared wedged on every single execution timeout (a.k.a. GPU hang) right after devcoredump snapshot capture and without attempting any kind of GT reset and blocking entirely any kind of execution. v2: Really block gt_reset from guc side. (Lucas) s/wedged/busted (Lucas) v3: - s/busted/wedged - Really use global_flags (Dafna) - More robust timeout handling when wedging it. v4: A really robust clean exit done by Matt Brost. No more kernel warns on unbind. v5: Simplify error message (Lucas) Cc: Matthew Brost Cc: Dafna Hirschfeld Cc: Lucas De Marchi Cc: Alan Previn Cc: Himanshu Somaiya Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-3-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 29 ++++++++++ drivers/gpu/drm/xe/xe_device.h | 15 +---- drivers/gpu/drm/xe/xe_exec_queue.h | 9 +++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +- drivers/gpu/drm/xe/xe_guc_ads.c | 9 ++- drivers/gpu/drm/xe/xe_guc_submit.c | 90 ++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_module.c | 5 ++ drivers/gpu/drm/xe/xe_module.h | 1 + 8 files changed, 129 insertions(+), 31 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 76a7b37a4a53..d45db6ff1fa3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -764,3 +764,32 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) { return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } + +/** + * xe_device_declare_wedged - Declare device wedged + * @xe: xe device instance + * + * This is a final state that can only be cleared with a module + * re-probe (unbind + bind). + * In this state every IOCTL will be blocked so the GT cannot be used. + * In general it will be called upon any critical error such as gt reset + * failure or guc loading failure. + * If xe.wedged module parameter is set to 2, this function will be called + * on every single execution timeout (a.k.a. GPU hang) right after devcoredump + * snapshot capture. In this mode, GT reset won't be attempted so the state of + * the issue is preserved for further debugging. 
+ */ +void xe_device_declare_wedged(struct xe_device *xe) +{ + if (xe_modparam.wedged_mode == 0) + return; + + if (!atomic_xchg(&xe->wedged, 1)) { + xe->needs_flr_on_fini = true; + drm_err(&xe->drm, + "CRITICAL: Xe has declared device %s as wedged.\n" + "IOCTLs and executions are blocked. Only a rebind may clear the failure\n" + "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", + dev_name(xe->drm.dev)); + } +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index d2e4249d37ce..9ede45fc062a 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -172,19 +172,6 @@ static inline bool xe_device_wedged(struct xe_device *xe) return atomic_read(&xe->wedged); } -static inline void xe_device_declare_wedged(struct xe_device *xe) -{ - if (!atomic_xchg(&xe->wedged, 1)) { - xe->needs_flr_on_fini = true; - drm_err(&xe->drm, - "CRITICAL: Xe has declared device %s as wedged.\n" - "IOCTLs and executions are blocked until device is probed again with unbind and bind operations:\n" - "echo '%s' > /sys/bus/pci/drivers/xe/unbind\n" - "echo '%s' > /sys/bus/pci/drivers/xe/bind\n" - "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", - dev_name(xe->drm.dev), dev_name(xe->drm.dev), - dev_name(xe->drm.dev)); - } -} +void xe_device_declare_wedged(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 02ce8d204622..48f6da53a292 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -26,6 +26,15 @@ void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance); +static inline struct xe_exec_queue * +xe_exec_queue_get_unless_zero(struct xe_exec_queue *q) +{ + if (kref_get_unless_zero(&q->refcount)) + return q; + + return NULL; +} + struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id); static inline struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 93df2d7969b3..8e9c4b990fbb 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -245,7 +245,7 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return seqno; xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_uc_enabled(xe)) { + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1aafa486edec..db817a46f157 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -20,6 +20,7 @@ #include "xe_lrc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_module.h" #include "xe_platform_types.h" #include "xe_wa.h" @@ -440,11 +441,17 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) static void guc_policies_init(struct xe_guc_ads *ads) { + u32 global_flags = 0; + ads_blob_write(ads, policies.dpc_promote_time, GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US); ads_blob_write(ads, policies.max_num_work_items, GLOBAL_POLICY_MAX_NUM_WI); - ads_blob_write(ads, policies.global_flags, 0); + + if (xe_modparam.wedged_mode == 2) + global_flags |= 
GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + ads_blob_write(ads, policies.global_flags, global_flags); ads_blob_write(ads, policies.is_valid, 1); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index c7d38469fb46..0bea17536659 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -35,6 +35,7 @@ #include "xe_macros.h" #include "xe_map.h" #include "xe_mocs.h" +#include "xe_module.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" @@ -59,6 +60,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define ENGINE_STATE_SUSPENDED (1 << 5) #define EXEC_QUEUE_STATE_RESET (1 << 6) #define ENGINE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) { @@ -175,9 +177,20 @@ static void set_exec_queue_killed(struct xe_exec_queue *q) atomic_or(ENGINE_STATE_KILLED, &q->guc->state); } -static bool exec_queue_killed_or_banned(struct xe_exec_queue *q) +static bool exec_queue_wedged(struct xe_exec_queue *q) { - return exec_queue_killed(q) || exec_queue_banned(q); + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; +} + +static void set_exec_queue_wedged(struct xe_exec_queue *q) +{ + atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); +} + +static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) +{ + return exec_queue_banned(q) || (atomic_read(&q->guc->state) & + (EXEC_QUEUE_STATE_WEDGED | ENGINE_STATE_KILLED)); } #ifdef CONFIG_PROVE_LOCKING @@ -240,6 +253,17 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } +static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_guc *guc = arg; + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (exec_queue_wedged(q)) + xe_exec_queue_put(q); +} + static const struct xe_exec_queue_ops guc_exec_queue_ops; static void primelockdep(struct xe_guc *guc) @@ -708,7 +732,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) trace_xe_sched_job_run(job); - if (!exec_queue_killed_or_banned(q) && !xe_sched_job_is_error(job)) { + if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) register_engine(q); if (!lr) /* LR jobs are emitted in the exec IOCTL */ @@ -844,6 +868,28 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +static void guc_submit_wedged(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + int err; + + xe_device_declare_wedged(guc_to_xe(guc)); + xe_guc_submit_reset_prepare(guc); + xe_guc_ct_stop(&guc->ct); + + err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + guc_submit_wedged_fini, guc); + if (err) + return; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); +} + static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) { struct xe_guc_exec_queue *ge = @@ -852,10 +898,16 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; + bool wedged = xe_device_wedged(xe); xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); + if (!wedged && xe_modparam.wedged_mode == 
2) { + guc_submit_wedged(exec_queue_to_guc(q)); + wedged = true; + } + /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -870,7 +922,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) * xe_guc_deregister_done_handler() which treats it as an unexpected * state. */ - if (exec_queue_registered(q) && !exec_queue_destroyed(q)) { + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -905,6 +957,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); int err = -ETIME; int i = 0; + bool wedged = xe_device_wedged(xe); /* * TDR has fired before free job worker. Common if exec queue @@ -928,6 +981,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) trace_xe_sched_job_timedout(job); + if (!wedged && xe_modparam.wedged_mode == 2) { + guc_submit_wedged(exec_queue_to_guc(q)); + wedged = true; + } + /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -935,8 +993,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * Kernel jobs should never fail, nor should VM jobs if they do * somethings has gone wrong and the GT needs a reset */ - if (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))) { + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { if (!xe_sched_invalidate_job(job, 2)) { xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); @@ -946,7 +1004,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) } /* Engine state now stable, disable scheduling if needed */ - if (exec_queue_registered(q)) { + if (!wedged && exec_queue_registered(q)) { struct xe_guc *guc = exec_queue_to_guc(q); int ret; @@ -989,6 +1047,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) */ xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); + xe_guc_exec_queue_trigger_cleanup(q); /* Mark all outstanding jobs as bad, thus completing them */ @@ -1028,7 +1087,7 @@ static void guc_exec_queue_fini_async(struct xe_exec_queue *q) INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT) + if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else queue_work(system_wq, &q->guc->fini_async); @@ -1063,7 +1122,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) { - return !exec_queue_killed_or_banned(q) && exec_queue_registered(q); + return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); } static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) @@ -1274,7 +1333,7 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else __guc_exec_queue_fini(exec_queue_to_guc(q), q); @@ -1285,7 +1344,8 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q, { struct xe_sched_msg *msg; - if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q)) + if (q->sched_props.priority 
== priority || + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1303,7 +1363,7 @@ static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_u struct xe_sched_msg *msg; if (q->sched_props.timeslice_us == timeslice_us || - exec_queue_killed_or_banned(q)) + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1322,7 +1382,7 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, struct xe_sched_msg *msg; if (q->sched_props.preempt_timeout_us == preempt_timeout_us || - exec_queue_killed_or_banned(q)) + exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); @@ -1339,7 +1399,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) return -EINVAL; q->guc->suspend_pending = true; @@ -1485,7 +1545,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; - if (!exec_queue_killed_or_banned(q)) { + if (!exec_queue_killed_or_banned_or_wedged(q)) { int i; trace_xe_exec_queue_resubmit(q); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index ceb8345cbca6..3edeb30d5ccb 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -17,6 +17,7 @@ struct xe_modparam xe_modparam = { .enable_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, + .wedged_mode = 1, /* the rest are 0 by default */ }; @@ -55,6 +56,10 @@ MODULE_PARM_DESC(max_vfs, "(0 = no VFs [default]; N = allow up to N VFs)"); #endif +module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); +MODULE_PARM_DESC(wedged_mode, + "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + struct init_funcs { int (*init)(void); void (*exit)(void); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index b369984f08ec..61a0d28a28c8 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -21,6 +21,7 @@ struct xe_modparam { #ifdef CONFIG_PCI_IOV unsigned int max_vfs; #endif + int wedged_mode; }; extern struct xe_modparam xe_modparam; -- cgit From 6b8ef44cc0a952549a6773a0233cee853f807a79 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 23 Apr 2024 18:18:17 -0400 Subject: drm/xe: Introduce the wedged_mode debugfs So, the wedged mode can be selected per device at runtime, before the tests or before reproducing the issue. 
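For instance, a hypothetical way to flip the knob before reproducing a hang
(the dri/0 path assumes DRM card 0; the plain-C equivalent of an echo):

  /* Hypothetical userspace sketch: select wedged_mode=2 so the next hang
   * is preserved for debugging instead of triggering a reset.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          int fd = open("/sys/kernel/debug/dri/0/wedged_mode", O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, "2", 1) != 1)
                  perror("write");
          close(fd);
          return 0;
  }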
v2:
- s/busted/wedged
- some locking consistency

v3:
- remove mutex
- toggle guc reset policy on any mode change

Cc: Lucas De Marchi
Cc: Alan Previn
Cc: Himal Prasad Ghimiray
Reviewed-by: Himal Prasad Ghimiray
Link: https://patchwork.freedesktop.org/patch/msgid/20240423221817.1285081-4-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/xe/xe_debugfs.c      | 55 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_device.c       | 10 ++++--
 drivers/gpu/drm/xe/xe_device.h       |  2 +-
 drivers/gpu/drm/xe/xe_device_types.h |  9 ++++--
 drivers/gpu/drm/xe/xe_guc_ads.c      | 60 ++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/xe/xe_guc_ads.h      |  1 +
 drivers/gpu/drm/xe/xe_guc_submit.c   | 35 ++++++++++++---------
 7 files changed, 149 insertions(+), 23 deletions(-)
(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index c9b30dbdc14d..0e61fa462c7b 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -12,6 +12,8 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt_debugfs.h"
+#include "xe_gt_printk.h"
+#include "xe_guc_ads.h"
 #include "xe_pm.h"
 #include "xe_sriov.h"
 #include "xe_step.h"
@@ -117,6 +119,56 @@ static const struct file_operations forcewake_all_fops = {
 	.release = forcewake_release,
 };

+static ssize_t wedged_mode_show(struct file *f, char __user *ubuf,
+				size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	char buf[32];
+	int len = 0;
+
+	len = scnprintf(buf, sizeof(buf), "%d\n", xe->wedged.mode);
+
+	return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf,
+			       size_t size, loff_t *pos)
+{
+	struct xe_device *xe = file_inode(f)->i_private;
+	struct xe_gt *gt;
+	u32 wedged_mode;
+	ssize_t ret;
+	u8 id;
+
+	ret = kstrtouint_from_user(ubuf, size, 0, &wedged_mode);
+	if (ret)
+		return ret;
+
+	if (wedged_mode > 2)
+		return -EINVAL;
+
+	if (xe->wedged.mode == wedged_mode)
+		return 0;
+
+	xe->wedged.mode = wedged_mode;
+
+	for_each_gt(gt, xe, id) {
+		ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads);
+		if (ret) {
+			xe_gt_err(gt, "Failed to update GuC ADS scheduler policy. GuC may still cause engine reset even with wedged_mode=2\n");
+			return -EIO;
+		}
+	}
+
+	return size;
+}
+
+static const struct file_operations wedged_mode_fops = {
+	.owner = THIS_MODULE,
+	.read = wedged_mode_show,
+	.write = wedged_mode_set,
+};
+
 void xe_debugfs_register(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
@@ -134,6 +186,9 @@ void xe_debugfs_register(struct xe_device *xe)
 	debugfs_create_file("forcewake_all", 0400, root, xe,
 			    &forcewake_all_fops);

+	debugfs_create_file("wedged_mode", 0400, root, xe,
+			    &wedged_mode_fops);
+
 	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
 		man = ttm_manager_type(bdev, mem_type);

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d45db6ff1fa3..a5b4a9643a78 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -506,6 +506,8 @@ int xe_device_probe_early(struct xe_device *xe)
 	if (err)
 		return err;

+	xe->wedged.mode = xe_modparam.wedged_mode;
+
 	return 0;
 }

@@ -769,7 +771,7 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
  * xe_device_declare_wedged - Declare device wedged
  * @xe: xe device instance
  *
- * This is a final state that can only be cleared with a module
+ * This is a final state that can only be cleared with a mudule
  * re-probe (unbind + bind).
* In this state every IOCTL will be blocked so the GT cannot be used. * In general it will be called upon any critical error such as gt reset @@ -781,10 +783,12 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) */ void xe_device_declare_wedged(struct xe_device *xe) { - if (xe_modparam.wedged_mode == 0) + if (xe->wedged.mode == 0) { + drm_dbg(&xe->drm, "Wedged mode is forcebly disabled\n"); return; + } - if (!atomic_xchg(&xe->wedged, 1)) { + if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; drm_err(&xe->drm, "CRITICAL: Xe has declared device %s as wedged.\n" diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 9ede45fc062a..82317580f4bf 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -169,7 +169,7 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); static inline bool xe_device_wedged(struct xe_device *xe) { - return atomic_read(&xe->wedged); + return atomic_read(&xe->wedged.flag); } void xe_device_declare_wedged(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 91c720d6ad29..af509af922b9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -459,8 +459,13 @@ struct xe_device { /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; - /** @wedged: Xe device faced a critical error and is now blocked. */ - atomic_t wedged; + /** @wedged: Struct to control Wedged States and mode */ + struct { + /** @wedged.flag: Xe device faced a critical error and is now blocked. */ + atomic_t flag; + /** @wedged.mode: Mode controlled by kernel parameter and debugfs */ + int mode; + } wedged; /* private: */ diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index db817a46f157..6a5eb21748b1 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -9,6 +9,7 @@ #include +#include "abi/guc_actions_abi.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" @@ -16,11 +17,11 @@ #include "xe_gt.h" #include "xe_gt_ccs_mode.h" #include "xe_guc.h" +#include "xe_guc_ct.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" #include "xe_mmio.h" -#include "xe_module.h" #include "xe_platform_types.h" #include "xe_wa.h" @@ -441,6 +442,7 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads) static void guc_policies_init(struct xe_guc_ads *ads) { + struct xe_device *xe = ads_to_xe(ads); u32 global_flags = 0; ads_blob_write(ads, policies.dpc_promote_time, @@ -448,7 +450,7 @@ static void guc_policies_init(struct xe_guc_ads *ads) ads_blob_write(ads, policies.max_num_work_items, GLOBAL_POLICY_MAX_NUM_WI); - if (xe_modparam.wedged_mode == 2) + if (xe->wedged.mode == 2) global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; ads_blob_write(ads, policies.global_flags, global_flags); @@ -806,3 +808,57 @@ void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) { guc_populate_golden_lrc(ads); } + +static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) +{ + struct xe_guc_ct *ct = &ads_to_guc(ads)->ct; + u32 action[] = { + XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, + policy_offset + }; + + return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); +} + +/** + * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy + * @ads: Additional data structures object + * + * This function update the GuC's engine reset policy based on 
wedged.mode. + * + * Return: 0 on success, and negative error code otherwise. + */ +int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) +{ + struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); + struct xe_tile *tile = gt_to_tile(gt); + struct guc_policies *policies; + struct xe_bo *bo; + int ret = 0; + + policies = kmalloc(sizeof(*policies), GFP_KERNEL); + if (!policies) + return -ENOMEM; + + policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); + policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); + policies->is_valid = 1; + if (xe->wedged.mode == 2) + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + else + policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies), + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto out; + } + + ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo)); +out: + kfree(policies); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h index 138ef6267671..2e2531779122 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.h +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -13,5 +13,6 @@ int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); void xe_guc_ads_populate(struct xe_guc_ads *ads); void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads); void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads); +int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 0bea17536659..93e1ee183e4a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -35,7 +35,6 @@ #include "xe_macros.h" #include "xe_map.h" #include "xe_mocs.h" -#include "xe_module.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" @@ -868,26 +867,38 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } -static void guc_submit_wedged(struct xe_guc *guc) +static bool guc_submit_hint_wedged(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; unsigned long index; int err; - xe_device_declare_wedged(guc_to_xe(guc)); + if (xe->wedged.mode != 2) + return false; + + if (xe_device_wedged(xe)) + return true; + + xe_device_declare_wedged(xe); + xe_guc_submit_reset_prepare(guc); xe_guc_ct_stop(&guc->ct); err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, guc_submit_wedged_fini, guc); - if (err) - return; + if (err) { + drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. 
Although device is wedged.\n"); + return true; /* Device is wedged anyway */ + } mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) if (xe_exec_queue_get_unless_zero(q)) set_exec_queue_wedged(q); mutex_unlock(&guc->submission_state.lock); + + return true; } static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) @@ -898,15 +909,12 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged = xe_device_wedged(xe); + bool wedged; xe_assert(xe, xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - if (!wedged && xe_modparam.wedged_mode == 2) { - guc_submit_wedged(exec_queue_to_guc(q)); - wedged = true; - } + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -957,7 +965,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); int err = -ETIME; int i = 0; - bool wedged = xe_device_wedged(xe); + bool wedged; /* * TDR has fired before free job worker. Common if exec queue @@ -981,10 +989,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) trace_xe_sched_job_timedout(job); - if (!wedged && xe_modparam.wedged_mode == 2) { - guc_submit_wedged(exec_queue_to_guc(q)); - wedged = true; - } + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job entry point */ xe_sched_submission_stop(sched); -- cgit From ad4ca914de384681ce8984785f4ee2078945a759 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 17:34:07 +0200 Subject: drm/xe/guc: Improve GuC doorbell/context ID manager intro message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use recently added str_plural() helper. 
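For reference, str_plural() lives in <linux/string_choices.h>; a sketch of its one-line shape shows why the "(s)" style suffixes can go away:

	/* sketch: picks the plural suffix from the count */
	static inline const char *str_plural(size_t num)
	{
		return num == 1 ? "" : "s";
	}

With it, "using %u doorbell%s" renders as "1 doorbell" or "4 doorbells" instead of the old "doorbell(s)".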
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419153407.402-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_db_mgr.c | 3 ++- drivers/gpu/drm/xe/xe_guc_id_mgr.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c index 8d9a0287df6b..6767e8076e6b 100644 --- a/drivers/gpu/drm/xe/xe_guc_db_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c @@ -106,7 +106,8 @@ int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count) if (ret) return ret; done: - xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count); + xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell%s\n", + dbm->count, str_plural(dbm->count)); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c index 0fb7c6b78c31..cd0549d0ef89 100644 --- a/drivers/gpu/drm/xe/xe_guc_id_mgr.c +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -97,7 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit) if (ret) return ret; - xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total); + xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n", + idm->total, str_plural(idm->total)); return 0; } -- cgit From b5ef80879dfec1c8e2a992dc186196687293e1fe Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 10 Apr 2024 12:16:40 +0530 Subject: drm/xe/xe2: Add workaround 14021567978 Workaround 14021567978 applies to RenderCS xe2 V3: - Cover xe2_hpg as its landed upstream now V2(MattR): - Move tuning to wa and apply to xe2 Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240410064640.1010098-1-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index dcf7ed51757c..9d9b7fa7a8f0 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -673,6 +673,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, + { XE_RTP_NAME("14021567978"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), -- cgit From cbf7579304c234208569d767355cc39c0665bd5b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 9 Apr 2024 17:31:32 +0200 Subject: drm/xe: Check result of drmm_mutex_init() Although it's unlikely that drmm_mutex_init() will fail during driver initialization, however we shouldn't ignore this case. 
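The failure mode exists because drmm_mutex_init() is drm-managed: besides initializing the mutex it must allocate and register a release action, and that allocation can fail. A conceptual sketch of that shape (not a verbatim copy of drm_managed.c):

	static void drmm_mutex_release(struct drm_device *dev, void *res)
	{
		mutex_destroy(res);
	}

	int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
	{
		mutex_init(lock);
		return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
	}

Hence the -ENOMEM path, however unlikely at probe time, is real and worth propagating.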
Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240409153132.1111-1-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a5b4a9643a78..47db4bc9f12c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -274,7 +274,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - drmm_mutex_init(&xe->drm, &xe->usm.lock); + err = drmm_mutex_init(&xe->drm, &xe->usm.lock); + if (err) + goto err; + xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { -- cgit From 4befb17e83ed8747049c91f5009e786bb858e446 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 24 Apr 2024 19:10:30 +0200 Subject: drm/xe/pf: Expose PF service details via debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debug purposes we might want to verify which registers values PF is sharing with VFs and to view which VF/PF ABI versions were negotiated by the VFs. Plug the 'print' functions already provided by the PF service code into our debugfs. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240424171030.2177-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index ab1a26fce3aa..5102035faa7e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -18,6 +18,7 @@ #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" /* @@ -52,6 +53,8 @@ static unsigned int extract_vfid(struct dentry *d) * │   │   ├── ggtt_provisioned * │   │   ├── contexts_provisioned * │   │   ├── doorbells_provisioned + * │   │   ├── runtime_registers + * │   │   ├── negotiated_versions */ static const struct drm_info_list pf_info[] = { @@ -75,6 +78,16 @@ static const struct drm_info_list pf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_config_print_dbs, }, + { + "runtime_registers", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_service_print_runtime, + }, + { + "negotiated_versions", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_service_print_version, + }, }; /* -- cgit From 7547a23cae4145836dbb94522453af4e7d0ccc92 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 24 Apr 2024 16:05:06 +0200 Subject: drm/xe/guc: Fix typos in VF CFG KLVs descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apart from the obvious spelling typo, use the correct values for infinity quantum/timeout settings (it's 0x0 instead of 0xFFFFFFFF). 
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240424140506.2133-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index e53ffaee2fcd..5c1d40432ca0 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -194,9 +194,9 @@ enum { * granularity) since the GPUs clock time runs off a different crystal * from the CPUs clock. Changing this KLV on a VF that is currently * running a context wont take effect until a new context is scheduled in. - * That said, when the PF is changing this value from 0xFFFFFFFF to - * something else, it might never take effect if the VF is running an - * inifinitely long compute or shader kernel. In such a scenario, the + * That said, when the PF is changing this value from 0x0 to + * a non-zero value, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. In such a scenario, the * PF would need to trigger a VM PAUSE and then change the KLV to force * it to take effect. Such cases might typically happen on a 1PF+1VF * Virtualization config enabled for heavier workloads like AI/ML. @@ -215,9 +215,9 @@ enum { * different crystal from the CPUs clock. Changing this KLV on a VF * that is currently running a context wont take effect until a new * context is scheduled in. - * That said, when the PF is changing this value from 0xFFFFFFFF to - * something else, it might never take effect if the VF is running an - * inifinitely long compute or shader kernel. + * That said, when the PF is changing this value from 0x0 to + * a non-zero value, it might never take effect if the VF is running an + * infinitely long compute or shader kernel. * In this case, the PF would need to trigger a VM PAUSE and then change * the KLV to force it to take effect. Such cases might typically happen * on a 1PF+1VF Virtualization config enabled for heavier workloads like -- cgit From 3cd1585e57908b6efcd967465ef7685f40b2a294 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 18 Apr 2024 15:46:31 +0100 Subject: drm/xe/preempt_fence: enlarge the fence critical section It is really easy to introduce subtle deadlocks in preempt_fence_work_func() since we operate on single global ordered-wq for signalling our preempt fences behind the scenes, so even though we signal a particular fence, everything in the callback should be in the fence critical section, since blocking in the callback will prevent other published fences from signalling. If we enlarge the fence critical section to cover the entire callback, then lockdep should be able to understand this better, and complain if we grab a sensitive lock like vm->lock, which is also held when waiting on preempt fences. 
Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240418144630.299531-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_preempt_fence.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7d50c6e89d8e..5b243b7feb59 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -23,11 +23,19 @@ static void preempt_fence_work_func(struct work_struct *w) q->ops->suspend_wait(q); dma_fence_signal(&pfence->base); - dma_fence_end_signalling(cookie); - + /* + * Opt for keep everything in the fence critical section. This looks really strange since we + * have just signalled the fence, however the preempt fences are all signalled via single + * global ordered-wq, therefore anything that happens in this callback can easily block + * progress on the entire wq, which itself may prevent other published preempt fences from + * ever signalling. Therefore try to keep everything here in the callback in the fence + * critical section. For example if something below grabs a scary lock like vm->lock, + * lockdep should complain since we also hold that lock whilst waiting on preempt fences to + * complete. + */ xe_vm_queue_rebind_worker(q->vm); - xe_exec_queue_put(q); + dma_fence_end_signalling(cookie); } static const char * -- cgit From 6e78e0719d0ed5ec230e8e28bd59e47acb3dbc04 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 23 Apr 2024 08:47:22 +0100 Subject: Revert "drm/xe/vm: drop vm->destroy_work" This reverts commit 5b259c0d1d3caa6efc66c2b856840e68993f814e. Cleanup here is good, however we need to able to flush a worker during vm destruction which might involve sleeping, so bring back the worker. Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240423074721.119633-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 17 +++++++++++++++-- drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++++ 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7ae2b0300db6..633485c8c62b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1173,6 +1173,8 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; +static void vm_destroy_work_func(struct work_struct *w); + /** * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the * given tile and vm. 
@@ -1252,6 +1254,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); + INIT_WORK(&vm->destroy_work, vm_destroy_work_func); + INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1489,9 +1493,10 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vm_put(vm); } -static void xe_vm_free(struct drm_gpuvm *gpuvm) +static void vm_destroy_work_func(struct work_struct *w) { - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + struct xe_vm *vm = + container_of(w, struct xe_vm, destroy_work); struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1511,6 +1516,14 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) kfree(vm); } +static void xe_vm_free(struct drm_gpuvm *gpuvm) +{ + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); + + /* To destroy the VM we need to be able to sleep */ + queue_work(system_unbound_wq, &vm->destroy_work); +} + struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) { struct xe_vm *vm; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 72a100671e5d..0447c79c40a2 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -177,6 +177,13 @@ struct xe_vm { */ struct list_head rebind_list; + /** + * @destroy_work: worker to destroy VM, needed as a dma_fence signaling + * from an irq context can be last put and the destroy needs to be able + * to sleep. + */ + struct work_struct destroy_work; + /** * @rftree: range fence tree to track updates to page table structure. * Used to implement conflict tracking between independent bind engines. -- cgit From 3d44d67c441a9fe6f81a1d705f7de009a32a5b35 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 23 Apr 2024 08:47:23 +0100 Subject: drm/xe/vm: prevent UAF in rebind_work_func() We flush the rebind worker during the vm close phase, however in places like preempt_fence_work_func() we seem to queue the rebind worker without first checking if the vm has already been closed. The concern here is the vm being closed with the worker flushed, but then being rearmed later, which looks like potential uaf, since there is no actual refcounting to track the queued worker. We can't take the vm->lock here in preempt_rebind_work_func() to first check if the vm is closed since that will deadlock, so instead flush the worker again when the vm refcount reaches zero. v2: - Grabbing vm->lock in the preempt worker creates a deadlock, so checking the closed state is tricky. Instead flush the worker when the refcount reaches zero. It should be impossible to queue the preempt worker without already holding vm ref. 
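The shape of the fix, as a sketch (the full hunk follows below): perform the flush on the final-put destroy path, where the invariant above makes it race-free:

	/* refcount is zero here, and queuing the worker requires holding a vm
	 * reference, so the worker cannot be re-armed after this flush */
	if (xe_vm_in_preempt_fence_mode(vm))
		flush_work(&vm->preempt.rebind_work);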
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1676 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1591 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1364 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1304 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1249 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240423074721.119633-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 633485c8c62b..dc685bf45857 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1504,6 +1504,9 @@ static void vm_destroy_work_func(struct work_struct *w) /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); + if (xe_vm_in_preempt_fence_mode(vm)) + flush_work(&vm->preempt.rebind_work); + mutex_destroy(&vm->snap_mutex); if (!(vm->flags & XE_VM_FLAG_MIGRATION)) -- cgit From 3f371a98deada9aee53d908c9aa53f6cdcb1300b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 22:47:47 -0700 Subject: drm/xe: Delete unused GuC submission_state.suspend GuC submission_state.suspend is unused, delete it. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425054747.1918811-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ---- drivers/gpu/drm/xe/xe_guc_types.h | 9 --------- 2 files changed, 13 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 93e1ee183e4a..8f409c9e0f3c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -273,7 +273,6 @@ static void primelockdep(struct xe_guc *guc) fs_reclaim_acquire(GFP_KERNEL); mutex_lock(&guc->submission_state.lock); - might_lock(&guc->submission_state.suspend.lock); mutex_unlock(&guc->submission_state.lock); fs_reclaim_release(GFP_KERNEL); @@ -301,9 +300,6 @@ int xe_guc_submit_init(struct xe_guc *guc) xa_init(&guc->submission_state.exec_queue_lookup); - spin_lock_init(&guc->submission_state.suspend.lock); - guc->submission_state.suspend.context = dma_fence_context_alloc(1); - primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 82bd93f7867d..546ac6350a31 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -72,15 +72,6 @@ struct xe_guc { atomic_t stopped; /** @submission_state.lock: protects submission state */ struct mutex lock; - /** @submission_state.suspend: suspend fence state */ - struct { - /** @submission_state.suspend.lock: suspend fences lock */ - spinlock_t lock; - /** @submission_state.suspend.context: suspend fences context */ - u64 context; - /** @submission_state.suspend.seqno: suspend fences seqno */ - u32 seqno; - } suspend; #ifdef CONFIG_PROVE_LOCKING #define NUM_SUBMIT_WQ 256 /** @submission_state.submit_wq_pool: submission ordered workqueues pool */ -- cgit From f85ada84f60cdcccb0ce897d7e54bac8c6f0722e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:40 -0700 Subject: drm/xe: s/ENGINE_STATE_ENABLED/EXEC_QUEUE_STATE_ENABLED Exec queue has replaced engine 
nomenclature. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8f409c9e0f3c..872a782337f2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -52,7 +52,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) * engine done being processed). */ #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define ENGINE_STATE_ENABLED (1 << 1) +#define EXEC_QUEUE_STATE_ENABLED (1 << 1) #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) @@ -78,17 +78,17 @@ static void clear_exec_queue_registered(struct xe_exec_queue *q) static bool exec_queue_enabled(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_ENABLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; } static void set_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_ENABLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static void clear_exec_queue_enabled(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_ENABLED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); } static bool exec_queue_pending_enable(struct xe_exec_queue *q) -- cgit From 03b3517630ce2ad079d1863c408d5d4df7d80388 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:41 -0700 Subject: drm/xe: s/ENGINE_STATE_SUSPENDED/EXEC_QUEUE_STATE_SUSPENDED Exec queue has replaced engine nomenclature. 
Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 872a782337f2..5dab20fa6d74 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -56,7 +56,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define ENGINE_STATE_SUSPENDED (1 << 5) +#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) #define EXEC_QUEUE_STATE_RESET (1 << 6) #define ENGINE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) @@ -143,17 +143,17 @@ static void set_exec_queue_banned(struct xe_exec_queue *q) static bool exec_queue_suspended(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_SUSPENDED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; } static void set_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static void clear_exec_queue_suspended(struct xe_exec_queue *q) { - atomic_and(~ENGINE_STATE_SUSPENDED, &q->guc->state); + atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); } static bool exec_queue_reset(struct xe_exec_queue *q) @@ -1471,7 +1471,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) set_exec_queue_suspended(q); suspend_fence_signal(q); } - atomic_and(EXEC_QUEUE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED, + atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); q->guc->resume_time = 0; trace_xe_exec_queue_stop(q); -- cgit From 1a1563e3245d96a30b62c30c4e6861ec9518699f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:42 -0700 Subject: drm/xe: s/ENGINE_STATE_KILLED/EXEC_QUEUE_STATE_KILLED Exec queue has replaced engine nomenclature. 
Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 5dab20fa6d74..d4aa3823410c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -58,7 +58,7 @@ exec_queue_to_guc(struct xe_exec_queue *q) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) #define EXEC_QUEUE_STATE_RESET (1 << 6) -#define ENGINE_STATE_KILLED (1 << 7) +#define EXEC_QUEUE_STATE_KILLED (1 << 7) #define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) @@ -168,12 +168,12 @@ static void set_exec_queue_reset(struct xe_exec_queue *q) static bool exec_queue_killed(struct xe_exec_queue *q) { - return atomic_read(&q->guc->state) & ENGINE_STATE_KILLED; + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; } static void set_exec_queue_killed(struct xe_exec_queue *q) { - atomic_or(ENGINE_STATE_KILLED, &q->guc->state); + atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); } static bool exec_queue_wedged(struct xe_exec_queue *q) @@ -189,7 +189,7 @@ static void set_exec_queue_wedged(struct xe_exec_queue *q) static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) { return exec_queue_banned(q) || (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | ENGINE_STATE_KILLED)); + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED)); } #ifdef CONFIG_PROVE_LOCKING -- cgit From 3713a383f5402c57007d341703ce447fb6df1083 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:43 -0700 Subject: drm/xe: Fix alignment in GuC exec queue state defines Normalize the alignment for readability. v3: - Fix typo in commit (Himal) - Fix EXEC_QUEUE_STATE_WEDGED too (Himal) Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d4aa3823410c..1945bc5ffc21 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -53,13 +53,13 @@ exec_queue_to_guc(struct xe_exec_queue *q) */ #define EXEC_QUEUE_STATE_REGISTERED (1 << 0) #define EXEC_QUEUE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) +#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) #define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) #define EXEC_QUEUE_STATE_DESTROYED (1 << 4) #define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) +#define EXEC_QUEUE_STATE_RESET (1 << 6) #define EXEC_QUEUE_STATE_KILLED (1 << 7) -#define EXEC_QUEUE_STATE_WEDGED (1 << 8) +#define EXEC_QUEUE_STATE_WEDGED (1 << 8) static bool exec_queue_registered(struct xe_exec_queue *q) { -- cgit From edc9f11af3adab20ede4a0289a1335f0d8125998 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 25 Apr 2024 16:25:44 -0700 Subject: drm/xe: Replace engine references with exec queue in xe_guc_submit.c Exec queue has replaced engine nomenclature. 
Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240425232544.1935578-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 1945bc5ffc21..cd082b8523fa 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -449,9 +449,9 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ field_, val_) -static void __register_mlrc_engine(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) +static void __register_mlrc_exec_queue(struct xe_guc *guc, + struct xe_exec_queue *q, + struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) struct xe_device *xe = guc_to_xe(guc); @@ -488,8 +488,8 @@ static void __register_mlrc_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, len, 0, 0); } -static void __register_engine(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) +static void __register_exec_queue(struct xe_guc *guc, + struct guc_ctxt_registration_info *info) { u32 action[] = { XE_GUC_ACTION_REGISTER_CONTEXT, @@ -509,7 +509,7 @@ static void __register_engine(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_engine(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); @@ -557,9 +557,9 @@ static void register_engine(struct xe_exec_queue *q) set_exec_queue_registered(q); trace_xe_exec_queue_register(q); if (xe_exec_queue_is_parallel(q)) - __register_mlrc_engine(guc, q, &info); + __register_mlrc_exec_queue(guc, q, &info); else - __register_engine(guc, &info); + __register_exec_queue(guc, &info); init_policies(guc, q); } @@ -729,7 +729,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_engine(q); + register_exec_queue(q); if (!lr) /* LR jobs are emitted in the exec IOCTL */ q->ring_ops->emit_job(job); submit_exec_queue(q); -- cgit From a1adb3d250925ddccd5270106d39aa09493d6edf Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 24 Apr 2024 10:09:09 +0530 Subject: drm/xe/vm: Use xe_vm_lock()/xe_vm_unlock() helpers There is no change in functionality. Using the helper function defined within the driver. 
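For context, a sketch of what the helper wraps, consistent with the substitutions in the diff below (the exact body lives in xe_vm.c):

	int xe_vm_lock(struct xe_vm *vm, bool intr)
	{
		if (intr)
			return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);

		return dma_resv_lock(xe_vm_resv(vm), NULL);
	}

so xe_vm_lock(vm, true) is simply the interruptible dma-resv lock spelled through the driver's own vocabulary.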
-v2 Use xe_vm_unlock() (Ashutosh/Matt) -v3 Use xe_vm_unlock() for error label too (Matt) Reviewed-by: Badal Nilawar Cc: Matthew Brost Cc: Ashutosh Dixit Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240424043910.2190376-2-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dc685bf45857..c506432420c5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1278,7 +1278,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) drm_gem_object_put(vm_resv_obj); - err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + err = xe_vm_lock(vm, true); if (err) goto err_close; @@ -1322,7 +1322,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_pt_populate_empty(tile, vm, vm->pt_root[id]); } - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1364,7 +1364,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) return vm; err_unlock_close: - dma_resv_unlock(xe_vm_resv(vm)); + xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); -- cgit From c79828e0c7795cccc92abcd24107aa478168628c Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 24 Apr 2024 10:09:10 +0530 Subject: drm/xe: Use xe_bo_lock()/xe_bo_unlock() helpers There is no change in functionality. Using the helper function defined within the driver for locking/unlocking the reservation object. Cc: Matthew Brost Cc: Ashutosh Dixit Suggested-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240424043910.2190376-3-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 4 ++-- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 615bbc372ac6..2066d34ddf0b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1382,7 +1382,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) if (!snapshot->lrc_snapshot) goto put_bo; - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); if (!ttm_bo_vmap(&bo->ttm, &src)) { xe_map_memcpy_from(xe_bo_device(bo), snapshot->lrc_snapshot, &src, snapshot->lrc_offset, @@ -1392,7 +1392,7 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) kvfree(snapshot->lrc_snapshot); snapshot->lrc_snapshot = NULL; } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); put_bo: xe_bo_put(bo); } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c506432420c5..89c73d109f6a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3352,7 +3352,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) } if (bo) { - dma_resv_lock(bo->ttm.base.resv, NULL); + xe_bo_lock(bo, false); err = ttm_bo_vmap(&bo->ttm, &src); if (!err) { xe_map_memcpy_from(xe_bo_device(bo), @@ -3361,7 +3361,7 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) snap->snap[i].len); ttm_bo_vunmap(&bo->ttm, &src); } - dma_resv_unlock(bo->ttm.base.resv); + xe_bo_unlock(bo); } else { void __user *userptr = (void 
__user *)(size_t)snap->snap[i].bo_ofs; -- cgit From c832541ca8d5b04cbf957ffce5f4a2a4ee6b396e Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 24 Apr 2024 09:49:11 +0530 Subject: drm/xe: Change xe_guc_submit_stop return to void The function xe_guc_submit_stop consistently returns 0 without an error state, prompting the caller to verify it, which is redundant. Cc: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240424041911.2184868-1-himal.prasad.ghimiray@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 4 +--- drivers/gpu/drm/xe/xe_guc.c | 10 ++-------- drivers/gpu/drm/xe/xe_guc.h | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 3 +-- drivers/gpu/drm/xe/xe_guc_submit.h | 2 +- drivers/gpu/drm/xe/xe_uc.c | 12 ++++-------- drivers/gpu/drm/xe/xe_uc.h | 2 +- 7 files changed, 11 insertions(+), 24 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e922e77f5010..a49e456b968d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -658,9 +658,7 @@ static int gt_reset(struct xe_gt *gt) xe_uc_stop_prepare(&gt->uc); xe_gt_pagefault_reset(gt); - err = xe_uc_stop(&gt->uc); - if (err) - goto err_out; + xe_uc_stop(&gt->uc); xe_gt_tlb_invalidation_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 17438d5b18a4..0c9938e0ab8c 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -885,17 +885,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc) XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); } -int xe_guc_stop(struct xe_guc *guc) +void xe_guc_stop(struct xe_guc *guc) { - int ret; - xe_guc_ct_stop(&guc->ct); - ret = xe_guc_submit_stop(guc); - if (ret) - return ret; - - return 0; + xe_guc_submit_stop(guc); } int xe_guc_start(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 94f2dc5f6f90..a3c92b74a3d5 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -35,7 +35,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); int xe_guc_reset_prepare(struct xe_guc *guc); void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); -int xe_guc_stop(struct xe_guc *guc); +void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); bool xe_guc_in_reset(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index cd082b8523fa..d274a139010b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1519,7 +1519,7 @@ void xe_guc_submit_reset_wait(struct xe_guc *guc) wait_event(guc->ct.wq, !guc_read_stopped(guc)); } -int xe_guc_submit_stop(struct xe_guc *guc) +void xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; @@ -1539,7 +1539,6 @@ int xe_guc_submit_stop(struct xe_guc *guc) * creation which is protected by guc->submission_state.lock. 
*/ - return 0; } static void guc_exec_queue_start(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index fad0421ead36..4275b7da9df5 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -16,7 +16,7 @@ int xe_guc_submit_init(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); -int xe_guc_submit_stop(struct xe_guc *guc); +void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 4feb35c95a1c..0f6cfe06e635 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -215,13 +215,13 @@ void xe_uc_stop_prepare(struct xe_uc *uc) xe_guc_stop_prepare(&uc->guc); } -int xe_uc_stop(struct xe_uc *uc) +void xe_uc_stop(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; + return; - return xe_guc_stop(&uc->guc); + xe_guc_stop(&uc->guc); } int xe_uc_start(struct xe_uc *uc) @@ -247,17 +247,13 @@ again: int xe_uc_suspend(struct xe_uc *uc) { - int ret; - /* GuC submission not enabled, nothing to do */ if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; uc_reset_wait(uc); - ret = xe_uc_stop(uc); - if (ret) - return ret; + xe_uc_stop(uc); return xe_guc_suspend(&uc->guc); } diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index e4d4e3c99f0e..5dfa7725483d 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -16,7 +16,7 @@ int xe_uc_fini_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); -int xe_uc_stop(struct xe_uc *uc); +void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); -- cgit From d6c5bac8e3638de85190ff381f75b8120feafb9c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 25 Apr 2024 16:39:26 +0200 Subject: drm/xe/pf: Re-initialize SR-IOV specific HW settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On older platforms (12.00) the PF driver must explicitly unblock VF's modifications to the GGTT. On newer platforms this capability is enabled by default. 
Bspec: 49908, 53204 Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240425143927.2265-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 3 +++ drivers/gpu/drm/xe/xe_gt.c | 6 ++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5 +++++ 4 files changed, 39 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h index 617ddb84b7fa..017b4ddd1ecf 100644 --- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -14,6 +14,9 @@ #define LMEM_EN REG_BIT(31) #define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ +#define VIRTUAL_CTRL_REG XE_REG(0x10108c) +#define GUEST_GTT_UPDATE_EN REG_BIT(8) + #define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) #define VF_CAP REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a49e456b968d..0528d599c3fe 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -477,6 +477,9 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); @@ -613,6 +616,9 @@ static int do_gt_restart(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_init_hw(gt); + xe_mocs_init(gt); err = xe_uc_start(&gt->uc); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 791dcdd767e2..687ea81931d1 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -5,8 +5,11 @@ #include +#include "regs/xe_sriov_regs.h" + #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_mmio.h" /* * VF's metadata is maintained in the flexible array where: @@ -50,3 +53,25 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } + +static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) == 1200; +} + +static void pf_enable_ggtt_guest_update(struct xe_gt *gt) +{ + xe_mmio_write32(gt, VIRTUAL_CTRL_REG, GUEST_GTT_UPDATE_EN); +} + +/** + * xe_gt_sriov_pf_init_hw - Initialize SR-IOV hardware support. + * @gt: the &xe_gt to initialize + * + * On some platforms the PF must explicitly enable VF's access to the GGTT. 
+ */ +void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) +{ + if (pf_needs_enable_ggtt_guest_update(gt_to_xe(gt))) + pf_enable_ggtt_guest_update(gt); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 05142ffc4319..37d7d6c3df03 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,11 +10,16 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) { return 0; } + +static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) +{ +} #endif #endif -- cgit From e77dff51baf565c2a6b8c77b979c42e814ed0c73 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 25 Apr 2024 16:39:27 +0200 Subject: drm/xe/pf: Initialize and update PF services on driver init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_gt_sriov_pf_init_early() and xe_gt_sriov_pf_init_hw() are ideal places to call per-GT PF service init and update functions. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240425143927.2265-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index 687ea81931d1..7decf71c2b7d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -9,6 +9,7 @@ #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_service.h" #include "xe_mmio.h" /* @@ -51,6 +52,10 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_sriov_pf_service_init(gt); + if (err) + return err; + return 0; } @@ -74,4 +79,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { if (pf_needs_enable_ggtt_guest_update(gt_to_xe(gt))) pf_enable_ggtt_guest_update(gt); + + xe_gt_sriov_pf_service_update(gt); } -- cgit From 445237d67a818c18a748602f8eaa4b52f8c6b39c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Apr 2024 10:49:04 +0100 Subject: drm/xe: Fix spelling mistake "forcebly" -> "forcibly" There is a spelling mistake in a drm_dbg message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240426094904.816033-1-colin.i.king@gmail.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 47db4bc9f12c..ca7a101bd34e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -787,7 +787,7 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) void xe_device_declare_wedged(struct xe_device *xe) { if (xe->wedged.mode == 0) { - drm_dbg(&xe->drm, "Wedged mode is forcebly disabled\n"); + drm_dbg(&xe->drm, "Wedged mode is forcibly disabled\n"); return; } -- cgit From 6a2a90cba12b42eb96c2af3426b77ceb4be31df2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 25 Apr 2024 11:16:09 -0700 Subject: drm/xe/display: Fix ADL-N detection Contrary to i915, in xe ADL-N is kept as a different platform, not a subplatform of ADL-P. Since the display side doesn't need to differentiate between P and N, i.e. 
IS_ALDERLAKE_P_N() is never called, just fixup the compat header to check for both P and N. Moving ADL-N to be a subplatform would be more complex as the firmware loading in xe only handles platforms, not subplatforms, as going forward the direction is to check on IP version rather than platforms/subplatforms. Fix warning when initializing display: xe 0000:00:02.0: [drm:intel_pch_type [xe]] Found Alder Lake PCH ------------[ cut here ]------------ xe 0000:00:02.0: drm_WARN_ON(!((dev_priv)->info.platform == XE_ALDERLAKE_S) && !((dev_priv)->info.platform == XE_ALDERLAKE_P)) And wrong paths being taken on the display side. Reviewed-by: Matt Roper Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240425181610.2704633-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 6a502e9f97d0..9ee694bf331f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -84,7 +84,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) +#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ + IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) #define IS_XEHPSDV(dev_priv) (dev_priv && 0) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) -- cgit From 77f2ef3f16f511c8a8444061d59c8eadc634d33b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:01 -0700 Subject: drm/xe: Lock all gpuva ops during VM bind IOCTL Lock all BOs used in gpuva ops and validate all BOs in a single step during the VM bind IOCTL. This help with the transition to making all gpuva ops in a VM bind IOCTL a single atomic job which is required for proper error handling. 
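The single locking step is built on the drm_exec helper; a generic sketch of its loop semantics (standalone example under assumed names, not the xe code itself):

	#include <drm/drm_exec.h>

	static int lock_all(struct drm_gem_object *obj)
	{
		struct drm_exec exec;
		int ret = 0;

		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			ret = drm_exec_prepare_obj(&exec, obj, 0);
			/* on ww-mutex contention, every lock taken so far is
			 * dropped and the loop body restarts, so the body must
			 * be safe to re-run */
			drm_exec_retry_on_contention(&exec);
			if (ret)
				break;
		}
		drm_exec_fini(&exec);
		return ret;
	}

Because the body can re-run, the lock-and-prep step in this patch stays free of side effects until every object is held.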
v2: - Better commit message (Oak) - s/op_lock/op_lock_and_prep, few other renames too (Oak) - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing) - Do not reserve slots in locking step (direction based on series from Thomas) v3: - Validate BO if is immediate set (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 148 +++++++++++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 40 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 89c73d109f6a..cc9d2ebc7e66 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -315,19 +315,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm) #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 -static void xe_vm_kill(struct xe_vm *vm) +static void xe_vm_kill(struct xe_vm *vm, bool unlocked) { struct xe_exec_queue *q; lockdep_assert_held(&vm->lock); - xe_vm_lock(vm, false); + if (unlocked) + xe_vm_lock(vm, false); + vm->flags |= XE_VM_FLAG_BANNED; trace_xe_vm_kill(vm); list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) q->ops->kill(q); - xe_vm_unlock(vm); + + if (unlocked) + xe_vm_unlock(vm); /* TODO: Inform user the VM is banned */ } @@ -557,7 +561,7 @@ out_unlock_outer: if (err) { drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); - xe_vm_kill(vm); + xe_vm_kill(vm, true); } up_write(&vm->lock); @@ -1774,17 +1778,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue u32 num_syncs, bool immediate, bool first_op, bool last_op) { - int err; - xe_vm_assert_held(vm); xe_bo_assert_held(bo); - if (bo && immediate) { - err = xe_bo_validate(bo, vm, true); - if (err) - return err; - } - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, last_op); } @@ -2437,17 +2433,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return 0; } -static int op_execute(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma *vma, struct xe_vma_op *op) +static int op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { int err; lockdep_assert_held_write(&vm->lock); - err = xe_vm_lock_vma(exec, vma); - if (err) - return err; - xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -2528,19 +2520,10 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, struct xe_vma_op *op) { - struct drm_exec exec; int err; retry_userptr: - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = op_execute(&exec, vm, vma, op); - drm_exec_retry_on_contention(&exec); - if (err) - break; - } - drm_exec_fini(&exec); - + err = op_execute(vm, vma, op); if (err == -EAGAIN) { lockdep_assert_held_write(&vm->lock); @@ -2705,29 +2688,114 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } } +static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, + bool validate) +{ + struct xe_bo *bo = xe_vma_bo(vma); + int err = 0; + + if (bo) { + if (!bo->vm) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0); + if (!err && validate) + err = xe_bo_validate(bo, xe_vma_vm(vma), true); + } + + return err; +} + +static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, + struct xe_vma_op *op) +{ + int err = 0; + + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + err = 
vma_lock_and_validate(exec, op->map.vma, + !xe_vm_in_fault_mode(vm) || + op->map.immediate); + break; + case DRM_GPUVA_OP_REMAP: + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.remap.unmap->va), + false); + if (!err && op->remap.prev) + err = vma_lock_and_validate(exec, op->remap.prev, true); + if (!err && op->remap.next) + err = vma_lock_and_validate(exec, op->remap.next, true); + break; + case DRM_GPUVA_OP_UNMAP: + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.unmap.va), + false); + break; + case DRM_GPUVA_OP_PREFETCH: + err = vma_lock_and_validate(exec, + gpuva_to_vma(op->base.prefetch.va), true); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } + + return err; +} + +static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, + struct xe_vm *vm, + struct list_head *ops_list) +{ + struct xe_vma_op *op; + int err; + + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0); + if (err) + return err; + + list_for_each_entry(op, ops_list, link) { + err = op_lock_and_prep(exec, vm, op); + if (err) + return err; + } + + return 0; +} + static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct list_head *ops_list) { + struct drm_exec exec; struct xe_vma_op *op, *next; int err; lockdep_assert_held_write(&vm->lock); - list_for_each_entry_safe(op, next, ops_list, link) { - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", - op->base.op, err); - /* - * FIXME: Killing VM rather than proper error handling - */ - xe_vm_kill(vm); - return -ENOSPC; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list); + drm_exec_retry_on_contention(&exec); + if (err) + goto unlock; + + list_for_each_entry_safe(op, next, ops_list, link) { + err = xe_vma_op_execute(vm, op); + if (err) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", + op->base.op, err); + /* + * FIXME: Killing VM rather than proper error handling + */ + xe_vm_kill(vm, false); + err = -ENOSPC; + goto unlock; + } + xe_vma_op_cleanup(vm, op); } - xe_vma_op_cleanup(vm, op); } - return 0; +unlock: + drm_exec_fini(&exec); + return err; } #define SUPPORTED_FLAGS \ -- cgit From 75192758d640227b68e4e21de811891219f3d0e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:02 -0700 Subject: drm/xe: Add ops_execute function which returns a fence Add ops_execute function which returns a fence. This will be helpful to initiate all binds (VM bind IOCTL, rebinds in exec IOCTL, rebinds in preempt rebind worker, and rebinds in pagefaults) via a gpuva ops list. Returning a fence is needed in various paths. 
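A minimal sketch of how a caller consumes the fence-or-ERR_PTR convention introduced here (using the ops_execute() form added in the diff below; the error codes are examples):

	struct dma_fence *fence;

	fence = ops_execute(vm, ops_list, true);
	if (IS_ERR(fence))
		return PTR_ERR(fence);	/* e.g. -EAGAIN or -ENOSPC */
	/* on success the caller owns a fence reference and must drop it */
	dma_fence_put(fence);
	return 0;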
v2: - Rebase Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 211 ++++++++++++++++++++++++--------------------- 1 file changed, 111 insertions(+), 100 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cc9d2ebc7e66..d0905d98de8c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1732,16 +1732,17 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) return NULL; } -static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) +static struct dma_fence * +xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, + struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, + bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); struct xe_user_fence *ufence; xe_vm_assert_held(vm); + xe_bo_assert_held(bo); ufence = find_ufence_get(syncs, num_syncs); if (vma->ufence && ufence) @@ -1753,7 +1754,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; } else { int i; @@ -1768,26 +1769,14 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, if (last_op) xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - - return 0; -} - -static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_bo *bo, struct xe_sync_entry *syncs, - u32 num_syncs, bool immediate, bool first_op, - bool last_op) -{ - xe_vm_assert_held(vm); - xe_bo_assert_held(bo); - return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op, - last_op); + return fence; } -static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, struct xe_sync_entry *syncs, - u32 num_syncs, bool first_op, bool last_op) +static struct dma_fence * +xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); @@ -1797,14 +1786,13 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op); if (IS_ERR(fence)) - return PTR_ERR(fence); + return fence; xe_vma_destroy(vma, fence); if (last_op) xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); - return 0; + return fence; } #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ @@ -1947,10 +1935,11 @@ static const u32 region_to_mem_type[] = { XE_PL_VRAM1, }; -static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, u32 region, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) +static struct dma_fence * +xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, + struct xe_exec_queue *q, u32 region, + struct xe_sync_entry *syncs, u32 num_syncs, + bool first_op, bool last_op) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); int err; @@ -1960,27 +1949,24 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, if 
(!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); if (err) - return err; + return ERR_PTR(err); } if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, true, first_op, last_op); } else { + struct dma_fence *fence = + xe_exec_queue_last_fence_get(wait_exec_queue, vm); int i; /* Nothing to do, signal fences now */ if (last_op) { - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(wait_exec_queue, vm); - + for (i = 0; i < num_syncs; i++) xe_sync_entry_signal(&syncs[i], fence); - dma_fence_put(fence); - } } - return 0; + return fence; } } @@ -2433,10 +2419,10 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, return 0; } -static int op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { - int err; + struct dma_fence *fence = NULL; lockdep_assert_held_write(&vm->lock); @@ -2445,11 +2431,11 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma, switch (op->base.op) { case DRM_GPUVA_OP_MAP: - err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), - op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), + op->syncs, op->num_syncs, + op->map.immediate || !xe_vm_in_fault_mode(vm), + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_REMAP: { @@ -2459,37 +2445,39 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma, if (!op->remap.unmap_done) { if (prev || next) vma->gpuva.flags |= XE_VMA_FIRST_REBIND; - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST && - !prev && !next); - if (err) + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST && + !prev && !next); + if (IS_ERR(fence)) break; op->remap.unmap_done = true; } if (prev) { op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.prev, op->q, - xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, true, false, - op->flags & XE_VMA_OP_LAST && !next); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.prev, op->q, + xe_vma_bo(op->remap.prev), op->syncs, + op->num_syncs, true, false, + op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.prev = NULL; } if (next) { op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND; - err = xe_vm_bind(vm, op->remap.next, op->q, - xe_vma_bo(op->remap.next), - op->syncs, op->num_syncs, - true, false, - op->flags & XE_VMA_OP_LAST); + dma_fence_put(fence); + fence = xe_vm_bind(vm, op->remap.next, op->q, + xe_vma_bo(op->remap.next), + op->syncs, op->num_syncs, + true, false, + op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; - if (err) + if (IS_ERR(fence)) break; op->remap.next = NULL; } @@ -2497,34 +2485,36 @@ static int op_execute(struct xe_vm *vm, struct xe_vma *vma, break; } case DRM_GPUVA_OP_UNMAP: - err = xe_vm_unbind(vm, vma, op->q, op->syncs, - op->num_syncs, op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_unbind(vm, vma, op->q, op->syncs, + op->num_syncs, op->flags & XE_VMA_OP_FIRST, + op->flags & 
XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, - op->flags & XE_VMA_OP_FIRST, - op->flags & XE_VMA_OP_LAST); + fence = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, + op->syncs, op->num_syncs, + op->flags & XE_VMA_OP_FIRST, + op->flags & XE_VMA_OP_LAST); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - if (err) + if (IS_ERR(fence)) trace_xe_vma_fail(vma); - return err; + return fence; } -static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vma_op *op) +static struct dma_fence * +__xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, + struct xe_vma_op *op) { + struct dma_fence *fence; int err; retry_userptr: - err = op_execute(vm, vma, op); - if (err == -EAGAIN) { + fence = op_execute(vm, vma, op); + if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) { lockdep_assert_held_write(&vm->lock); if (op->base.op == DRM_GPUVA_OP_REMAP) { @@ -2541,22 +2531,24 @@ retry_userptr: if (!err) goto retry_userptr; + fence = ERR_PTR(err); trace_xe_vma_fail(vma); } } - return err; + return fence; } -static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) +static struct dma_fence * +xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { - int ret = 0; + struct dma_fence *fence = ERR_PTR(-ENOMEM); lockdep_assert_held_write(&vm->lock); switch (op->base.op) { case DRM_GPUVA_OP_MAP: - ret = __xe_vma_op_execute(vm, op->map.vma, op); + fence = __xe_vma_op_execute(vm, op->map.vma, op); break; case DRM_GPUVA_OP_REMAP: { @@ -2569,23 +2561,23 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) else vma = op->remap.next; - ret = __xe_vma_op_execute(vm, vma, op); + fence = __xe_vma_op_execute(vm, vma, op); break; } case DRM_GPUVA_OP_UNMAP: - ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), - op); + fence = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va), + op); break; case DRM_GPUVA_OP_PREFETCH: - ret = __xe_vma_op_execute(vm, - gpuva_to_vma(op->base.prefetch.va), - op); + fence = __xe_vma_op_execute(vm, + gpuva_to_vma(op->base.prefetch.va), + op); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - return ret; + return fence; } static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) @@ -2760,11 +2752,35 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return 0; } +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct list_head *ops_list, + bool cleanup) +{ + struct xe_vma_op *op, *next; + struct dma_fence *fence = NULL; + + list_for_each_entry_safe(op, next, ops_list, link) { + if (!IS_ERR(fence)) { + dma_fence_put(fence); + fence = xe_vma_op_execute(vm, op); + } + if (IS_ERR(fence)) { + drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", + op->base.op, PTR_ERR(fence)); + fence = ERR_PTR(-ENOSPC); + } + if (cleanup) + xe_vma_op_cleanup(vm, op); + } + + return fence; +} + static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct list_head *ops_list) { struct drm_exec exec; - struct xe_vma_op *op, *next; + struct dma_fence *fence; int err; lockdep_assert_held_write(&vm->lock); @@ -2777,19 +2793,14 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, if (err) goto unlock; - list_for_each_entry_safe(op, next, ops_list, link) { - err = xe_vma_op_execute(vm, op); - if (err) { - drm_warn(&vm->xe->drm, "VM op(%d) failed with %d", - op->base.op, err); - /* - * FIXME: Killing VM rather than proper error handling - */ - xe_vm_kill(vm, false); - err = -ENOSPC; - goto unlock; - 
} - xe_vma_op_cleanup(vm, op); + fence = ops_execute(vm, ops_list, true); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + /* FIXME: Killing VM rather than proper error handling */ + xe_vm_kill(vm, false); + goto unlock; + } else { + dma_fence_put(fence); } } -- cgit From 0a34c12449de4b09f74808c6f6c39205ee5071f0 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:03 -0700 Subject: drm/xe: Move migrate to prefetch to op_lock_and_prep function All non-binding operations in VM bind IOCTL should be in the lock and prepare step rather than the execution step. Move prefetch to conform to this pattern. v2: - Rebase - New function names (Oak) - Update stale comment (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d0905d98de8c..e7be99acaff2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1937,20 +1937,10 @@ static const u32 region_to_mem_type[] = { static struct dma_fence * xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, - struct xe_exec_queue *q, u32 region, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) + struct xe_exec_queue *q, struct xe_sync_entry *syncs, + u32 num_syncs, bool first_op, bool last_op) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - int err; - - xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); - - if (!xe_vma_has_no_bo(vma)) { - err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); - if (err) - return ERR_PTR(err); - } if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, @@ -2490,8 +2480,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, op->flags & XE_VMA_OP_LAST); break; case DRM_GPUVA_OP_PREFETCH: - fence = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region, - op->syncs, op->num_syncs, + fence = xe_vm_prefetch(vm, vma, op->q, op->syncs, op->num_syncs, op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2722,9 +2711,20 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, false); break; case DRM_GPUVA_OP_PREFETCH: + { + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + u32 region = op->prefetch.region; + + xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.prefetch.va), true); + gpuva_to_vma(op->base.prefetch.va), + false); + if (!err && !xe_vma_has_no_bo(vma)) + err = xe_bo_migrate(xe_vma_bo(vma), + region_to_mem_type[region]); break; + } default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } -- cgit From 701109f2e3a45b9748f0f98849fde9a35d391efb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:04 -0700 Subject: drm/xe: Add struct xe_vma_ops abstraction Having a structure which encapsulates a list of VMA operations will help enable 1 job for the entire list. 
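At its core the abstraction is just an embedded list head that the parse step fills and the execute step walks; a simplified sketch in fragments (do_op() is a placeholder):

	struct xe_vma_ops vops;
	struct xe_vma_op *op;

	INIT_LIST_HEAD(&vops.list);	/* what xe_vma_ops_init() does below */
	/* parse: append each decoded bind operation */
	list_add_tail(&op->link, &vops.list);
	/* execute: walk the whole atomic unit in submission order */
	list_for_each_entry(op, &vops.list, link)
		do_op(op);		/* placeholder */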
v2: - Rebase Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 37 +++++++++++++++++++++---------------- drivers/gpu/drm/xe/xe_vm_types.h | 7 +++++++ 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e7be99acaff2..93cf5116d2a9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2252,7 +2252,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct drm_gpuva_ops *ops, struct xe_sync_entry *syncs, u32 num_syncs, - struct list_head *ops_list, bool last) + struct xe_vma_ops *vops, bool last) { struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; @@ -2264,11 +2264,11 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; - bool first = list_empty(ops_list); + bool first = list_empty(&vops->list); unsigned int flags = 0; INIT_LIST_HEAD(&op->link); - list_add_tail(&op->link, ops_list); + list_add_tail(&op->link, &vops->list); if (first) { op->flags |= XE_VMA_OP_FIRST; @@ -2394,7 +2394,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } /* FIXME: Unhandled corner case */ - XE_WARN_ON(!last_op && last && !list_empty(ops_list)); + XE_WARN_ON(!last_op && last && !list_empty(&vops->list)); if (!last_op) return 0; @@ -2734,7 +2734,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct list_head *ops_list) + struct xe_vma_ops *vops) { struct xe_vma_op *op; int err; @@ -2743,7 +2743,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, if (err) return err; - list_for_each_entry(op, ops_list, link) { + list_for_each_entry(op, &vops->list, link) { err = op_lock_and_prep(exec, vm, op); if (err) return err; @@ -2753,13 +2753,13 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, } static struct dma_fence *ops_execute(struct xe_vm *vm, - struct list_head *ops_list, + struct xe_vma_ops *vops, bool cleanup) { struct xe_vma_op *op, *next; struct dma_fence *fence = NULL; - list_for_each_entry_safe(op, next, ops_list, link) { + list_for_each_entry_safe(op, next, &vops->list, link) { if (!IS_ERR(fence)) { dma_fence_put(fence); fence = xe_vma_op_execute(vm, op); @@ -2777,7 +2777,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, } static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct list_head *ops_list) + struct xe_vma_ops *vops) { struct drm_exec exec; struct dma_fence *fence; @@ -2788,12 +2788,12 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { - err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list); + err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); drm_exec_retry_on_contention(&exec); if (err) goto unlock; - fence = ops_execute(vm, ops_list, true); + fence = ops_execute(vm, vops, true); if (IS_ERR(fence)) { err = PTR_ERR(fence); /* FIXME: Killing VM rather than proper error handling */ @@ -2954,6 +2954,11 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return err; } +static 
void xe_vma_ops_init(struct xe_vma_ops *vops) +{ + INIT_LIST_HEAD(&vops->list); +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -2967,7 +2972,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; - LIST_HEAD(ops_list); + struct xe_vma_ops vops; int err; int i; @@ -3118,6 +3123,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } + xe_vma_ops_init(&vops); for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3137,14 +3143,13 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs, - &ops_list, - i == args->num_binds - 1); + &vops, i == args->num_binds - 1); if (err) goto unwind_ops; } /* Nothing to do */ - if (list_empty(&ops_list)) { + if (list_empty(&vops.list)) { err = -ENODATA; goto unwind_ops; } @@ -3153,7 +3158,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (q) xe_exec_queue_get(q); - err = vm_bind_ioctl_ops_execute(vm, &ops_list); + err = vm_bind_ioctl_ops_execute(vm, &vops); up_write(&vm->lock); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 0447c79c40a2..466b6c62d1f9 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -358,4 +358,11 @@ struct xe_vma_op { struct xe_vma_op_prefetch prefetch; }; }; + +/** struct xe_vma_ops - VMA operations */ +struct xe_vma_ops { + /** @list: list of VMA operations */ + struct list_head list; +}; + #endif -- cgit From 5f677a9b6537dbfe061ec9ab1c5b34b21e4855a3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:05 -0700 Subject: drm/xe: Use xe_vma_ops to implement xe_vm_rebind All page table updates are moving to an xe_vma_ops interface to implement 1 job per VM bind IOCTL. Convert xe_vm_rebind to use an xe_vma_ops based interface. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 93cf5116d2a9..be41b3f41529 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -712,37 +712,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm) list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; } -static struct dma_fence * -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op); +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, + u8 tile_mask) +{ + INIT_LIST_HEAD(&op->link); + op->base.op = DRM_GPUVA_OP_MAP; + op->base.map.va.addr = vma->gpuva.va.addr; + op->base.map.va.range = vma->gpuva.va.range; + op->base.map.gem.obj = vma->gpuva.gem.obj; + op->base.map.gem.offset = vma->gpuva.gem.offset; + op->map.vma = vma; + op->map.immediate = true; + op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; + op->map.is_null = xe_vma_is_null(vma); +} + +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, + u8 tile_mask) +{ + struct xe_vma_op *op; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) + return -ENOMEM; + + xe_vm_populate_rebind(op, vma, tile_mask); + list_add_tail(&op->link, &vops->list); + + return 0; +} + +static struct dma_fence *ops_execute(struct xe_vm *vm, + struct xe_vma_ops *vops, + bool cleanup); +static void xe_vma_ops_init(struct xe_vma_ops *vops); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { struct dma_fence *fence; struct xe_vma *vma, *next; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; lockdep_assert_held(&vm->lock); - if (xe_vm_in_lr_mode(vm) && !rebind_worker) + if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || + list_empty(&vm->rebind_list)) return 0; + xe_vma_ops_init(&vops); + xe_vm_assert_held(vm); - list_for_each_entry_safe(vma, next, &vm->rebind_list, - combined_links.rebind) { + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { xe_assert(vm->xe, vma->tile_present); - list_del_init(&vma->combined_links.rebind); if (rebind_worker) trace_xe_vma_rebind_worker(vma); else trace_xe_vma_rebind_exec(vma); - fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false); - if (IS_ERR(fence)) - return PTR_ERR(fence); + + err = xe_vm_ops_add_rebind(&vops, vma, + vma->tile_present); + if (err) + goto free_ops; + } + + fence = ops_execute(vm, &vops, false); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + } else { dma_fence_put(fence); + list_for_each_entry_safe(vma, next, &vm->rebind_list, + combined_links.rebind) + list_del_init(&vma->combined_links.rebind); + } +free_ops: + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); } - return 0; + return err; } static void xe_vma_free(struct xe_vma *vma) @@ -2414,7 +2464,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence = NULL; - lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -2533,7 +2583,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) { struct dma_fence *fence = ERR_PTR(-ENOMEM); - lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->lock); switch (op->base.op) { case DRM_GPUVA_OP_MAP: -- cgit From 4dbbe4579490b4dbc89bf9c79ef3b9f1d94366c3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:06 -0700 Subject: drm/xe: Simplify VM bind IOCTL error handling and cleanup Clean up everything in VM bind IOCTL in 1 path for both errors and non-errors. Also move VM bind IOCTL cleanup from ops (also used by non-IOCTL binds) to the VM bind IOCTL. 
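The resulting shape is the classic single-exit kernel error-handling pattern; a hedged sketch with placeholder helpers (the real labels in the diff below are unwind_ops and free_syncs):

	err = parse_and_commit_ops();		/* placeholder */
	if (err)
		goto unwind;
	err = execute_ops();			/* placeholder */
unwind:
	/* -ENODATA means "nothing to do", not a failure to undo */
	if (err && err != -ENODATA)
		undo_committed_ops();		/* placeholder */
	free_everything();	/* runs on success and error alike */
	return err;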
v2: - Break ops_execute on error (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 67 +++++++--------------------------------- drivers/gpu/drm/xe/xe_vm_types.h | 5 --- 2 files changed, 12 insertions(+), 60 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index be41b3f41529..66a27ade77d7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -743,8 +743,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, } static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops, - bool cleanup); + struct xe_vma_ops *vops); static void xe_vma_ops_init(struct xe_vma_ops *vops); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) @@ -777,7 +776,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) goto free_ops; } - fence = ops_execute(vm, &vops, false); + fence = ops_execute(vm, &vops); if (IS_ERR(fence)) { err = PTR_ERR(fence); } else { @@ -2449,7 +2448,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, if (!last_op) return 0; - last_op->ops = ops; if (last) { last_op->flags |= XE_VMA_OP_LAST; last_op->num_syncs = num_syncs; @@ -2619,25 +2617,6 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) return fence; } -static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op) -{ - bool last = op->flags & XE_VMA_OP_LAST; - - if (last) { - while (op->num_syncs--) - xe_sync_entry_cleanup(&op->syncs[op->num_syncs]); - kfree(op->syncs); - if (op->q) - xe_exec_queue_put(op->q); - } - if (!list_empty(&op->link)) - list_del(&op->link); - if (op->ops) - drm_gpuva_ops_free(&vm->gpuvm, op->ops); - if (last) - xe_vm_put(vm); -} - static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, bool post_commit, bool prev_post_commit, bool next_post_commit) @@ -2714,8 +2693,6 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, op->flags & XE_VMA_OP_PREV_COMMITTED, op->flags & XE_VMA_OP_NEXT_COMMITTED); } - - drm_gpuva_ops_free(&vm->gpuvm, __ops); } } @@ -2803,24 +2780,20 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, } static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops, - bool cleanup) + struct xe_vma_ops *vops) { struct xe_vma_op *op, *next; struct dma_fence *fence = NULL; list_for_each_entry_safe(op, next, &vops->list, link) { - if (!IS_ERR(fence)) { - dma_fence_put(fence); - fence = xe_vma_op_execute(vm, op); - } + dma_fence_put(fence); + fence = xe_vma_op_execute(vm, op); if (IS_ERR(fence)) { drm_warn(&vm->xe->drm, "VM op(%d) failed with %ld", op->base.op, PTR_ERR(fence)); fence = ERR_PTR(-ENOSPC); + break; } - if (cleanup) - xe_vma_op_cleanup(vm, op); } return fence; @@ -2843,7 +2816,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, if (err) goto unlock; - fence = ops_execute(vm, vops, true); + fence = ops_execute(vm, vops); if (IS_ERR(fence)) { err = PTR_ERR(fence); /* FIXME: Killing VM rather than proper error handling */ @@ -3204,30 +3177,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto unwind_ops; } - xe_vm_get(vm); - if (q) - xe_exec_queue_get(q); - err = vm_bind_ioctl_ops_execute(vm, &vops); - up_write(&vm->lock); - - if (q) - xe_exec_queue_put(q); - xe_vm_put(vm); - - for (i = 0; bos && i < args->num_binds; ++i) - xe_bo_put(bos[i]); - - kvfree(bos); - 
kvfree(ops); - if (args->num_binds > 1) - kvfree(bind_ops); - - return err; - unwind_ops: - vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + unwind_ops: + if (err && err != -ENODATA) + vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); + for (i = args->num_binds - 1; i >= 0; --i) + if (ops[i]) + drm_gpuva_ops_free(&vm->gpuvm, ops[i]); free_syncs: if (err == -ENODATA) err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 466b6c62d1f9..149ab892967e 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -330,11 +330,6 @@ enum xe_vma_op_flags { struct xe_vma_op { /** @base: GPUVA base operation */ struct drm_gpuva_op base; - /** - * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this - * operations is processed - */ - struct drm_gpuva_ops *ops; /** @q: exec queue for this operation */ struct xe_exec_queue *q; /** -- cgit From bf69918b7199ffa5bb6213f2b0a2c0b1be8f87dd Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:07 -0700 Subject: drm/xe: Use xe_vma_ops to implement page fault rebinds In an effort to make multiple VMA bind operations atomic (1 job), all device page table updates will be implemented via an xe_vma_ops (atomic unit) interface. Add an xe_vma_rebind function which is implemented using the xe_vma_ops interface. Use xe_vma_rebind in GPU page faults for rebinds rather than directly calling the deprecated function in the PT layer. v3: - Update commit message (Oak) v4: - Fix tile_mask argument (CI) Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-8-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 +++++----- drivers/gpu/drm/xe/xe_vm.c | 57 +++++++++++++++++++++++++++++------- drivers/gpu/drm/xe/xe_vm.h | 2 ++ drivers/gpu/drm/xe/xe_vm_types.h | 2 ++ 4 files changed, 58 insertions(+), 19 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index fa9e9853c53b..040dd142c49c 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -19,7 +19,6 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" -#include "xe_pt.h" #include "xe_trace.h" #include "xe_vm.h" @@ -204,15 +203,14 @@ retry_userptr: drm_exec_retry_on_contention(&exec); if (ret) goto unlock_dma_resv; - } - /* Bind VMA only to the GT that has faulted */ - trace_xe_vma_pf_bind(vma); - fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0, - vma->tile_present & BIT(tile->id)); - if (IS_ERR(fence)) { - ret = PTR_ERR(fence); - goto unlock_dma_resv; + /* Bind VMA only to the GT that has faulted */ + trace_xe_vma_pf_bind(vma); + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + goto unlock_dma_resv; + } } /* diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 66a27ade77d7..cb38acabe682 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -716,6 +716,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { INIT_LIST_HEAD(&op->link); + op->tile_mask = tile_mask; op->base.op = DRM_GPUVA_OP_MAP; op->base.map.va.addr = vma->gpuva.va.addr; op->base.map.va.range = vma->gpuva.va.range; @@ -794,6 +795,33 @@ free_ops: return err; } +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) +{ + struct dma_fence *fence = 
NULL; + struct xe_vma_ops vops; + struct xe_vma_op *op, *next_op; + int err; + + lockdep_assert_held(&vm->lock); + xe_vm_assert_held(vm); + xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); + + xe_vma_ops_init(&vops); + + err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); + if (err) + return ERR_PTR(err); + + fence = ops_execute(vm, &vops); + + list_for_each_entry_safe(op, next_op, &vops.list, link) { + list_del(&op->link); + kfree(op); + } + + return fence; +} + static void xe_vma_free(struct xe_vma *vma) { if (xe_vma_is_userptr(vma)) @@ -1694,7 +1722,7 @@ err_fences: static struct dma_fence * xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, - bool first_op, bool last_op) + u8 tile_mask, bool first_op, bool last_op) { struct xe_tile *tile; struct dma_fence *fence; @@ -1702,7 +1730,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); int cur_fence = 0, i; - int number_tiles = hweight8(vma->tile_mask); + int number_tiles = hweight8(tile_mask); int err; u8 id; @@ -1716,7 +1744,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, } for_each_tile(tile, vm->xe, id) { - if (!(vma->tile_mask & BIT(id))) + if (!(tile_mask & BIT(id))) goto next; fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id], @@ -1784,7 +1812,7 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) static struct dma_fence * xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs, - bool immediate, bool first_op, bool last_op) + u8 tile_mask, bool immediate, bool first_op, bool last_op) { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); @@ -1800,8 +1828,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, vma->ufence = ufence ?: vma->ufence; if (immediate) { - fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, - last_op); + fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, + first_op, last_op); if (IS_ERR(fence)) return fence; } else { @@ -1993,7 +2021,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) { return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, - true, first_op, last_op); + vma->tile_mask, true, first_op, last_op); } else { struct dma_fence *fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); @@ -2306,10 +2334,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; + struct xe_tile *tile; + u8 id, tile_mask = 0; int err = 0; lockdep_assert_held_write(&vm->lock); + for_each_tile(tile, vm->xe, id) + tile_mask |= 0x1 << id; + drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); struct xe_vma *vma; @@ -2326,6 +2359,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, } op->q = q; + op->tile_mask = tile_mask; switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -2471,6 +2505,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, case DRM_GPUVA_OP_MAP: fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, + op->tile_mask, op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); @@ -2498,7 +2533,9 @@ static struct dma_fence *op_execute(struct xe_vm 
*vm, struct xe_vma *vma, dma_fence_put(fence); fence = xe_vm_bind(vm, op->remap.prev, op->q, xe_vma_bo(op->remap.prev), op->syncs, - op->num_syncs, true, false, + op->num_syncs, + op->remap.prev->tile_mask, true, + false, op->flags & XE_VMA_OP_LAST && !next); op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (IS_ERR(fence)) @@ -2512,8 +2549,8 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma, fence = xe_vm_bind(vm, op->remap.next, op->q, xe_vma_bo(op->remap.next), op->syncs, op->num_syncs, - true, false, - op->flags & XE_VMA_OP_LAST); + op->remap.next->tile_mask, true, + false, op->flags & XE_VMA_OP_LAST); op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND; if (IS_ERR(fence)) break; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 306cd0934a19..204a4ff63f88 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm); int xe_vm_userptr_check_repin(struct xe_vm *vm); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, + u8 tile_mask); int xe_vm_invalidate_vma(struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 149ab892967e..e9cd6da6263a 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -343,6 +343,8 @@ struct xe_vma_op { struct list_head link; /** @flags: operation flags */ enum xe_vma_op_flags flags; + /** @tile_mask: Tile mask for operation */ + u8 tile_mask; union { /** @map: VMA map operation specific data */ -- cgit From 22cfdd286572decf5225cc219205ca3348cfc4af Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:08 -0700 Subject: drm/xe: Add some members to xe_vma_ops This will help with moving to single jobs for many bind operations. 
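A sketch of how the new members are meant to be consumed (mirroring the diff below; to_wait_exec_queue() is the driver's existing helper):

	struct xe_vma_ops vops;

	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
	/* later stages reach shared state through vops rather than
	 * through per-operation fields */
	wait_exec_queue = to_wait_exec_queue(vops.vm, vops.q);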
v2: - Rebase Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-9-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 19 ++++++++++++++----- drivers/gpu/drm/xe/xe_vm_types.h | 8 ++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cb38acabe682..45258d38d4ee 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -745,7 +745,9 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, } static struct dma_fence *ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops); -static void xe_vma_ops_init(struct xe_vma_ops *vops); +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs); int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) { @@ -760,7 +762,7 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) list_empty(&vm->rebind_list)) return 0; - xe_vma_ops_init(&vops); + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); xe_vm_assert_held(vm); list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { @@ -806,7 +808,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma xe_vm_assert_held(vm); xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); - xe_vma_ops_init(&vops); + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); if (err) @@ -3014,9 +3016,16 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return err; } -static void xe_vma_ops_init(struct xe_vma_ops *vops) +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, + struct xe_exec_queue *q, + struct xe_sync_entry *syncs, u32 num_syncs) { + memset(vops, 0, sizeof(*vops)); INIT_LIST_HEAD(&vops->list); + vops->vm = vm; + vops->q = q; + vops->syncs = syncs; + vops->num_syncs = num_syncs; } int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -3183,7 +3192,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto free_syncs; } - xe_vma_ops_init(&vops); + xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index e9cd6da6263a..ce1a63a5e3e7 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -360,6 +360,14 @@ struct xe_vma_op { /** @list: list of VMA operations */ struct list_head list; + /** @vm: VM */ + struct xe_vm *vm; + /** @q: exec queue these operations */ + struct xe_exec_queue *q; + /** @syncs: syncs these operation */ + struct xe_sync_entry *syncs; + /** @num_syncs: number of syncs */ + u32 num_syncs; }; #endif -- cgit From 61e3270ef9610ab40b1b56aa62e2b8471c32f1f7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:09 -0700 Subject: drm/xe: Add vm_bind_ioctl_ops_fini helper Simplify VM bind code by signaling out-fences / destroying VMAs in a single location. This will help with the transition to a single job for many bind ops.
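The intended call flow on the execute path, sketched from the diff below (vm_bind_ioctl_ops_fini() consumes the fence reference on success):

	fence = ops_execute(vm, vops);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		/* FIXME path: kill the VM instead of fine-grained unwind */
		xe_vm_kill(vm, false);
	} else {
		/* signal out-fences, destroy unmapped VMAs, set the last
		 * fence and drop the reference -- all in one place */
		vm_bind_ioctl_ops_fini(vm, vops, fence);
	}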
v2: - s/vm_bind_ioctl_ops_install_fences/vm_bind_ioctl_ops_fini (Oak) - Set last fence in vm_bind_ioctl_ops_fini (Oak) Cc: Oak Zeng Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-10-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 62 ++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 38 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 45258d38d4ee..2f19372aaad5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1646,7 +1646,7 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence *fence = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; - int cur_fence = 0, i; + int cur_fence = 0; int number_tiles = hweight8(vma->tile_present); int err; u8 id; @@ -1704,10 +1704,6 @@ next: fence = cf ? &cf->base : !fence ? xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } return fence; @@ -1731,7 +1727,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q, struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_vm *vm = xe_vma_vm(vma); - int cur_fence = 0, i; + int cur_fence = 0; int number_tiles = hweight8(tile_mask); int err; u8 id; @@ -1778,12 +1774,6 @@ next: } } - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], - cf ? &cf->base : fence); - } - return cf ? &cf->base : fence; err_fences: @@ -1835,20 +1825,11 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, if (IS_ERR(fence)) return fence; } else { - int i; - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } } - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - return fence; } @@ -1858,7 +1839,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, u32 num_syncs, bool first_op, bool last_op) { struct dma_fence *fence; - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); xe_vm_assert_held(vm); xe_bo_assert_held(xe_vma_bo(vma)); @@ -1867,10 +1847,6 @@ xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma, if (IS_ERR(fence)) return fence; - xe_vma_destroy(vma, fence); - if (last_op) - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - return fence; } @@ -2025,17 +2001,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs, vma->tile_mask, true, first_op, last_op); } else { - struct dma_fence *fence = - xe_exec_queue_last_fence_get(wait_exec_queue, vm); - int i; - - /* Nothing to do, signal fences now */ - if (last_op) { - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - } - - return fence; + return xe_exec_queue_last_fence_get(wait_exec_queue, vm); } } @@ -2838,6 +2804,26 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, return fence; } +static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, + struct dma_fence *fence) +{ + struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); + struct xe_vma_op *op; + int i; + + list_for_each_entry(op, &vops->list, link) { + if (op->base.op == DRM_GPUVA_OP_UNMAP) + xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); + else if 
(op->base.op == DRM_GPUVA_OP_REMAP) xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), + fence); + } + for (i = 0; i < vops->num_syncs; i++) + xe_sync_entry_signal(vops->syncs + i, fence); + xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); + dma_fence_put(fence); +} + static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops) { @@ -2862,7 +2848,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, xe_vm_kill(vm, false); goto unlock; } else { - dma_fence_put(fence); + vm_bind_ioctl_ops_fini(vm, vops, fence); } } -- cgit From fda75ef80bddf2f08b0e597d59da69a3d8eb5be2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:10 -0700 Subject: drm/xe: Move ufence check to op_lock_and_prep Rather than checking for an unsignaled ufence at unbind time, check for this during the op_lock_and_prep function. This helps with the transition to 1 job per VM bind IOCTL. v2: - Rebase v3: - Fix typo in commit message (Oak) Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-11-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 2f19372aaad5..40c1258c3282 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1653,16 +1653,6 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); - if (vma->ufence) { - struct xe_user_fence * const f = vma->ufence; - - if (!xe_sync_ufence_get_status(f)) - return ERR_PTR(-EBUSY); - - vma->ufence = NULL; - xe_sync_ufence_put(f); - } - if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -2717,6 +2707,21 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, return err; } +static int check_ufence(struct xe_vma *vma) +{ + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return -EBUSY; + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + + return 0; +} + static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, struct xe_vma_op *op) { @@ -2729,6 +2734,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, op->map.immediate); break; case DRM_GPUVA_OP_REMAP: + err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); + if (err) + break; + err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), false); @@ -2738,6 +2747,10 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, op->remap.next, true); break; case DRM_GPUVA_OP_UNMAP: + err = check_ufence(gpuva_to_vma(op->base.unmap.va)); + if (err) + break; + err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), false); -- cgit From 5aa5eea09af08ad446f78f3de10388c98f52f19c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:11 -0700 Subject: drm/xe: Move ufence add to vm_bind_ioctl_ops_fini Rather than adding a ufence to a VMA in the bind function, add the ufence to all VMAs in the IOCTL that require binds in vm_bind_ioctl_ops_fini. This helps with the transition to 1 job per VM bind IOCTL.
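The reference counting this relies on, sketched from the diff below: the sync-array lookup takes one reference, each VMA takes its own via op_add_ufence(), and the lookup reference is dropped at the end:

	ufence = find_ufence_get(vops->syncs, vops->num_syncs); /* may be NULL */
	list_for_each_entry(op, &vops->list, link)
		if (ufence)
			op_add_ufence(vm, op, ufence);	/* per-VMA reference */
	if (ufence)
		xe_sync_ufence_put(ufence);	/* drop the lookup reference */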
v2: - Rebase v3: - Fix typo in commit (Oak) Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-12-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_sync.h | 1 + drivers/gpu/drm/xe/xe_vm.c | 44 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 53 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 65f1f1628235..2883d9aca404 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -338,6 +338,21 @@ err_out: return ERR_PTR(-ENOMEM); } +/** + * __xe_sync_ufence_get() - Get user fence from user fence + * @ufence: input user fence + * + * Get a user fence reference from user fence + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence) +{ + user_fence_get(ufence); + + return ufence; +} + /** * xe_sync_ufence_get() - Get user fence from sync * @sync: input sync diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 3e03396af2c6..006dbf780793 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -37,6 +37,7 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *__xe_sync_ufence_get(struct xe_user_fence *ufence); struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); void xe_sync_ufence_put(struct xe_user_fence *ufence); int xe_sync_ufence_get_status(struct xe_user_fence *ufence); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 40c1258c3282..dfd31b346021 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1798,17 +1798,10 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); - struct xe_user_fence *ufence; xe_vm_assert_held(vm); xe_bo_assert_held(bo); - ufence = find_ufence_get(syncs, num_syncs); - if (vma->ufence && ufence) - xe_sync_ufence_put(vma->ufence); - - vma->ufence = ufence ?: vma->ufence; - if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, tile_mask, first_op, last_op); @@ -2817,20 +2810,57 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, return fence; } +static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) +{ + if (vma->ufence) + xe_sync_ufence_put(vma->ufence); + vma->ufence = __xe_sync_ufence_get(ufence); +} + +static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_user_fence *ufence) +{ + switch (op->base.op) { + case DRM_GPUVA_OP_MAP: + vma_add_ufence(op->map.vma, ufence); + break; + case DRM_GPUVA_OP_REMAP: + if (op->remap.prev) + vma_add_ufence(op->remap.prev, ufence); + if (op->remap.next) + vma_add_ufence(op->remap.next, ufence); + break; + case DRM_GPUVA_OP_UNMAP: + break; + case DRM_GPUVA_OP_PREFETCH: + vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); + break; + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); + } +} + static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, struct dma_fence *fence) { struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); + struct xe_user_fence *ufence; struct xe_vma_op *op; int i; + ufence = find_ufence_get(vops->syncs, vops->num_syncs); list_for_each_entry(op, &vops->list, link) { + if (ufence) + op_add_ufence(vm, 
op, ufence); + if (op->base.op == DRM_GPUVA_OP_UNMAP) xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); else if (op->base.op == DRM_GPUVA_OP_REMAP) xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), fence); } + if (ufence) + xe_sync_ufence_put(ufence); for (i = 0; i < vops->num_syncs; i++) xe_sync_entry_signal(vops->syncs + i, fence); xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); -- cgit From c4f18703629dd0112641d6974eb295a53c4a4615 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:12 -0700 Subject: drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this xe_gt_tlb_invalidation_range accepts a start and end address rather than a VMA. This will enable multiple VMAs to be invalidated in a single invalidation. Update the PT layer to use this new function. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 59 +++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 ++ drivers/gpu/drm/xe/xe_pt.c | 25 ++++++++---- 3 files changed, 65 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 8e9c4b990fbb..d0ee1e0df0bd 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) } /** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA + * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an + * address range + * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation * completion, can be NULL - * @vma: VMA to invalidate + * @start: start address + * @end: end address + * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full * TLB invalidation. Completion of TLB is asynchronous and caller can either use @@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid) { struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; - xe_gt_assert(gt, vma); - /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { if (fence) @@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, if (!xe->info.has_range_tlb_invalidation) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { - u64 start = xe_vma_start(vma); - u64 length = xe_vma_size(vma); + u64 orig_start = start; + u64 length = end - start; u64 align, end; if (length < SZ_4K) @@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, * address mask covering the required range. 
*/ align = roundup_pow_of_two(length); - start = ALIGN_DOWN(xe_vma_start(vma), align); - end = ALIGN(xe_vma_end(vma), align); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); length = align; while (start + length < end) { length <<= 1; - start = ALIGN_DOWN(xe_vma_start(vma), length); + start = ALIGN_DOWN(orig_start, length); } /* @@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, */ if (length >= SZ_2M) { length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(xe_vma_start(vma), length); + start = ALIGN_DOWN(orig_start, length); } xe_gt_assert(gt, length >= SZ_4K); xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); xe_gt_assert(gt, IS_ALIGNED(start, length)); action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = xe_vma_vm(vma)->usm.asid; + action[len++] = asid; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); action[len++] = ilog2(length) - ilog2(SZ_4K); @@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, return send_tlb_invalidation(>->uc.guc, fence, action, len); } +/** + * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA + * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion, can be NULL + * @vma: VMA to invalidate + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can either use + * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for + * completion. + * + * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, + * negative error code on error. 
+ */ +int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + struct xe_vma *vma) +{ + xe_gt_assert(gt, vma); + + return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); +} + /** * xe_gt_tlb_invalidation_wait - Wait for TLB to complete * @gt: graphics tile diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index fbb743d80d2c..bf3bebd9f985 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); +int xe_gt_tlb_invalidation_range(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + u64 start, u64 end, u32 asid); int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 5b7930f46cf3..8d3765d3351e 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = { struct invalidation_fence { struct xe_gt_tlb_invalidation_fence base; struct xe_gt *gt; - struct xe_vma *vma; struct dma_fence *fence; struct dma_fence_cb cb; struct work_struct work; + u64 start; + u64 end; + u32 asid; }; static const char * @@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w) container_of(w, struct invalidation_fence, work); trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); - xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma); + xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, + ifence->end, ifence->asid); } static int invalidation_fence_init(struct xe_gt *gt, struct invalidation_fence *ifence, struct dma_fence *fence, - struct xe_vma *vma) + u64 start, u64 end, u32 asid) { int ret; @@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; - ifence->vma = vma; + ifence->start = start; + ifence->end = end; + ifence->asid = asid; INIT_WORK(&ifence->work, invalidation_fence_work_func); ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); @@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue /* TLB invalidation must be done before signaling rebind */ if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, ifence, fence, - vma); + int err = invalidation_fence_init(tile->primary_gt, + ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); @@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu dma_fence_wait(fence, false); /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma); + err = invalidation_fence_init(tile->primary_gt, ifence, fence, + xe_vma_start(vma), + xe_vma_end(vma), + xe_vma_vm(vma)->usm.asid); if (err) { dma_fence_put(fence); kfree(ifence); -- cgit From 98ad158e543426561fa5df5c4387d4368601866f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Apr 2024 21:55:13 -0700 Subject: drm/xe: 
Delete PT update selftest IGTs (e.g. xe_vm) can provide the exact same coverage as the PT update selftest. The PT update selftest is dependent on internal functions which can change thus maintaining this test is costly and provide no extra coverage. Delete this test. Signed-off-by: Matthew Brost Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-14-matthew.brost@intel.com --- drivers/gpu/drm/xe/tests/xe_migrate.c | 86 ----------------------------------- 1 file changed, 86 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 977d5f4e4490..b6e7f80c3774 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -62,36 +62,6 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, return 0; } -static void -sanity_populate_cb(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, void *dst, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct migrate_test_params *p = - to_migrate_test_params(xe_cur_kunit_priv(XE_TEST_LIVE_MIGRATE)); - int i; - u64 *ptr = dst; - u64 value; - - for (i = 0; i < num_qwords; i++) { - value = (qword_ofs + i - update->ofs) * 0x1111111111111111ULL; - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, value); - else - ptr[i] = value; - } - - kunit_info(xe_cur_kunit(), "Used %s.\n", map ? "CPU" : "GPU"); - if (p->force_gpu && map) - KUNIT_FAIL(xe_cur_kunit(), "GPU pagetable update used CPU.\n"); -} - -static const struct xe_migrate_pt_update_ops sanity_ops = { - .populate = sanity_populate_cb, -}; - #define check(_retval, _expected, str, _test) \ do { if ((_retval) != (_expected)) { \ KUNIT_FAIL(_test, "Sanity check failed: " str \ @@ -209,57 +179,6 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, test_copy(m, bo, test, region); } -static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt, - struct kunit *test, bool force_gpu) -{ - struct xe_device *xe = tile_to_xe(m->tile); - struct dma_fence *fence; - u64 retval, expected; - ktime_t then, now; - int i; - - struct xe_vm_pgtable_update update = { - .ofs = 1, - .qwords = 0x10, - .pt_bo = pt, - }; - struct xe_migrate_pt_update pt_update = { - .ops = &sanity_ops, - }; - struct migrate_test_params p = { - .base.id = XE_TEST_LIVE_MIGRATE, - .force_gpu = force_gpu, - }; - - test->priv = &p; - /* Test xe_migrate_update_pgtables() updates the pagetable as expected */ - expected = 0xf0f0f0f0f0f0f0f0ULL; - xe_map_memset(xe, &pt->vmap, 0, (u8)expected, pt->size); - - then = ktime_get(); - fence = xe_migrate_update_pgtables(m, m->q->vm, NULL, m->q, &update, 1, - NULL, 0, &pt_update); - now = ktime_get(); - if (sanity_fence_failed(xe, fence, "Migration pagetable update", test)) - return; - - kunit_info(test, "Updating without syncing took %llu us,\n", - (unsigned long long)ktime_to_us(ktime_sub(now, then))); - - dma_fence_put(fence); - retval = xe_map_rd(xe, &pt->vmap, 0, u64); - check(retval, expected, "PTE[0] must stay untouched", test); - - for (i = 0; i < update.qwords; i++) { - retval = xe_map_rd(xe, &pt->vmap, (update.ofs + i) * 8, u64); - check(retval, i * 0x1111111111111111ULL, "PTE update", test); - } - - retval = xe_map_rd(xe, &pt->vmap, 8 * (update.ofs + update.qwords), - u64); - check(retval, expected, "PTE[0x11] must stay untouched", test); -} - static void xe_migrate_sanity_test(struct xe_migrate *m, struct 
kunit *test) { struct xe_tile *tile = m->tile; @@ -398,11 +317,6 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) test_copy_vram(m, big, test); } - kunit_info(test, "Testing page table update using CPU if GPU idle.\n"); - test_pt_update(m, pt, test, false); - kunit_info(test, "Testing page table update using GPU\n"); - test_pt_update(m, pt, test, true); - out: xe_bb_free(bb, NULL); free_tiny: -- cgit From bb442bfb9b3d1bc6ebd9fc64fa566e12d64627ce Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Wed, 24 Apr 2024 09:12:47 +0530 Subject: drm/xe/xe2hpg: Add Wa_14021490052 Add Wa_14021490052 for Xe2HPG 20.01. Signed-off-by: Shekhar Chauhan Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240424034247.1352755-1-shekhar.chauhan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 9d9b7fa7a8f0..4266ffac3503 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -692,6 +692,15 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14021490052"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, + DIS_MESH_PARTIAL_AUTOSTRIP | + DIS_MESH_AUTOSTRIP), + SET(VFLSKPD, + DIS_PARTIAL_AUTOSTRIP | + DIS_AUTOSTRIP)) + }, {} }; -- cgit From 4caf410766add8cf376a3afc910b17dd0961dd75 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sat, 27 Apr 2024 06:53:39 -0700 Subject: drm/xe: Merge 16021540221 and 18034896535 WAs In order to detect duplicate implementations for the same workaround, early in the implementation of RTP it was decided to error out even if the values set are exactly the same. With the introduction of 18034896535 in commit 74671d23ca18 ("drm/xe/xe2: Add workaround 18034896535"), LNL stepping with graphics stepping A1 now gives the following error on module load: xe 0000:00:02.0: [drm] *ERROR* GT0: [GT OTHER] \ discarding save-restore reg e48c (clear: 00000200, set: 00000200,\ masked: yes, mcr: yes): ret=-22 RTP may be improved in the future, but for now simply join the entries like done with e.g. "1607297627, 1607030317, 1607186500". 
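For illustration, the two entries that collide on LNL A1 both match a render/compute engine there and program the same ROW_CHICKEN4 bit; a reduced sketch of the before/after shape (rules abbreviated from the diff below):

    /* before: two entries resolve to the same save-restore register/bit */
    { XE_RTP_NAME("16021540221"),
      XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), ...),
      XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) },
    { XE_RTP_NAME("18034896535"),
      XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ...),
      XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) },

    /* after: one joined entry; the narrower stepping rule of 16021540221
     * is kept as a comment, matching the style of existing joined WAs */
    { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */
      XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ...),
      XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) },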
Fixes: 74671d23ca18 ("drm/xe/xe2: Add workaround 18034896535") Cc: Bommu Krishnaiah Cc: Tejas Upadhyay Cc: Matt Roper Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240427135339.3485559-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4266ffac3503..134a34dbfe8d 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -449,12 +449,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, - { XE_RTP_NAME("16021540221"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) - }, - { XE_RTP_NAME("18034896535"), + { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) -- cgit From e29a7a34c3cf140ceb2f994a8eae0b68d21e357e Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Mon, 29 Apr 2024 22:30:39 +0200 Subject: drm/xe: Remove uninitialized end var from xe_gt_tlb_invalidation_range() This fixes commit c4f18703629d ("drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this") which added the end variable as part of the function param. v2: Add fixes tag(Matt) Fixes: c4f18703629d ("drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this") Cc: Matthew Brost Signed-off-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240429203039.26918-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d0ee1e0df0bd..c3d015a7ac33 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -305,7 +305,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, } else { u64 orig_start = start; u64 length = end - start; - u64 align, end; + u64 align; if (length < SZ_4K) length = SZ_4K; -- cgit From d457519c942d4885d92f6367e42ca67f4c7471ad Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 19 Apr 2024 11:34:11 -0700 Subject: drm/xe/gsc: Turn off GSCCS interrupts when disabling the engine Starting on LNL, there is a new GSCCS interrupt that is triggered when the GSC engine reset fails. If the HW is in a bad state, this interrupt might end up being triggered even if we're not using the engine, which will lead to a warning because we'll see it as unexpected. Since there is no point in handling the interrupt in this scenario, we can just make sure the interrupts are off when we disable the engine. 
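Concretely, the quiesce added below is the usual enable/mask register pair: writing 0 to the enable register stops new interrupt generation, while writing all-ones to the mask register keeps any already-latched event from being reported:

    xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);  /* no new interrupts generated */
    xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);   /* anything pending stays masked */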
Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Tested-by: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240419183412.1605782-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 4cc757457e01..ec69803152a2 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -717,6 +717,11 @@ static void check_gsc_availability(struct xe_gt *gt) */ if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) { gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0); + + /* interrupts where previously enabled, so turn them off */ + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0); + xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0); + drm_info(&xe->drm, "gsccs disabled due to lack of FW\n"); } } -- cgit From a1ea30b69e02eb02043b0d6d7c42abcfafe99bd0 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 19 Apr 2024 11:34:12 -0700 Subject: drm/xe/gsc: define GSCCS for LNL LNL has 1 GSCCS, same as MTL. Note that the GSCCS will be disabled until we have a GSC FW defined, but having it in the list of engines is a requirement to add such a definition. v2: rebase Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Shekhar Chauhan Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240419183412.1605782-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index fb20c9828563..d0d4d8f9749c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -211,7 +211,8 @@ static const struct xe_media_desc media_xe2 = { .name = "Xe2_LPM / Xe2_HPM", .hw_engine_mask = GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | - GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | + BIT(XE_HW_ENGINE_GSCCS0) }; static const struct xe_device_desc tgl_desc = { -- cgit From 8ad0e1810bf23f22cedb8a2664548b15646570c7 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 2 May 2024 14:43:10 +0200 Subject: drm/xe/gt: Fix assert in L3 bank mask generation What needs to be asserted is that the pattern fits in the number of bits provided by the user in patternbits, otherwise it would be truncated when replicated according to the mask, which is likely not the intended use of this function. The pattern argument is a bitmap so use find_last_bit() instead of fls(). The bit position starts at index 0 so remove "or equal" from the comparison. XE_MAX_L3_BANK_MASK_BITS would be the returned value if the pattern is 0, which can be the case on some platforms.
v2: Check the result does not overflow the array (Lucas De Marchi) v3: Use __fls() for long and handle mask == 0 (Lucas De Marchi) Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Francois Dugast Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502124311.159695-1-francois.dugast@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index af841d801a8f..25ff03ab8448 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -108,7 +108,9 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, { unsigned long bit; - xe_assert(xe, fls(mask) <= patternbits); + xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits || + bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS)); + xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS); for_each_set_bit(bit, &mask, 32) { xe_l3_bank_mask_t shifted_pattern = {}; -- cgit From 75521e8b56e8f9dc673b782df7bc3660f51f329a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Thu, 2 May 2024 20:32:51 +0200 Subject: drm/xe: Perform dma_map when moving system buffer objects to TT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we dma_map on ttm_tt population and dma_unmap when the pages are released in ttm_tt unpopulate. Strictly, the dma_map is not needed until the bo is moved to the XE_PL_TT placement, so perform the dma_mapping on such moves instead, and remove the dma_mapping when moving to XE_PL_SYSTEM. This is desired for the upcoming shrinker series where shrinking of a ttm_tt might fail. That would lead to an odd construct where we first dma_unmap, then shrink and if shrinking fails dma_map again. If dma_mapping instead is performed on move like this, shrinking does not need to care at all about dma mapping. Finally, when a ttm_tt is destroyed while bound to a different memory type than XE_PL_SYSTEM, we keep the dma_unmap in unpopulate(). v2: - Don't accidentally unmap the dma-buf's sgtable.
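In sketch form (simplified from the xe_bo_move() changes below, with error handling elided), the mapping now tracks placement transitions:

    /* entering XE_PL_TT: build and dma-map the backing sg table */
    if (new_mem->mem_type == XE_PL_TT)
        ret = xe_tt_map_sg(ttm);        /* dma_map_sgtable() under the hood */

    /* leaving for XE_PL_SYSTEM (or losing the resource): undo the mapping */
    if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
        ttm_bo->ttm)
        xe_tt_unmap_sg(ttm_bo->ttm);    /* dma_unmap_sgtable() + sg_free_table() */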
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240502183251.10170-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 47 +++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index bc1f794e3e61..52a16cb4e736 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -302,6 +302,18 @@ static int xe_tt_map_sg(struct ttm_tt *tt) return 0; } +static void xe_tt_unmap_sg(struct ttm_tt *tt) +{ + struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); + + if (xe_tt->sg) { + dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, + DMA_BIDIRECTIONAL, 0); + sg_free_table(xe_tt->sg); + xe_tt->sg = NULL; + } +} + struct sg_table *xe_bo_sg(struct xe_bo *bo) { struct ttm_tt *tt = bo->ttm.ttm; @@ -377,27 +389,15 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (err) return err; - /* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */ - err = xe_tt_map_sg(tt); - if (err) - ttm_pool_free(&ttm_dev->pool, tt); - return err; } static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) { - struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm); - if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return; - if (xe_tt->sg) { - dma_unmap_sgtable(xe_tt->dev, xe_tt->sg, - DMA_BIDIRECTIONAL, 0); - sg_free_table(xe_tt->sg); - xe_tt->sg = NULL; - } + xe_tt_unmap_sg(tt); return ttm_pool_free(&ttm_dev->pool, tt); } @@ -628,17 +628,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; int ret = 0; + /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { - ttm_bo_move_null(ttm_bo, new_mem); - return 0; + if (new_mem->mem_type == XE_PL_TT) + ret = xe_tt_map_sg(ttm); + if (!ret) + ttm_bo_move_null(ttm_bo, new_mem); + goto out; } if (ttm_bo->type == ttm_bo_type_sg) { ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); - goto out; + return ret; } tt_has_data = ttm && (ttm_tt_is_populated(ttm) || @@ -650,6 +654,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); + if (new_mem->mem_type == XE_PL_TT) { + ret = xe_tt_map_sg(ttm); + if (ret) + goto out; + } + if ((move_lacks_source && !needs_clear)) { ttm_bo_move_null(ttm_bo, new_mem); goto out; @@ -786,8 +796,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_pm_runtime_put(xe); out: - return ret; + if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && + ttm_bo->ttm) + xe_tt_unmap_sg(ttm_bo->ttm); + return ret; } /** -- cgit From 87ea92a19216a454a6eb5710501a470dcdb8577d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 May 2024 00:33:11 +0200 Subject: drm/xe: Add helpers for manipulating macro arguments Define generic helpers that will replace private definitions used by the RTP code and will allow reuse by the new code. Put them in new xe_args.h file (instead of infamous xe_macros.h) as once we find more potential users outside of the Xe driver we may want to move all of these macros as-is to linux/args.h. 
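As a quick taste of the semantics (these examples mirror the kernel-doc in the header below):

    #define foo X, Y, Z, Q

    COUNT_ARGS(foo)            /* 1: foo is passed as a single, unexpanded argument */
    CALL_ARGS(COUNT_ARGS, foo) /* 4: foo is expanded before COUNT_ARGS is invoked */
    PICK_FIRST(foo)            /* X */
    PICK_LAST(foo)             /* Q (works up to 12 arguments, like COUNT_ARGS) */
    DROP_FIRST(foo)            /* Y, Z, Q */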
Signed-off-by: Michal Wajdeczko Cc: Andy Shevchenko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502223313.2527-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_args.h | 121 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_args.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h new file mode 100644 index 000000000000..40b9eb4151d8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_args.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_ARGS_H_ +#define _XE_ARGS_H_ + +#include + +/* + * Why don't the following macros have the XE prefix? + * + * Once we find more potential users outside of the Xe driver, we plan to move + * all of the following macros unchanged to linux/args.h. + */ + +/** + * CALL_ARGS - Invoke a macro, but allow parameters to be expanded beforehand. + * @f: name of the macro to invoke + * @args: arguments for the macro + * + * This macro allows calling macros which names might generated or we want to + * make sure it's arguments will be correctly expanded. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar COUNT_ARGS(foo) + * #define buz CALL_ARGS(COUNT_ARGS, foo) + * + * With above definitions bar expands to 1 while buz expands to 4. + */ +#define CALL_ARGS(f, args...) __CALL_ARGS(f, args) +#define __CALL_ARGS(f, args...) f(args) + +/** + * DROP_FIRST - Returns all arguments except the first one. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) + * + * With above definitions bar expands to 3. + */ +#define DROP_FIRST(args...) __DROP_FIRST(args) +#define __DROP_FIRST(a, b...) b + +/** + * PICK_FIRST - Returns the first argument. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_FIRST(foo) + * + * With above definitions bar expands to X. + */ +#define PICK_FIRST(args...) __PICK_FIRST(args) +#define __PICK_FIRST(a, b...) a + +/** + * PICK_LAST - Returns the last argument. + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Like COUNT_ARGS() this macro works up to 12 arguments. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_LAST(foo) + * + * With above definitions bar expands to Q. + */ +#define PICK_LAST(args...) __PICK_ARG(COUNT_ARGS(args), args) +#define __PICK_ARG(n, args...) CALL_ARGS(CONCATENATE(PICK_ARG, n), args) +#define PICK_ARG1(args...) PICK_FIRST(args) +#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST(args)) +#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST(args)) +#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST(args)) +#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST(args)) +#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST(args)) +#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST(args)) +#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST(args)) +#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST(args)) +#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST(args)) +#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST(args)) +#define PICK_ARG12(args...) 
PICK_ARG11(DROP_FIRST(args)) + +/** + * ARGS_SEP_COMMA - Definition of a comma character. + * + * This definition can be used in cases where any intermediate macro expects + * fixed number of arguments, but we want to pass more arguments which can + * be properly evaluated only by the next level macro. + * + * Example: + * + * #define foo(f) f(X) f(Y) f(Z) f(Q) + * #define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) + * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) + * + * With above definitions bar expands to + * "X", "Y", "Z", "Q" + * and buz expands to 4. + */ +#define ARGS_SEP_COMMA , + +#endif -- cgit From 9f79e24485494f35740fd85ffb2c67fefb8c67b2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 May 2024 00:33:12 +0200 Subject: drm/xe/kunit: Add simple tests for new xe_args macros We want to make sure that helper macros are working as expected. Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502223313.2527-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/Makefile | 1 + drivers/gpu/drm/xe/tests/xe_args_test.c | 190 ++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 drivers/gpu/drm/xe/tests/xe_args_test.c (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 8cf2367449d8..6e58931fddd4 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -11,6 +11,7 @@ xe_live_test-y = xe_live_test_mod.o \ # Normal kunit tests obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o xe_test-y = xe_test_mod.o \ + xe_args_test.o \ xe_pci_test.o \ xe_rtp_test.o \ xe_wa_test.o diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c new file mode 100644 index 000000000000..9b44c1ab6364 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_args_test.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_args.h" + +static void call_args_example(struct kunit *test) +{ +#define foo X, Y, Z, Q +#define bar COUNT_ARGS(foo) +#define buz CALL_ARGS(COUNT_ARGS, foo) + + KUNIT_EXPECT_EQ(test, bar, 1); + KUNIT_EXPECT_EQ(test, buz, 4); + +#undef foo +#undef bar +#undef buz +} + +static void drop_first_example(struct kunit *test) +{ +#define foo X, Y, Z, Q +#define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) + + KUNIT_EXPECT_EQ(test, bar, 3); + +#undef foo +#undef bar +} + +static void pick_first_example(struct kunit *test) +{ + int X = 1; + +#define foo X, Y, Z, Q +#define bar PICK_FIRST(foo) + + KUNIT_EXPECT_EQ(test, bar, X); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "X"); + +#undef foo +#undef bar +} + +static void pick_last_example(struct kunit *test) +{ + int Q = 1; + +#define foo X, Y, Z, Q +#define bar PICK_LAST(foo) + + KUNIT_EXPECT_EQ(test, bar, Q); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Q"); + +#undef foo +#undef bar +} + +static void sep_comma_example(struct kunit *test) +{ +#define foo(f) f(X) f(Y) f(Z) f(Q) +#define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) +#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) + + static const char * const a[] = { bar }; + + KUNIT_EXPECT_STREQ(test, a[0], "X"); + KUNIT_EXPECT_STREQ(test, a[1], "Y"); + KUNIT_EXPECT_STREQ(test, a[2], "Z"); + KUNIT_EXPECT_STREQ(test, a[3], "Q"); + + KUNIT_EXPECT_EQ(test, buz, 4); + +#undef foo +#undef bar +#undef buz +} + +#define NO_ARGS +#define FOO_ARGS 
X, Y, Z, Q +#define MAX_ARGS -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12 + +static void count_args_test(struct kunit *test) +{ + int count; + + /* COUNT_ARGS() counts to 12 */ + + count = COUNT_ARGS(); + KUNIT_EXPECT_EQ(test, count, 0); + + count = COUNT_ARGS(1); + KUNIT_EXPECT_EQ(test, count, 1); + + count = COUNT_ARGS(a, b, c, d, e); + KUNIT_EXPECT_EQ(test, count, 5); + + count = COUNT_ARGS(a, b, c, d, e, f, g, h, i, j, k, l); + KUNIT_EXPECT_EQ(test, count, 12); + + /* COUNT_ARGS() does not expand params */ + + count = COUNT_ARGS(NO_ARGS); + KUNIT_EXPECT_EQ(test, count, 1); + + count = COUNT_ARGS(FOO_ARGS); + KUNIT_EXPECT_EQ(test, count, 1); +} + +static void call_args_test(struct kunit *test) +{ + int count; + + count = CALL_ARGS(COUNT_ARGS, NO_ARGS); + KUNIT_EXPECT_EQ(test, count, 0); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, NO_ARGS), 0); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS), 4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, FOO_ARGS, FOO_ARGS), 8); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, MAX_ARGS), 12); +} + +static void drop_first_test(struct kunit *test) +{ + int Y = -2, Z = -3, Q = -4; + int a[] = { DROP_FIRST(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, DROP_FIRST(0, -1), -1); + KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(0, -1, -2)), -2); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST(FOO_ARGS)), 3); + KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS))), -4); + KUNIT_EXPECT_EQ(test, a[0], -2); + KUNIT_EXPECT_EQ(test, a[1], -3); + KUNIT_EXPECT_EQ(test, a[2], -4); + KUNIT_EXPECT_STREQ(test, __stringify(DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS)))), "Q"); +} + +static void pick_first_test(struct kunit *test) +{ + int X = -1; + int a[] = { PICK_FIRST(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, PICK_FIRST(-1, -2), -1); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_FIRST(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, PICK_FIRST(FOO_ARGS), -1); + KUNIT_EXPECT_EQ(test, a[0], -1); + KUNIT_EXPECT_STREQ(test, __stringify(PICK_FIRST(FOO_ARGS)), "X"); +} + +static void pick_last_test(struct kunit *test) +{ + int Q = -4; + int a[] = { PICK_LAST(FOO_ARGS) }; + + KUNIT_EXPECT_EQ(test, PICK_LAST(-1, -2), -2); + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_LAST(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, PICK_LAST(FOO_ARGS), -4); + KUNIT_EXPECT_EQ(test, a[0], -4); + KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(FOO_ARGS)), "Q"); + + KUNIT_EXPECT_EQ(test, PICK_LAST(MAX_ARGS), -12); + KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(MAX_ARGS)), "-12"); +} + +static struct kunit_case args_tests[] = { + KUNIT_CASE(count_args_test), + KUNIT_CASE(call_args_example), + KUNIT_CASE(call_args_test), + KUNIT_CASE(drop_first_example), + KUNIT_CASE(drop_first_test), + KUNIT_CASE(pick_first_example), + KUNIT_CASE(pick_first_test), + KUNIT_CASE(pick_last_example), + KUNIT_CASE(pick_last_test), + KUNIT_CASE(sep_comma_example), + {} +}; + +static struct kunit_suite args_test_suite = { + .name = "args", + .test_cases = args_tests, +}; + +kunit_test_suite(args_test_suite); -- cgit From 233e8d1de827b58ec92c60b86a3b0f5bdf7c3892 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 3 May 2024 00:33:13 +0200 Subject: drm/xe/rtp: Prefer helper macros from xe_args.h Some custom implementation can be replaced with generic macros from the linux/args.h or xe_args.h. 
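The mapping from the private helpers to the generic ones is essentially one-to-one:

    _XE_COUNT_ARGS(...)    ->  COUNT_ARGS(...)    /* from linux/args.h */
    _XE_FIRST(...)         ->  PICK_FIRST(...)
    _XE_DROP_FIRST(...)    ->  DROP_FIRST(...)
    __XE_RTP_CONCAT(a, b)  ->  CONCATENATE(XE_RTP_, CONCATENATE(a, b))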
Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240502223313.2527-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_rtp.h | 4 ++-- drivers/gpu/drm/xe/xe_rtp_helpers.h | 26 ++++++++++---------------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index c56fedd126e6..337b1ef1959c 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -341,7 +341,7 @@ struct xe_reg_sr; * }; */ #define XE_RTP_RULES(...) \ - .n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \ + .n_rules = COUNT_ARGS(__VA_ARGS__), \ .rules = (const struct xe_rtp_rule[]) { \ XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \ } @@ -366,7 +366,7 @@ struct xe_reg_sr; * }; */ #define XE_RTP_ACTIONS(...) \ - .n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \ + .n_actions = COUNT_ARGS(__VA_ARGS__), \ .actions = (const struct xe_rtp_action[]) { \ XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \ } diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 181b6290fac3..8129d6d9ef37 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -10,22 +10,16 @@ #error "This header is supposed to be included by xe_rtp.h only" #endif +#include "xe_args.h" + /* * Helper macros - not to be used outside this header. */ #define _XE_ESC(...) __VA_ARGS__ -#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__, 5, 4, 3, 2, 1,)) -#define __XE_COUNT_ARGS(_, _5, _4, _3, _2, X_, ...) X_ - -#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,)) -#define __XE_FIRST(x_, ...) x_ -#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__)) -#define __XE_TUPLE_TAIL(x_, ...) (__VA_ARGS__) -#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__ +#define _XE_TUPLE_TAIL(...) 
(DROP_FIRST(__VA_ARGS__)) -#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b) -#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b +#define _XE_RTP_CONCAT(a, b) CONCATENATE(XE_RTP_, CONCATENATE(a, b)) #define __XE_RTP_PASTE_SEP_COMMA , #define __XE_RTP_PASTE_SEP_BITWISE_OR | @@ -59,11 +53,11 @@ * * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ -#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_, _XE_COUNT_ARGS args_)(prefix_, sep_, args_)) -#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) -#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_RTP_CONCAT(PASTE_, COUNT_ARGS args_)(prefix_, sep_, args_) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer @@ -76,6 +70,6 @@ * * { .a = 10 } */ -#define XE_RTP_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__) +#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST _XE_ESC __VA_ARGS__) #endif -- cgit From 786754124189e3f67fc52e8fe08703e3f50b1894 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 3 May 2024 10:24:50 +0200 Subject: drm/xe/debugfs: Get a runtime_pm reference when setting wedged mode This function is another entry point where it must be ensured that the device resumes before operating on the GuC, so grab a runtime_pm reference. This fixes inner xe_pm_runtime_get_noresume calls which were previously failing. 
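The fix follows the standard runtime PM bracket around hardware access: take a reference, which resumes the device if it is suspended, before touching the GuC, and drop it when done. A minimal sketch of the pattern (do_guc_work() is a placeholder; note that any early return between get and put must drop the reference as well):

    xe_pm_runtime_get(xe);      /* device guaranteed awake from here */
    ret = do_guc_work(xe);      /* safe to talk to the GuC */
    xe_pm_runtime_put(xe);      /* device may runtime-suspend again */
    return ret;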
Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240503082450.268335-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 98e3fbde50ea..2c060a0d6251 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -153,6 +153,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, xe->wedged.mode = wedged_mode; + xe_pm_runtime_get(xe); for_each_gt(gt, xe, id) { ret = xe_guc_ads_scheduler_policy_toggle_reset(&gt->uc.guc.ads); if (ret) { @@ -160,6 +161,7 @@ static ssize_t wedged_mode_set(struct file *f, const char __user *ubuf, return -EIO; } } + xe_pm_runtime_put(xe); return size; } -- cgit From e9c190b9b8e7e07bc0ef0ba9b87321fa37b456c5 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 3 May 2024 15:03:31 -0400 Subject: drm/xe: Demote CCS_MODE info to debug only This information is printed on every gt_reset, which actually occurs on every runtime resume, so it can be very verbose in production builds. Let's demote it to debug only. Cc: Niranjana Vishwanathapura Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240503190331.6690-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 396aeb5b9924..a34c9a24dafc 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -68,8 +68,8 @@ static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) xe_mmio_write32(gt, CCS_MODE, mode); - xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", - mode, config, num_engines, num_slices); + xe_gt_dbg(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n", + mode, config, num_engines, num_slices); } void xe_gt_apply_ccs_mode(struct xe_gt *gt) -- cgit From c462f81b695a7cfde5ba3b0ea1a52c6abaa52a0b Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:25 +0200 Subject: drm/xe: Introduce has_atomic_enable_pte_bit device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add has_atomic_enable_pte_bit to specify that a device has the PTE_AE bit in its PTE field. Currently XE2 and PVC support this, so set it for those two. This will help consolidate setting the atomic access bit in the PTE logic, which is spread across multiple files.
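Consumers can then test the capability instead of open-coding platform lists; a later patch in this series converts xe_vma_create() exactly this way:

    /* before: platform knowledge duplicated at the use site */
    if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
        vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;

    /* after: capability decided once at init time */
    if (vm->xe->info.has_atomic_enable_pte_bit)
        vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;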
Reviewed-by: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-2-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 3 +++ drivers/gpu/drm/xe/xe_pci_types.h | 1 + 3 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0f68c55ea405..7cddb00f9c35 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -281,6 +281,8 @@ struct xe_device { u8 has_heci_gscfi:1; /** @info.skip_guc_pc: Skip GuC based PM feature init */ u8 skip_guc_pc:1; + /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ + u8 has_atomic_enable_pte_bit:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) struct { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 27edf4fd8bb8..c385f4ddf163 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -146,6 +146,7 @@ static const struct xe_graphics_desc graphics_xehpc = { .vram_flags = XE_VRAM_FLAGS_NEED64K, .has_asid = 1, + .has_atomic_enable_pte_bit = 1, .has_flat_ccs = 0, .has_usm = 1, }; @@ -163,6 +164,7 @@ static const struct xe_graphics_desc graphics_xelpg = { #define XE2_GFX_FEATURES \ .dma_mask_size = 46, \ .has_asid = 1, \ + .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ @@ -629,6 +631,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; + xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index b1ad12fa22d6..e1f2b4879fc2 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -25,6 +25,7 @@ struct xe_graphics_desc { u8 max_remote_tiles:2; u8 has_asid:1; + u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; -- cgit From e7192f0162a069bc80a519c087bd2a2f18597d52 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:26 +0200 Subject: drm/xe: Move vm bind bo validation to a helper function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move vm bind bo validation to a helper function to make the xe_vm_bind_ioctl() more readable. v2: Capture ret value of xe_vm_bind_ioctl_validate_bo(Matt B). Remove redundant coh_mode param. 
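The helper gathers three classes of checks behind one call, so the ioctl loop reduces to the sketch below (matching the diff that follows):

    /* validates that: the range/offset lie within the BO; 64K-page BOs have
     * 64K-aligned addr/range/offset; and the PAT index coherency mode is
     * compatible with the BO's cpu_caching (imported dma-bufs need coherency) */
    err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
                                       obj_offset, pat_index);
    if (err)
        goto put_obj;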
Reviewed-by: Matthew Brost Reviewed-by: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-3-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_vm.c | 77 ++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 34 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dfd31b346021..f1357e2a3b10 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3057,6 +3057,46 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, vops->num_syncs = num_syncs; } +static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, + u64 addr, u64 range, u64 obj_offset, + u16 pat_index) +{ + u16 coh_mode; + + if (XE_IOCTL_DBG(xe, range > bo->size) || + XE_IOCTL_DBG(xe, obj_offset > + bo->size - range)) { + return -EINVAL; + } + + if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + if (XE_IOCTL_DBG(xe, obj_offset & + XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || + XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { + return -EINVAL; + } + } + + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + if (bo->cpu_caching) { + if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && + bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { + return -EINVAL; + } + } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { + /* + * Imported dma-buf from a different device should + * require 1way or 2way coherency since we don't know + * how it was mapped on the CPU. Just assume is it + * potentially cached on CPU side. + */ + return -EINVAL; + } + + return 0; +} + int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -3140,7 +3180,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u32 obj = bind_ops[i].obj; u64 obj_offset = bind_ops[i].obj_offset; u16 pat_index = bind_ops[i].pat_index; - u16 coh_mode; if (!obj) continue; @@ -3152,40 +3191,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } bos[i] = gem_to_xe_bo(gem_obj); - if (XE_IOCTL_DBG(xe, range > bos[i]->size) || - XE_IOCTL_DBG(xe, obj_offset > - bos[i]->size - range)) { - err = -EINVAL; - goto put_obj; - } - - if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) { - if (XE_IOCTL_DBG(xe, obj_offset & - XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - err = -EINVAL; - goto put_obj; - } - } - - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (bos[i]->cpu_caching) { - if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - err = -EINVAL; - goto put_obj; - } - } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { - /* - * Imported dma-buf from a different device should - * require 1way or 2way coherency since we don't know - * how it was mapped on the CPU. Just assume is it - * potentially cached on CPU side. 
- */ - err = -EINVAL; + err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, + obj_offset, pat_index); + if (err) goto put_obj; - } } if (args->num_syncs) { -- cgit From 06e69a424930154bf030a56f8ddf781aee71f0e3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:27 +0200 Subject: drm/xe: Introduce has_device_atomics_on_smem device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add has_device_atomics_on_smem to specify that a device supports device atomics on system memory. Currently XE2 supports this so set this for XE2. v2: Set has_device_atomics_on_smem for all platform but PVC. Reviewed-by: Oak Zeng Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-4-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7cddb00f9c35..0af739981ebf 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -283,6 +283,8 @@ struct xe_device { u8 skip_guc_pc:1; /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ u8 has_atomic_enable_pte_bit:1; + /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ + u8 has_device_atomics_on_smem:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) struct { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c385f4ddf163..99723a423850 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -632,6 +632,8 @@ static int xe_info_init(struct xe_device *xe, xe->info.vm_max_level = graphics_desc->vm_max_level; xe->info.has_asid = graphics_desc->has_asid; xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; + if (xe->info.platform != XE_PVC) + xe->info.has_device_atomics_on_smem = 1; xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; -- cgit From a4b725767d93e3564019906ad43908b8bf3d4d9e Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:28 +0200 Subject: drm/xe: Add function to check if BO has single placement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new helper function xe_bo_has_single_placement() to check if a BO has single placement. Reviewed-by: José Roberto de Souza Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-5-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_bo.c | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 1 + 2 files changed, 15 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 52a16cb4e736..03f7fe7acf8c 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -95,6 +95,20 @@ bool xe_bo_is_stolen(struct xe_bo *bo) return bo->ttm.resource->mem_type == XE_PL_STOLEN; } +/** + * xe_bo_has_single_placement - check if BO is placed only in one memory location + * @bo: The BO + * + * This function checks whether a given BO is placed in only one memory location. + * + * Returns: true if the BO is placed in a single memory location, false otherwise. 
+ * + */ +bool xe_bo_has_single_placement(struct xe_bo *bo) +{ + return bo->placement.num_placement == 1; +} + /** * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR * @bo: The BO diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index a885b14bf595..6de894c728f5 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -206,6 +206,7 @@ bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); +bool xe_bo_has_single_placement(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); -- cgit From a0862cf2febcc37188ab47441b69960c8c8f3fa3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 30 Apr 2024 18:25:29 +0200 Subject: drm/xe: Refactor default device atomic settings The default behavior of device atomics depends on the VM type and buffer allocation types. Device atomics are expected to function with all types of allocations for traditional applications/APIs. Additionally, in compute/SVM API scenarios with fault mode or LR mode VMs, device atomics must work with single-region allocations. In all other cases device atomics should be disabled by default also on platforms where we know device atomics doesn't on work on particular allocations types. v3: fault mode requires LR mode so only check for LR mode to determine compute API(Jose). Handle SMEM+LMEM BO's migration to LMEM where device atomics is expected to work. (Brian). v2: Fix platform checks to correct atomics behaviour on PVC. Acked-by: Michal Mrozek Reviewed-by: Oak Zeng Acked-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20240430162529.21588-6-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_pt.c | 37 ++++++++++++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_vm.c | 2 +- 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 8d3765d3351e..87975e45622a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -619,9 +619,40 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; - if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && - (is_devmem || !IS_DGFX(xe))) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + /** + * Default atomic expectations for different allocation scenarios are as follows: + * + * 1. Traditional API: When the VM is not in LR mode: + * - Device atomics are expected to function with all allocations. + * + * 2. Compute/SVM API: When the VM is in LR mode: + * - Device atomics are the default behavior when the bo is placed in a single region. + * - In all other cases device atomics will be disabled with AE=0 until an application + * request differently using a ioctl like madvise. + */ + if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { + if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { + if (bo && xe_bo_has_single_placement(bo)) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + /** + * If a SMEM+LMEM allocation is backed by SMEM, a device + * atomics will cause a gpu page fault and which then + * gets migrated to LMEM, bind such allocations with + * device atomics enabled. 
+ */ + else if (is_devmem && !xe_bo_has_single_placement(bo)) + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + } else { + xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; + } + + /** + * Unset AE if the platform(PVC) doesn't support it on an + * allocation + */ + if (!xe->info.has_device_atomics_on_smem && !is_devmem) + xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; + } if (is_devmem) { xe_walk.default_pte |= XE_PPGTT_PTE_DM; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f1357e2a3b10..d17192c8b7de 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -888,7 +888,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; - if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC) + if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; vma->pat_index = pat_index; -- cgit From 72c7163f27483c333a1f27916505459efa1a373a Mon Sep 17 00:00:00 2001 From: Janga Rahul Kumar Date: Sat, 4 May 2024 01:09:01 +0530 Subject: drm/xe: Relocate regs_are_mcr function Relocate regs_are_mcr funciton to a higher position in the file for improved visibility. Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Janga Rahul Kumar Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240503193902.2056202-2-janga.rahul.kumar@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 1e92f8ee07ba..aef09eb423e9 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -255,6 +255,16 @@ static const struct xe_mocs_entry gen12_mocs_desc[] = { L3_1_UC) }; +static bool regs_are_mcr(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_gt_is_media_type(gt)) + return MEDIA_VER(xe) >= 20; + else + return GRAPHICS_VERx100(xe) >= 1250; +} + static const struct xe_mocs_entry dg1_mocs_desc[] = { /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -467,16 +477,6 @@ static u32 get_entry_control(const struct xe_mocs_info *info, return info->table[info->unused_entries_index].control_value; } -static bool regs_are_mcr(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - - if (xe_gt_is_media_type(gt)) - return MEDIA_VER(xe) >= 20; - else - return GRAPHICS_VERx100(xe) >= 1250; -} - static void __init_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { -- cgit From 9fbd0adbcbe81e207eb030d9ad59953905625dd1 Mon Sep 17 00:00:00 2001 From: Janga Rahul Kumar Date: Sat, 4 May 2024 01:09:02 +0530 Subject: drm/xe/mocs: Add debugfs node to dump mocs This is useful to check mocs configuration. Tests/Tools can use this debugfs entry to get mocs info. v2: Address review comments. Change debugfs output style similar to pat debugfs. (Lucas De Marchi) v3: rebase. v4: Address review comments. Use function pointer inside ops struct. Update Test-with links. Remove usage of flags wherever not required. (Lucas De Marchi) v5: Address review comments. Move register defines. Modify mocs info struct to avoid holes. 
(Luca De Marchi) Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Janga Rahul Kumar Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240503193902.2056202-3-janga.rahul.kumar@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 38 ++++- drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 ++ drivers/gpu/drm/xe/xe_mocs.c | 279 +++++++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_mocs.h | 8 + 4 files changed, 304 insertions(+), 32 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 83847f2da72a..8f44437c8e02 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -59,6 +59,27 @@ #define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) #define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) +#define LE_SSE_MASK REG_GENMASK(18, 17) +#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value) +#define LE_COS_MASK REG_GENMASK(16, 15) +#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK) +#define LE_SCF_MASK REG_BIT(14) +#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value) +#define LE_PFM_MASK REG_GENMASK(13, 11) +#define LE_PFM(value) REG_FIELD_PREP(LE_PFM_MASK, value) +#define LE_SCC_MASK REG_GENMASK(10, 8) +#define LE_SCC(value) REG_FIELD_PREP(LE_SCC_MASK, value) +#define LE_RSC_MASK REG_BIT(7) +#define LE_RSC(value) REG_FIELD_PREP(LE_RSC_MASK, value) +#define LE_AOM_MASK REG_BIT(6) +#define LE_AOM(value) REG_FIELD_PREP(LE_AOM_MASK, value) +#define LE_LRUM_MASK REG_GENMASK(5, 4) +#define LE_LRUM(value) REG_FIELD_PREP(LE_LRUM_MASK, value) +#define LE_TGT_CACHE_MASK REG_GENMASK(3, 2) +#define LE_TGT_CACHE(value) REG_FIELD_PREP(LE_TGT_CACHE_MASK, value) +#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) +#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -314,9 +335,24 @@ #define XEHPC_OVRLSCCC REG_BIT(0) /* L3 Cache Control */ +#define LNCFCMOCS_REG_COUNT 32 #define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) #define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) -#define LNCFCMOCS_REG_COUNT 32 +#define L3_UPPER_LKUP_MASK REG_BIT(23) +#define L3_UPPER_GLBGO_MASK REG_BIT(22) +#define L3_UPPER_IDX_CACHEABILITY_MASK REG_GENMASK(21, 20) +#define L3_UPPER_IDX_SCC_MASK REG_GENMASK(19, 17) +#define L3_UPPER_IDX_ESC_MASK REG_BIT(16) +#define L3_LKUP_MASK REG_BIT(7) +#define L3_LKUP(value) REG_FIELD_PREP(L3_LKUP_MASK, value) +#define L3_GLBGO_MASK REG_BIT(6) +#define L3_GLBGO(value) REG_FIELD_PREP(L3_GLBGO_MASK, value) +#define L3_CACHEABILITY_MASK REG_GENMASK(5, 4) +#define L3_CACHEABILITY(value) REG_FIELD_PREP(L3_CACHEABILITY_MASK, value) +#define L3_SCC_MASK REG_GENMASK(3, 1) +#define L3_SCC(value) REG_FIELD_PREP(L3_SCC_MASK, value) +#define L3_ESC_MASK REG_BIT(0) +#define L3_ESC(value) REG_FIELD_PREP(L3_ESC_MASK, value) #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 94f226a4438e..c5e562e143fd 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -20,6 +20,7 @@ #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" +#include "xe_mocs.h" #include "xe_pat.h" #include "xe_pm.h" #include "xe_reg_sr.h" @@ -202,6 +203,15 @@ static int pat(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int mocs(struct xe_gt *gt, struct drm_printer *p) +{ + 
xe_pm_runtime_get(gt_to_xe(gt)); + xe_mocs_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -257,6 +267,7 @@ static const struct drm_info_list debugfs_list[] = { {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index aef09eb423e9..4780708e5fae 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -13,6 +13,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_step_types.h" @@ -36,34 +37,23 @@ struct xe_mocs_entry { u16 used; }; +struct xe_mocs_info; + +struct xe_mocs_ops { + void (*dump)(struct xe_mocs_info *mocs, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p); +}; + struct xe_mocs_info { unsigned int size; unsigned int n_entries; const struct xe_mocs_entry *table; + const struct xe_mocs_ops *ops; u8 uc_index; u8 wb_index; u8 unused_entries_index; }; -/* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ -#define _LE_CACHEABILITY(value) ((value) << 0) -#define _LE_TGT_CACHE(value) ((value) << 2) -#define LE_LRUM(value) ((value) << 4) -#define LE_AOM(value) ((value) << 6) -#define LE_RSC(value) ((value) << 7) -#define LE_SCC(value) ((value) << 8) -#define LE_PFM(value) ((value) << 11) -#define LE_SCF(value) ((value) << 14) -#define LE_COS(value) ((value) << 15) -#define LE_SSE(value) ((value) << 17) - -/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ -#define L3_ESC(value) ((value) << 0) -#define L3_SCC(value) ((value) << 1) -#define _L3_CACHEABILITY(value) ((value) << 4) -#define L3_GLBGO(value) ((value) << 6) -#define L3_LKUP(value) ((value) << 7) - /* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */ #define IG_PAT REG_BIT(8) #define L3_CACHE_POLICY_MASK REG_GENMASK(5, 4) @@ -80,22 +70,22 @@ struct xe_mocs_info { * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means * the same as LE_UC */ -#define LE_0_PAGETABLE _LE_CACHEABILITY(0) -#define LE_1_UC _LE_CACHEABILITY(1) -#define LE_2_WT _LE_CACHEABILITY(2) -#define LE_3_WB _LE_CACHEABILITY(3) +#define LE_0_PAGETABLE LE_CACHEABILITY(0) +#define LE_1_UC LE_CACHEABILITY(1) +#define LE_2_WT LE_CACHEABILITY(2) +#define LE_3_WB LE_CACHEABILITY(3) /* Target cache */ -#define LE_TC_0_PAGETABLE _LE_TGT_CACHE(0) -#define LE_TC_1_LLC _LE_TGT_CACHE(1) -#define LE_TC_2_LLC_ELLC _LE_TGT_CACHE(2) -#define LE_TC_3_LLC_ELLC_ALT _LE_TGT_CACHE(3) +#define LE_TC_0_PAGETABLE LE_TGT_CACHE(0) +#define LE_TC_1_LLC LE_TGT_CACHE(1) +#define LE_TC_2_LLC_ELLC LE_TGT_CACHE(2) +#define LE_TC_3_LLC_ELLC_ALT LE_TGT_CACHE(3) /* L3 caching options */ -#define L3_0_DIRECT _L3_CACHEABILITY(0) -#define L3_1_UC _L3_CACHEABILITY(1) -#define L3_2_RESERVED _L3_CACHEABILITY(2) -#define L3_3_WB _L3_CACHEABILITY(3) +#define L3_0_DIRECT L3_CACHEABILITY(0) +#define L3_1_UC L3_CACHEABILITY(1) +#define L3_2_RESERVED L3_CACHEABILITY(2) +#define L3_3_WB 
L3_CACHEABILITY(3) /* L4 caching options */ #define L4_0_WB REG_FIELD_PREP(L4_CACHE_POLICY_MASK, 0) @@ -107,6 +97,8 @@ struct xe_mocs_info { #define XE2_L3_1_XD REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 1) #define XE2_L3_3_UC REG_FIELD_PREP(L3_CACHE_POLICY_MASK, 3) +#define XE2_L3_CLOS_MASK REG_GENMASK(7, 6) + #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \ [__idx] = { \ .control_value = __control_value, \ @@ -265,6 +257,74 @@ static bool regs_are_mcr(struct xe_gt *gt) return GRAPHICS_VERx100(xe) >= 1250; } +static void xelp_lncf_dump(struct xe_mocs_info *info, struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [ESC, SCC, L3CC] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j++, + !!(reg_val & L3_ESC_MASK), + REG_FIELD_GET(L3_SCC_MASK, reg_val), + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j, + !!(reg_val & L3_UPPER_IDX_ESC_MASK), + REG_FIELD_GET(L3_UPPER_IDX_SCC_MASK, reg_val), + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static void xelp_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + if (flags & HAS_GLOBAL_MOCS) { + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [LeCC, TC, LRUM, AOM, RSC, SCC, PFM, SCF, CoS, SSE] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u, %u, %u, %u, %u, %u, %u ] (%#8x)\n", + i, + REG_FIELD_GET(LE_CACHEABILITY_MASK, reg_val), + REG_FIELD_GET(LE_TGT_CACHE_MASK, reg_val), + REG_FIELD_GET(LE_LRUM_MASK, reg_val), + !!(reg_val & LE_AOM_MASK), + !!(reg_val & LE_RSC_MASK), + REG_FIELD_GET(LE_SCC_MASK, reg_val), + REG_FIELD_GET(LE_PFM_MASK, reg_val), + !!(reg_val & LE_SCF_MASK), + REG_FIELD_GET(LE_COS_MASK, reg_val), + REG_FIELD_GET(LE_SSE_MASK, reg_val), + reg_val); + } + } + + xelp_lncf_dump(info, gt, p); +} + +static const struct xe_mocs_ops xelp_mocs_ops = { + .dump = xelp_mocs_dump, +}; + static const struct xe_mocs_entry dg1_mocs_desc[] = { /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -301,6 +361,40 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; +static void xehp_lncf_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [UCL3LOOKUP, GLBGO, L3CC] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j++, + !!(reg_val & L3_LKUP_MASK), + !!(reg_val & L3_GLBGO_MASK), + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [%u, %u, %u] (%#8x)\n", + j, + !!(reg_val & L3_UPPER_LKUP_MASK), + !!(reg_val & L3_UPPER_GLBGO_MASK), + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + 
reg_val); + } +} + +static const struct xe_mocs_ops xehp_mocs_ops = { + .dump = xehp_lncf_dump, +}; + static const struct xe_mocs_entry pvc_mocs_desc[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), @@ -312,6 +406,36 @@ static const struct xe_mocs_entry pvc_mocs_desc[] = { MOCS_ENTRY(2, 0, L3_3_WB), }; +static void pvc_mocs_dump(struct xe_mocs_info *info, unsigned int flags, struct xe_gt *gt, + struct drm_printer *p) +{ + unsigned int i, j; + u32 reg_val; + + drm_printf(p, "LNCFCMOCS[idx] = [ L3CC ] (value)\n\n"); + + for (i = 0, j = 0; i < (info->n_entries + 1) / 2; i++, j++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); + + drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", + j++, + REG_FIELD_GET(L3_CACHEABILITY_MASK, reg_val), + reg_val); + + drm_printf(p, "LNCFCMOCS[%2d] = [ %u ] (%#8x)\n", + j, + REG_FIELD_GET(L3_UPPER_IDX_CACHEABILITY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops pvc_mocs_ops = { + .dump = pvc_mocs_dump, +}; + static const struct xe_mocs_entry mtl_mocs_desc[] = { /* Error - Reserved for Non-Use */ MOCS_ENTRY(0, @@ -363,6 +487,36 @@ static const struct xe_mocs_entry mtl_mocs_desc[] = { L3_GLBGO(1) | L3_1_UC), }; +static void mtl_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L4_CACHE_POLICY] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u] (%#8x)\n", + i, + !!(reg_val & IG_PAT), + REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val), + reg_val); + } + + /* MTL lncf mocs table pattern is similar to that of xehp */ + xehp_lncf_dump(info, flags, gt, p); +} + +static const struct xe_mocs_ops mtl_mocs_ops = { + .dump = mtl_mocs_dump, +}; + static const struct xe_mocs_entry xe2_mocs_table[] = { /* Defer to PAT */ MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0), @@ -376,6 +530,34 @@ static const struct xe_mocs_entry xe2_mocs_table[] = { MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0), }; +static void xe2_mocs_dump(struct xe_mocs_info *info, unsigned int flags, + struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int i; + u32 reg_val; + + drm_printf(p, "Global mocs table configuration:\n"); + drm_printf(p, "GLOB_MOCS[idx] = [IG_PAT, L3_CLOS, L3_CACHE_POLICY, L4_CACHE_POLICY] (value)\n\n"); + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); + else + reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); + + drm_printf(p, "GLOB_MOCS[%2d] = [%u, %u, %u, %u] (%#8x)\n", + i, + !!(reg_val & IG_PAT), + REG_FIELD_GET(XE2_L3_CLOS_MASK, reg_val), + REG_FIELD_GET(L3_CACHE_POLICY_MASK, reg_val), + REG_FIELD_GET(L4_CACHE_POLICY_MASK, reg_val), + reg_val); + } +} + +static const struct xe_mocs_ops xe2_mocs_ops = { + .dump = xe2_mocs_dump, +}; + static unsigned int get_mocs_settings(struct xe_device *xe, struct xe_mocs_info *info) { @@ -386,6 +568,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, switch (xe->info.platform) { case XE_LUNARLAKE: case XE_BATTLEMAGE: + info->ops = &xe2_mocs_ops; info->size = ARRAY_SIZE(xe2_mocs_table); info->table = xe2_mocs_table; info->n_entries = XE2_NUM_MOCS_ENTRIES; @@ -394,6 +577,7 @@ static unsigned int 
get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 4; break; case XE_PVC: + info->ops = &pvc_mocs_ops; info->size = ARRAY_SIZE(pvc_mocs_desc); info->table = pvc_mocs_desc; info->n_entries = PVC_NUM_MOCS_ENTRIES; @@ -402,6 +586,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 2; break; case XE_METEORLAKE: + info->ops = &mtl_mocs_ops; info->size = ARRAY_SIZE(mtl_mocs_desc); info->table = mtl_mocs_desc; info->n_entries = MTL_NUM_MOCS_ENTRIES; @@ -409,6 +594,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 1; break; case XE_DG2: + info->ops = &xehp_mocs_ops; info->size = ARRAY_SIZE(dg2_mocs_desc); info->table = dg2_mocs_desc; info->uc_index = 1; @@ -420,6 +606,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 3; break; case XE_DG1: + info->ops = &xelp_mocs_ops; info->size = ARRAY_SIZE(dg1_mocs_desc); info->table = dg1_mocs_desc; info->uc_index = 1; @@ -431,6 +618,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, case XE_ALDERLAKE_S: case XE_ALDERLAKE_P: case XE_ALDERLAKE_N: + info->ops = &xelp_mocs_ops; info->size = ARRAY_SIZE(gen12_mocs_desc); info->table = gen12_mocs_desc; info->n_entries = XELP_NUM_MOCS_ENTRIES; @@ -452,6 +640,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, */ xe_assert(xe, info->unused_entries_index != 0); + xe_assert(xe, !info->ops || info->ops->dump); + if (XE_WARN_ON(info->size > info->n_entries)) { info->table = NULL; return 0; @@ -578,6 +768,33 @@ void xe_mocs_init(struct xe_gt *gt) init_l3cc_table(gt, &table); } +void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_mocs_info table; + unsigned int flags; + u32 ret; + struct xe_device *xe = gt_to_xe(gt); + + flags = get_mocs_settings(xe, &table); + + if (!table.ops || !table.ops->dump) + return; + + xe_pm_runtime_get_noresume(xe); + ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + + if (ret) + goto err_fw; + + table.ops->dump(&table, flags, gt, p); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + +err_fw: + xe_assert(xe, !ret); + xe_pm_runtime_put(xe); +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_mocs.c" #endif diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index 053754c5a94e..d6fa4485a6e9 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -10,8 +10,16 @@ struct xe_exec_queue; struct xe_gt; +struct drm_printer; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); +/** + * xe_mocs_dump - Dump mocs table + * @gt: GT structure + * @p: Printer to dump info to + */ +void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p); + #endif -- cgit From 50aec9665e0babd62b9eee4e613d9a1ef8d2b7de Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sun, 5 May 2024 20:47:58 -0700 Subject: drm/xe: Use ordered WQ for G2H handler System work queues are shared; use a dedicated work queue for G2H processing to avoid G2H processing getting blocked behind system tasks. 
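For reference, the dedicated-workqueue pattern in isolation (a sketch; the demo_* names are hypothetical, while alloc_ordered_workqueue()/queue_work()/destroy_workqueue() are the real kernel APIs used by the diff below):

#include <linux/errno.h>
#include <linux/workqueue.h>

struct demo_ctb {
	struct workqueue_struct *wq;	/* private, ordered queue */
	struct work_struct worker;	/* assumed INIT_WORK()'d elsewhere */
};

static int demo_ctb_init(struct demo_ctb *ctb)
{
	/* Ordered wq: strict FIFO, one item in flight, and private, so
	 * load on the shared system workqueues cannot delay it. */
	ctb->wq = alloc_ordered_workqueue("demo-g2h-wq", 0);
	return ctb->wq ? 0 : -ENOMEM;
}

static void demo_ctb_irq_handler(struct demo_ctb *ctb)
{
	queue_work(ctb->wq, &ctb->worker);	/* instead of system_unbound_wq */
}

static void demo_ctb_fini(struct demo_ctb *ctb)
{
	destroy_workqueue(ctb->wq);	/* flushes queued work before freeing */
}
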
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240506034758.3697397-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 5 +++++ drivers/gpu/drm/xe/xe_guc_ct.h | 2 +- drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8ac819a7061e..0151d29b3c58 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -121,6 +121,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } @@ -146,6 +147,10 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0); + if (!ct->g2h_wq) + return -ENOMEM; + spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 5083e099064f..105bb8e99a8d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -34,7 +34,7 @@ static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct) return; wake_up_all(&ct->wq); - queue_work(system_unbound_wq, &ct->g2h_worker); + queue_work(ct->g2h_wq, &ct->g2h_worker); xe_guc_ct_fast_path(ct); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index d29144c9f20b..fede4c6e93cb 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -120,6 +120,8 @@ struct xe_guc_ct { wait_queue_head_t wq; /** @g2h_fence_wq: wait queue used for G2H fencing */ wait_queue_head_t g2h_fence_wq; + /** @g2h_wq: used to process G2H */ + struct workqueue_struct *g2h_wq; /** @msg: Message buffer */ u32 msg[GUC_CTB_MSG_MAX_LEN]; /** @fast_msg: Message buffer */ -- cgit From 5b882c1e5a355d034c0e08fba2402b4451765ab2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 6 May 2024 22:52:54 +0200 Subject: drm/xe: Fix xe_mocs.h We don't need to include . We don't use struct xe_exec_queue here. We should sort forward declarations. Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240506205254.2659-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_mocs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h index d6fa4485a6e9..dc972ffd4d07 100644 --- a/drivers/gpu/drm/xe/xe_mocs.h +++ b/drivers/gpu/drm/xe/xe_mocs.h @@ -6,11 +6,8 @@ #ifndef _XE_MOCS_H_ #define _XE_MOCS_H_ -#include - -struct xe_exec_queue; -struct xe_gt; struct drm_printer; +struct xe_gt; void xe_mocs_init_early(struct xe_gt *gt); void xe_mocs_init(struct xe_gt *gt); -- cgit From a4cb575d910a5c65c5f8b764e2b5f56b66019522 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Mon, 6 May 2024 22:29:50 +0200 Subject: drm/xe/vm_doc: Fix some typos Fix some typos and add / remove / change a few words to improve readability and prevent some ambiguities. 
Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240506202950.109750-1-francois.dugast@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_vm_doc.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index bdc6659891a5..4d33f310b653 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -25,7 +25,7 @@ * VM bind (create GPU mapping for a BO or userptr) * ================================================ * - * Creates GPU mapings for a BO or userptr within a VM. VM binds uses the same + * Creates GPU mappings for a BO or userptr within a VM. VM binds uses the same * in / out fence interface (struct drm_xe_sync) as execs which allows users to * think of binds and execs as more or less the same operation. * @@ -190,8 +190,8 @@ * Deferred binds in fault mode * ---------------------------- * - * In a VM is in fault mode (TODO: link to fault mode), new bind operations that - * create mappings are by default are deferred to the page fault handler (first + * If a VM is in fault mode (TODO: link to fault mode), new bind operations that + * create mappings are by default deferred to the page fault handler (first * use). This behavior can be overriden by setting the flag * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping * immediately. @@ -225,7 +225,7 @@ * * A VM in compute mode enables long running workloads and ultra low latency * submission (ULLS). ULLS is implemented via a continuously running batch + - * semaphores. This enables to the user to insert jump to new batch commands + * semaphores. This enables the user to insert jump to new batch commands * into the continuously running batch. In both cases these batches exceed the * time a dma fence is allowed to exist for before signaling, as such dma fences * are not used when a VM is in compute mode. User fences (TODO: link user fence @@ -244,7 +244,7 @@ * Once all preempt fences are signaled for a VM the kernel can safely move the * memory and kick the rebind worker which resumes all the engines execution. * - * A preempt fence, for every engine using the VM, is installed the VM's + * A preempt fence, for every engine using the VM, is installed into the VM's * dma-resv DMA_RESV_USAGE_PREEMPT_FENCE slot. The same preempt fence, for every * engine using the VM, is also installed into the same dma-resv slot of every * external BO mapped in the VM. @@ -314,7 +314,7 @@ * signaling, and memory allocation is usually required to resolve a page * fault, but memory allocation is not allowed to gate dma fence signaling. As * such, dma fences are not allowed when VM is in fault mode. Because dma-fences - * are not allowed, long running workloads and ULLS are enabled on a faulting + * are not allowed, only long running workloads and ULLS are enabled on a faulting * VM. * * Defered VM binds @@ -399,14 +399,14 @@ * Notice no rebind is issued in the access counter handler as the rebind will * be issued on next page fault. 
* - * Cavets with eviction / user pointer invalidation - * ------------------------------------------------ + * Caveats with eviction / user pointer invalidation + * ------------------------------------------------- * * In the case of eviction and user pointer invalidation on a faulting VM, there * is no need to issue a rebind rather we just need to blow away the page tables * for the VMAs and the page fault handler will rebind the VMAs when they fault. - * The cavet is to update / read the page table structure the VM global lock is - * neeeed. In both the case of eviction and user pointer invalidation locks are + * The caveat is to update / read the page table structure the VM global lock is + * needed. In both the case of eviction and user pointer invalidation locks are * held which make acquiring the VM global lock impossible. To work around this * every VMA maintains a list of leaf page table entries which should be written * to zero to blow away the VMA's page tables. After writing zero to these @@ -427,9 +427,9 @@ * VM global lock (vm->lock) - rw semaphore lock. Outer most lock which protects * the list of userptrs mapped in the VM, the list of engines using this VM, and * the array of external BOs mapped in the VM. When adding or removing any of the - * aforemented state from the VM should acquire this lock in write mode. The VM + * aforementioned state from the VM should acquire this lock in write mode. The VM * bind path also acquires this lock in write while the exec / compute mode - * rebind worker acquire this lock in read mode. + * rebind worker acquires this lock in read mode. * * VM dma-resv lock (vm->ttm.base.resv->lock) - WW lock. Protects VM dma-resv * slots which is shared with any private BO in the VM. Expected to be acquired -- cgit From c18a5e3e61650110b5d8523292abaf6ae19ebdd2 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 30 Apr 2024 18:42:29 +0530 Subject: drm/xe: skip error capture when exec queue is killed When the user closes an exec queue soon after job submission, we generate an error coredump. Instead, check whether the exec queue was killed during the job timeout and, if so, skip the error coredump capture. V2: - Just skip error capture - MattB Signed-off-by: Tejas Upadhyay Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240430131229.2228809-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index d274a139010b..2c0aa3443cd9 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -980,8 +980,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), "VM job timed out on non-killed execqueue\n"); - simple_error_capture(q); - xe_devcoredump(job); + if (!exec_queue_killed(q)) { + simple_error_capture(q); + xe_devcoredump(job); + } trace_xe_sched_job_timedout(job); -- cgit From 7348a9a1122884ccfc414166daaf3977100d1c30 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 13:09:57 +0200 Subject: drm/xe: Don't rely on xe_assert.h to be included elsewhere While xe_assert.h is now included and used by xe_force_wake.h, we want to stop including xe_force_wake.h from xe_device.h as it's not needed there. Explicitly include xe_assert.h where needed. 
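The rule being applied is plain include-what-you-use; a minimal sketch of a file that calls xe_assert() and therefore includes the header itself (the file and function are hypothetical, and the comment about debug builds reflects the usual xe_assert() expectation rather than a quote from the header):

/* demo.c -- uses xe_assert(), so it includes xe_assert.h directly
 * instead of inheriting it through xe_device.h. */
#include "xe_assert.h"
#include "xe_device.h"

static void demo_validate(struct xe_device *xe, int n_entries)
{
	/* Sanity check, same shape as the driver's own
	 * xe_assert(xe, info->unused_entries_index != 0) usage;
	 * typically compiled out on non-debug builds. */
	xe_assert(xe, n_entries > 0);
}
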
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240507110959.2747-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_bb.c | 1 + drivers/gpu/drm/xe/xe_gsc_submit.c | 1 + drivers/gpu/drm/xe/xe_gt_clock.c | 1 + drivers/gpu/drm/xe/xe_uc.c | 1 + drivers/gpu/drm/xe/xe_vm.h | 1 + 5 files changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 541361caff3b..37e056fde95d 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -7,6 +7,7 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_exec_queue_types.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index d34d03248843..9ede483d37ef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -8,6 +8,7 @@ #include #include "abi/gsc_command_header_abi.h" +#include "xe_assert.h" #include "xe_bb.h" #include "xe_exec_queue.h" #include "xe_gt_printk.h" diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index c7bca20f6b65..9ff2061133df 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -7,6 +7,7 @@ #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_macros.h" diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0f6cfe06e635..45035e38388b 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -5,6 +5,7 @@ #include "xe_uc.h" +#include "xe_assert.h" #include "xe_device.h" #include "xe_gsc.h" #include "xe_gsc_proxy.h" diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 204a4ff63f88..3ac9021f970e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -6,6 +6,7 @@ #ifndef _XE_VM_H_ #define _XE_VM_H_ +#include "xe_assert.h" #include "xe_bo_types.h" #include "xe_macros.h" #include "xe_map.h" -- cgit From 93dd6ad89c7d436da988cb5917daf406a3941893 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 13:09:58 +0200 Subject: drm/xe: Don't rely on xe_force_wake.h to be included elsewhere While xe_force_wake.h is now included from the xe_device.h, we want to drop that include as we don't need it there. Explicitly include xe_force_wake.h where needed. 
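For reference, the force-wake bracket these callers depend on (and the reason each now pulls in xe_force_wake.h itself) is the same get/put pattern visible in xe_mocs_dump() above; a sketch, with demo_touch_gt_regs() being hypothetical:

#include "xe_force_wake.h"
#include "xe_gt.h"

static void demo_touch_gt_regs(struct xe_gt *gt)
{
	/* Wake the GT power domain before any register access. */
	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
		return;	/* wake request failed; don't touch the HW */

	/* ... MMIO reads/writes are safe here ... */

	/* Drop the reference so the GT may power-gate again. */
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
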
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240507110959.2747-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 1 + drivers/gpu/drm/xe/xe_debugfs.c | 1 + drivers/gpu/drm/xe/xe_gsc.c | 1 + drivers/gpu/drm/xe/xe_gsc_proxy.c | 1 + drivers/gpu/drm/xe/xe_gt_idle.c | 1 + drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 1 + drivers/gpu/drm/xe/xe_guc_pc.c | 1 + drivers/gpu/drm/xe/xe_mmio.c | 1 + drivers/gpu/drm/xe/xe_mocs.c | 1 + drivers/gpu/drm/xe/xe_pat.c | 1 + drivers/gpu/drm/xe/xe_query.c | 1 + 11 files changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index d46f87a039f2..eb67ecf08db2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -13,6 +13,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_device_types.h" +#include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 2c060a0d6251..1011e5d281fa 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -12,6 +12,7 @@ #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt_debugfs.h" #include "xe_gt_printk.h" #include "xe_guc_ads.h" diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 60202b903687..8cc6420a9e7f 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 1b908d238bd1..6d6d1068cf23 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -15,6 +15,7 @@ #include "abi/gsc_proxy_commands_abi.h" #include "regs/xe_gsc_regs.h" #include "xe_bo.h" +#include "xe_force_wake.h" #include "xe_gsc.h" #include "xe_gsc_submit.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 8fc0f3f6ecc5..a4f6f0a96d05 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -5,6 +5,7 @@ #include +#include "xe_force_wake.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_idle.h" diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index c3d015a7ac33..105797776a6c 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -7,6 +7,7 @@ #include "abi/guc_actions_abi.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_guc.h" diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 8fc757900ed1..d10aab29651e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -15,6 +15,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_idle.h" #include "xe_gt_sysfs.h" diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 2b18e8149ec3..05edab0e085d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -15,6 +15,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include 
"xe_device.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 4780708e5fae..f04754ad911b 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -9,6 +9,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index d5b516f115ad..4ee32ee1cc88 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -10,6 +10,7 @@ #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index df407d73e5f5..29f847debb5c 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -16,6 +16,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc_hwconfig.h" -- cgit From b7f6318a9c3d9c79b724b20ff5382775a9c58346 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 13:09:59 +0200 Subject: drm/xe: Fix xe_device.h Some explicit includes are needed only from the xe_device.c. And there is no need for redundant forward declarations. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240507110959.2747-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_device.h | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ca7a101bd34e..f8eb477f359d 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -16,6 +16,7 @@ #include #include "display/xe_display.h" +#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" @@ -26,6 +27,7 @@ #include "xe_drv.h" #include "xe_exec.h" #include "xe_exec_queue.h" +#include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gsc_proxy.h" #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 82317580f4bf..3ed14072d8d1 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -6,15 +6,9 @@ #ifndef _XE_DEVICE_H_ #define _XE_DEVICE_H_ -struct xe_exec_queue; -struct xe_file; - #include -#include "regs/xe_gpu_commands.h" #include "xe_device_types.h" -#include "xe_force_wake.h" -#include "xe_macros.h" static inline struct xe_device *to_xe_device(const struct drm_device *dev) { -- cgit From ee7284230644e21fef0e38fc5bf8f907b6bb7f7c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 6 May 2024 07:19:17 -0700 Subject: drm/xe/ads: Use flexible-array Zero-length arrays are deprecated and flexible arrays should be used instead: https://www.kernel.org/doc/html/v6.9-rc7/process/deprecated.html#zero-length-and-one-element-arrays Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202405051824.AmjAI5Pg-lkp@intel.com/ Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240506141917.205714-1-lucas.demarchi@intel.com 
Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 6a5eb21748b1..b48639cfe955 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -109,7 +109,7 @@ struct __guc_ads_blob { struct guc_engine_usage engine_usage; struct guc_um_init_params um_init_params; /* From here on, location is dynamic! Refer to above diagram. */ - struct guc_mmio_reg regset[0]; + struct guc_mmio_reg regset[]; } __packed; #define ads_blob_read(ads_, field_) \ -- cgit From 598dc939edf8d7bb1d69e84513c31451812128fc Mon Sep 17 00:00:00 2001 From: Bommu Krishnaiah Date: Thu, 18 Apr 2024 16:45:34 +0530 Subject: drm/xe/xe2: Add workaround 14021402888 This workaround applies to Graphics 20.01 as an RCS engine workaround. Signed-off-by: Bommu Krishnaiah Cc: Tejas Upadhyay Cc: Matt Roper Cc: Himal Prasad Ghimiray Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240418111534.481568-1-krishnaiah.bommu@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 8f44437c8e02..9cacdcfe27ff 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -402,6 +402,7 @@ #define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) #define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) +#define CLEAR_OPTIMIZATION_DISABLE REG_BIT(6) #define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) #define DISABLE_ECC REG_BIT(5) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 134a34dbfe8d..05db53c1448c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -533,6 +533,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPM */ -- cgit From 85cfc412579c041f1aaebba71427acec75ceca39 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:50 -0700 Subject: drm/xe: Minor cleanup in LRC handling Properly define register fields and remove redundant lower_32_bits(). 
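To illustrate the cleanup, compare a raw hex mask with a declared bit field, here sketched with the generic kernel macros (illustrative demo_* values; the driver itself uses the REG_GENMASK()/REG_FIELD_GET() wrappers from its register headers):

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/bitfield.h>

/* Raw constant: the reader must decode 0x001FFFFC by hand. */
#define DEMO_HEAD_ADDR_RAW	0x001FFFFC

/* Declared field: the bit span 20:2 is explicit, and GENMASK(20, 2)
 * expands to the same 0x001FFFFC value. */
#define DEMO_HEAD_ADDR		GENMASK(20, 2)

static inline u32 demo_ring_head(u32 reg_val)
{
	/* Masking keeps the byte-aligned ring head address, matching how
	 * RING_HEAD readers consume it; FIELD_GET() would also shift. */
	return reg_val & DEMO_HEAD_ADDR;
}
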
Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-2-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++-- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 97d2aed63e01..7e1b0fd68275 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -44,9 +44,10 @@ #define GSCCS_RING_BASE 0x11a000 #define RING_TAIL(base) XE_REG((base) + 0x30) +#define TAIL_ADDR REG_GENMASK(20, 3) #define RING_HEAD(base) XE_REG((base) + 0x34) -#define HEAD_ADDR 0x001FFFFC +#define HEAD_ADDR REG_GENMASK(20, 2) #define RING_START(base) XE_REG((base) + 0x38) @@ -136,7 +137,6 @@ #define RING_VALID_MASK 0x00000001 #define RING_VALID 0x00000001 #define STOP_RING REG_BIT(8) -#define TAIL_ADDR 0x001FFFF8 #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2066d34ddf0b..f759f4c10914 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1354,7 +1354,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); -- cgit From d6219e1cd5e321351954e317b362db2c1d34402a Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:51 -0700 Subject: drm/xe: Add Indirect Ring State support When Indirect Ring State is enabled, the Ring Buffer state and Batch Buffer state are context save/restored to/from Indirect Ring State instead of the LRC. The Indirect Ring State is a 4K page mapped in global GTT at a 4K aligned address. This address is programmed in the INDIRECT_RING_STATE register of the corresponding context's LRC. 
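Condensed, the wiring this patch performs when an LRC is initialized amounts to three steps. The sketch below is assembled from the helpers the diff introduces; it is not a drop-in function, and error handling plus the legacy (non-indirect) path are omitted:

static void demo_wire_indirect_ring_state(struct xe_lrc *lrc, u32 *regs)
{
	/* 1) Opt the context in via the masked CTX_CONTEXT_CONTROL register. */
	regs[CTX_CONTEXT_CONTROL] |=
		_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* 2) Point the LRC at the 4K indirect ring state page in global GTT. */
	xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
			     __xe_lrc_indirect_ring_ggtt_addr(lrc));

	/* 3) Ring head/tail/start/ctl now live in that page, not in the LRC. */
	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
}
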
v2: Fix kernel-doc, add bspec reference v3: Fix typo in commit text Bspec: 67296, 67139 Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-3-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 7 ++ drivers/gpu/drm/xe/xe_gt.c | 6 +- drivers/gpu/drm/xe/xe_gt.h | 7 ++ drivers/gpu/drm/xe/xe_gt_types.h | 6 +- drivers/gpu/drm/xe/xe_guc_ads.c | 5 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 185 ++++++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_lrc.h | 5 +- drivers/gpu/drm/xe/xe_lrc_types.h | 4 + drivers/gpu/drm/xe/xe_pci.c | 2 + drivers/gpu/drm/xe/xe_pci_types.h | 3 + 12 files changed, 197 insertions(+), 36 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7e1b0fd68275..260a44f46f7e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -125,6 +125,7 @@ #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) +#define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 1825d8f79db6..e6ca8bbda8f4 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,6 +11,7 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) @@ -23,4 +24,10 @@ #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) +#define INDIRECT_CTX_RING_HEAD (0x02 + 1) +#define INDIRECT_CTX_RING_TAIL (0x04 + 1) +#define INDIRECT_CTX_RING_START (0x06 + 1) +#define INDIRECT_CTX_RING_START_UDW (0x08 + 1) +#define INDIRECT_CTX_RING_CTL (0x0a + 1) + #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0528d599c3fe..36c7b1631fa6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -160,7 +160,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (q->hwe->class == XE_ENGINE_CLASS_RENDER) /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false); + bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); else /* Just pick a large BB size */ bb = xe_bb_new(gt, SZ_4K, false); @@ -244,7 +244,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_tuning_process_lrc(hwe); default_lrc = drmm_kzalloc(&xe->drm, - xe_lrc_size(xe, hwe->class), + xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!default_lrc) return -ENOMEM; @@ -294,7 +294,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) xe_map_memcpy_from(xe, default_lrc, &q->lrc[0].bo->vmap, xe_lrc_pphwsp_offset(&q->lrc[0]), - xe_lrc_size(xe, hwe->class)); + xe_gt_lrc_size(gt, hwe->class)); gt->default_lrc[hwe->class] = default_lrc; put_nop_q: diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index ed6ea8057e35..8474c50b1b30 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -8,6 +8,7 @@ #include +#include 
"xe_device.h" #include "xe_device_types.h" #include "xe_hw_engine.h" @@ -58,6 +59,12 @@ struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, u16 instance, bool logical); +static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) +{ + return gt->info.has_indirect_ring_state && + xe_device_uc_enabled(gt_to_xe(gt)); +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index cfdc761ff7f4..8dc203413a27 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -110,8 +110,6 @@ struct xe_gt { struct { /** @info.type: type of GT */ enum xe_gt_type type; - /** @info.id: Unique ID of this GT within the PCI Device */ - u8 id; /** @info.reference_clock: clock frequency */ u32 reference_clock; /** @info.engine_mask: mask of engines present on GT */ @@ -124,6 +122,10 @@ struct xe_gt { u64 __engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; + /** @info.id: Unique ID of this GT within the PCI Device */ + u8 id; + /** @info.has_indirect_ring_state: GT has indirect ring state support */ + u8 has_indirect_ring_state:1; } info; /** diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index b48639cfe955..9c33cca4e370 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -267,7 +267,6 @@ static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class) static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); size_t total_size = 0, alloc_size, real_size; int class; @@ -276,7 +275,7 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) if (!engine_enable_mask(gt, class)) continue; - real_size = xe_lrc_size(xe, class); + real_size = xe_gt_lrc_size(gt, class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; } @@ -774,7 +773,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) xe_gt_assert(gt, gt->default_lrc[class]); - real_size = xe_lrc_size(xe, class); + real_size = xe_gt_lrc_size(gt, class); alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 2c0aa3443cd9..fde527d34f58 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -677,7 +677,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) if (xe_exec_queue_is_parallel(q)) wq_item_append(q); else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) return; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index f759f4c10914..ef954cd5cd68 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -34,12 +34,15 @@ #define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) +#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K + struct xe_lrc_snapshot { struct xe_bo *lrc_bo; void *lrc_snapshot; unsigned long lrc_size, lrc_offset; u32 context_desc; + u32 indirect_context_desc; u32 head; struct { u32 internal; @@ -55,20 +58,25 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } -size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { + struct xe_device *xe = 
gt_to_xe(gt); + size_t size; + switch (class) { case XE_ENGINE_CLASS_RENDER: if (GRAPHICS_VER(xe) >= 20) - return 4 * SZ_4K; + size = 4 * SZ_4K; else - return 14 * SZ_4K; + size = 14 * SZ_4K; + break; case XE_ENGINE_CLASS_COMPUTE: /* 14 pages since graphics_ver == 11 */ if (GRAPHICS_VER(xe) >= 20) - return 3 * SZ_4K; + size = 3 * SZ_4K; else - return 14 * SZ_4K; + size = 14 * SZ_4K; + break; default: WARN(1, "Unknown engine class: %d", class); fallthrough; @@ -76,8 +84,14 @@ size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: case XE_ENGINE_CLASS_OTHER: - return 2 * SZ_4K; + size = 2 * SZ_4K; } + + /* Add indirect ring state page */ + if (xe_gt_has_indirect_ring_state(gt)) + size += LRC_INDIRECT_RING_STATE_SIZE; + + return size; } /* @@ -508,6 +522,32 @@ static const u8 xe2_xcs_offsets[] = { 0 }; +static const u8 xe2_indirect_ring_state_offsets[] = { + NOP(1), /* [0x00] */ + LRI(5, POSTED), /* [0x01] */ + REG(0x034), /* [0x02] RING_BUFFER_HEAD */ + REG(0x030), /* [0x04] RING_BUFFER_TAIL */ + REG(0x038), /* [0x06] RING_BUFFER_START */ + REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ + REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ + + NOP(5), /* [0x0c] */ + LRI(9, POSTED), /* [0x11] */ + REG(0x168), /* [0x12] BB_ADDR_UDW */ + REG(0x140), /* [0x14] BB_ADDR */ + REG(0x110), /* [0x16] BB_STATE */ + REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ + REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ + + NOP(12), /* [0x00] */ + + 0 +}; + #undef REG16 #undef REG #undef LRI @@ -546,6 +586,10 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (xe_gt_has_indirect_ring_state(hwe->gt)) + regs[CTX_CONTEXT_CONTROL] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); + /* TODO: Timestamp */ } @@ -589,6 +633,11 @@ static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) regs[x + 1] |= STOP_RING << 16; } +static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) +{ + return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; +} + static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) { return 0; @@ -643,6 +692,12 @@ static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } +static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) +{ + /* Indirect ring state page is at the very end of LRC */ + return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; +} + #define DECL_MAP_ADDR_HELPERS(elem) \ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ @@ -663,6 +718,7 @@ DECL_MAP_ADDR_HELPERS(seqno) DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(parallel) +DECL_MAP_ADDR_HELPERS(indirect_ring) #undef DECL_MAP_ADDR_HELPERS @@ -671,6 +727,35 @@ u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) return __xe_lrc_pphwsp_ggtt_addr(lrc); } +u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) +{ + if (!xe_lrc_has_indirect_ring_state(lrc)) + return 0; + + return __xe_lrc_indirect_ring_ggtt_addr(lrc); +} + +static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = 
__xe_lrc_indirect_ring_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + return xe_map_read32(xe, &map); +} + +static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, + int reg_nr, u32 val) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_indirect_ring_map(lrc); + iosys_map_incr(&map, reg_nr * sizeof(u32)); + xe_map_write32(xe, &map, val); +} + u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) { struct xe_device *xe = lrc_to_xe(lrc); @@ -693,20 +778,25 @@ void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) static void *empty_lrc_data(struct xe_hw_engine *hwe) { - struct xe_device *xe = gt_to_xe(hwe->gt); + struct xe_gt *gt = hwe->gt; void *data; u32 *regs; - data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL); + data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL); if (!data) return NULL; /* 1st page: Per-Process of HW status Page */ regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(xe, hwe->class), hwe); + set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); set_context_control(regs, hwe); set_memory_based_intr(regs, hwe); reset_stop_ring(regs, hwe); + if (xe_gt_has_indirect_ring_state(gt)) { + regs = data + xe_gt_lrc_size(gt, hwe->class) - + LRC_INDIRECT_RING_STATE_SIZE; + set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); + } return data; } @@ -731,23 +821,27 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct iosys_map map; void *init_data = NULL; u32 arb_enable; + u32 lrc_size; int err; lrc->flags = 0; + lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + if (xe_gt_has_indirect_ring_state(gt)) + lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, - ring_size + xe_lrc_size(xe, hwe->class), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->size = lrc_size; lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -772,10 +866,10 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE); + xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); } else { xe_map_memcpy_to(xe, &map, 0, init_data, - xe_lrc_size(xe, hwe->class)); + xe_gt_lrc_size(gt, hwe->class)); kfree(init_data); } @@ -786,11 +880,25 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_drm_client_add_bo(vm->xef->client, lrc->bo); } - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + if (xe_gt_has_indirect_ring_state(gt)) { + xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, + __xe_lrc_indirect_ring_ggtt_addr(lrc)); + + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, + __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + } else { + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, + RING_CTL_SIZE(lrc->ring.size) | RING_VALID); + } + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -834,14 +942,36 @@ void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_put(lrc->bo); } +void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); +} + +u32 xe_lrc_ring_tail(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; +} + void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) { - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); + if (xe_lrc_has_indirect_ring_state(lrc)) + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); + else + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); } u32 xe_lrc_ring_head(struct xe_lrc *lrc) { - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; } u32 xe_lrc_ring_space(struct xe_lrc *lrc) @@ -1214,7 +1344,7 @@ void xe_lrc_dump_default(struct 
drm_printer *p, * hardware status page. */ dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4; + remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; while (remaining_dw > 0) { if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { @@ -1355,9 +1485,10 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) return NULL; snapshot->context_desc = xe_lrc_ggtt_addr(lrc); + snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->tail.memory = xe_lrc_ring_tail(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); @@ -1405,6 +1536,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer return; drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", + snapshot->indirect_context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", snapshot->tail.internal, snapshot->tail.memory); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index d32fa31faa2c..06a95a598736 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -21,14 +21,17 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size); void xe_lrc_finish(struct xe_lrc *lrc); -size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class); +size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); +void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail); +u32 xe_lrc_ring_tail(struct xe_lrc *lrc); void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head); u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); +u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index b716df0dfb4e..cdbf03faef15 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -20,10 +20,14 @@ struct xe_lrc { */ struct xe_bo *bo; + /** @size: size of lrc including any indirect ring state page */ + u32 size; + /** @tile: tile which this LRC belongs to */ struct xe_tile *tile; /** @flags: LRC flags */ +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 u32 flags; /** @ring: submission ring state */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 99723a423850..595e4096a17a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -661,6 +661,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->primary_gt; gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.__engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.__engine_mask |= media_desc->hw_engine_mask; @@ -678,6 +679,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.has_indirect_ring_state = 
media_desc->has_indirect_ring_state; gt->info.__engine_mask = media_desc->hw_engine_mask; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e1f2b4879fc2..79b0f80376a4 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -27,6 +27,7 @@ struct xe_graphics_desc { u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; + u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; }; @@ -37,6 +38,8 @@ struct xe_media_desc { u8 rel; u64 hw_engine_mask; /* hardware engines provided by media IP */ + + u8 has_indirect_ring_state:1; }; struct gmdid_map { -- cgit From 7578c2f811fffd7178de3075a19bddcbb7e0b2e7 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:52 -0700 Subject: drm/xe: Dump Indirect Ring State registers Dump INDIRECT_RING_STATE and RING_START_UDW registers. v2: Add bspec reference Bspec: 67137, 67138 Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-4-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_hw_engine.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 4 ++++ 3 files changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 260a44f46f7e..263ffc7bc2ef 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -55,6 +55,8 @@ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ +#define RING_START_UDW(base) XE_REG((base) + 0x48) + #define RING_PSMI_CTL(base) XE_REG((base) + 0x50, XE_REG_OPTION_MASKED) #define RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) #define WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) @@ -110,6 +112,8 @@ #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) +#define INDIRECT_RING_STATE(base) XE_REG((base) + 0x108) + #define RING_BBADDR(base) XE_REG((base) + 0x140) #define RING_BBADDR_UDW(base) XE_REG((base) + 0x168) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index ec69803152a2..45f582a7caaa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -908,6 +908,13 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + if (xe_gt_has_indirect_ring_state(hwe->gt)) { + snapshot->reg.indirect_ring_state = + hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + snapshot->reg.ring_start_udw = + hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + } + snapshot->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = @@ -997,6 +1004,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", snapshot->reg.ring_execlist_sq_contents); drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); + drm_printf(p, "\tRING_START_UDW: 0x%08x\n", + snapshot->reg.ring_start_udw); drm_printf(p, 
"\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); @@ -1010,6 +1019,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd); drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); + drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n", + snapshot->reg.indirect_ring_state); drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); xe_hw_engine_snapshot_instdone_print(snapshot, p); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 9f9755e31b9f..5f4b67acba99 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -189,6 +189,8 @@ struct xe_hw_engine_snapshot { u32 ring_hws_pga; /** @reg.ring_start: RING_START */ u32 ring_start; + /** @reg.ring_start_udw: RING_START_UDW */ + u32 ring_start_udw; /** @reg.ring_head: RING_HEAD */ u32 ring_head; /** @reg.ring_tail: RING_TAIL */ @@ -207,6 +209,8 @@ struct xe_hw_engine_snapshot { u32 ring_emr; /** @reg.ring_eir: RING_EIR */ u32 ring_eir; + /** @reg.indirect_ring_state: INDIRECT_RING_STATE */ + u32 indirect_ring_state; /** @reg.ipehr: IPEHR */ u32 ipehr; /** @reg.rcu_mode: RCU_MODE */ -- cgit From fe0154cf8222d9e38c60ccc124adb2f9b5272371 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Tue, 7 May 2024 15:42:53 -0700 Subject: drm/xe/xe2: Enable Indirect Ring State support for Xe2 Indirect Ring State is the recommended mode for Xe2 platforms, enable it by default. v2: Set has_indirect_ring_state to '1' instead of 'true' Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240507224255.5059-5-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 595e4096a17a..83e662a36dfa 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -166,6 +166,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ + .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .va_bits = 48, \ -- cgit From 205e5c4b20c3c2e2033338a935f6a59843dc69de Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 7 May 2024 13:04:11 +0000 Subject: drm/xe: Fix UBSAN shift-out-of-bounds failure Here is the failure stack: [ 12.988209] ------------[ cut here ]------------ [ 12.988216] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ 12.988232] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ 12.988235] CPU: 4 PID: 1310 Comm: gnome-shell Tainted: G U 6.9.0-rc6+prerelease1158+ #19 [ 12.988237] Hardware name: Intel Corporation Raptor Lake Client Platform/RPL-S ADP-S DDR5 UDIMM CRB, BIOS RPLSFWI1.R00.3301.A02.2208050712 08/05/2022 [ 12.988239] Call Trace: [ 12.988240] [ 12.988242] dump_stack_lvl+0xd7/0xf0 [ 12.988248] dump_stack+0x10/0x20 [ 12.988250] ubsan_epilogue+0x9/0x40 [ 12.988253] __ubsan_handle_shift_out_of_bounds+0x10e/0x170 [ 12.988260] dma_resv_reserve_fences.cold+0x2b/0x48 [ 12.988262] ? 
ww_mutex_lock_interruptible+0x3c/0x110 [ 12.988267] drm_exec_prepare_obj+0x45/0x60 [drm_exec] [ 12.988271] ? vm_bind_ioctl_ops_execute+0x5b/0x740 [xe] [ 12.988345] vm_bind_ioctl_ops_execute+0x78/0x740 [xe] It is caused by the value 0 of the num_fences parameter in drm_exec_prepare_obj(): in __rounddown_pow_of_two(), the resulting "0 - 1" underflows to ULONG_MAX, whose ilog2 shifts a 64-bit type by 64, which is the shift-out-of-bounds reported above. By design drm_exec_prepare_obj() should be called only when there are fences to be reserved. If num_fences is 0, calling drm_exec_lock_obj() is sufficient as was done in commit 9377de4cb3e8 ("drm/xe/vm: Avoid reserving zero fences") Cc: Nirmoy Das Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Nirmoy Das Link: https://lore.kernel.org/all/24d4a9a9-c622-4f56-8672-21f4c6785476@amd.com Link: https://patchwork.freedesktop.org/patch/msgid/20240507130411.630361-1-shuicheng.lin@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d17192c8b7de..c5b1694b292f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2692,7 +2692,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, if (bo) { if (!bo->vm) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0); + err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, xe_vma_vm(vma), true); } @@ -2777,7 +2777,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, struct xe_vma_op *op; int err; - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0); + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); if (err) return err; -- cgit From 515f08972355e160f896f612347121fbb685e740 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 19 Apr 2024 18:29:45 +0530 Subject: drm/xe/hwmon: Remove unwanted write permission for currN_label Change umode of currN_label from 0644 to 0444, as write permission is not needed for a label. Signed-off-by: Karthik Poosa Reviewed-by: Riana Tauro Link: https://patchwork.freedesktop.org/patch/msgid/20240419125945.4085629-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 453e601ddd5e..dca275117232 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -550,12 +550,17 @@ xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; + /* hwmon sysfs attribute of current available only for package */ + if (channel != CHANNEL_PKG) + return 0; + switch (attr) { case hwmon_curr_crit: - case hwmon_curr_label: - if (channel == CHANNEL_PKG) return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + case hwmon_curr_label: + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; break; default: return 0; -- cgit From 62010b3cd6030ff743930c9ae898d8e4e943100d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 8 May 2024 19:48:56 +0200 Subject: drm/xe: Move xe_gpu_commands.h file to instructions/ All other files with command definitions are in the instructions/ folder. Move xe_gpu_commands.h also there.
Signed-off-by: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240508174856.1908-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/instructions/xe_gpu_commands.h | 70 +++++++++++++++++++++++ drivers/gpu/drm/xe/regs/xe_gpu_commands.h | 70 ----------------------- drivers/gpu/drm/xe/xe_bb.c | 1 - drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 1 - drivers/gpu/drm/xe/xe_lrc.c | 1 - drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 2 +- 8 files changed, 73 insertions(+), 76 deletions(-) create mode 100644 drivers/gpu/drm/xe/instructions/xe_gpu_commands.h delete mode 100644 drivers/gpu/drm/xe/regs/xe_gpu_commands.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h new file mode 100644 index 000000000000..a255946b6f77 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GPU_COMMANDS_H_ +#define _XE_GPU_COMMANDS_H_ + +#include "regs/xe_reg_defs.h" + +#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) +#define SRC_ACCESS_TYPE_SHIFT 21 +#define DST_ACCESS_TYPE_SHIFT 20 +#define CCS_SIZE_MASK GENMASK(17, 8) +#define XE2_CCS_SIZE_MASK GENMASK(18, 9) +#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) +#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) +#define NUM_CCS_BYTES_PER_BLOCK 256 +#define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 512 : 256) + +#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) +#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) +#define XY_FAST_COLOR_BLT_DW 16 +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) +#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) +#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 + +#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) +#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) +#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) +#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) +#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) + +#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) +#define PVC_MEM_SET_CMD_LEN_DW 7 +#define PVC_MEM_SET_MATRIX REG_BIT(17) +#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) +/* Bspec lists field as [6:0], but index alone is from [6:1] */ +#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) +#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3) + +#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) + +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ + +#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) +#define PIPE_CONTROL_AMFS_FLUSH (1<<25) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) +#define PIPE_CONTROL_LRI_POST_SYNC BIT(23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) +#define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) +#define PIPE_CONTROL_TLB_INVALIDATE BIT(18) +#define PIPE_CONTROL_PSD_SYNC (1<<17) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) 
+#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h b/drivers/gpu/drm/xe/regs/xe_gpu_commands.h deleted file mode 100644 index a255946b6f77..000000000000 --- a/drivers/gpu/drm/xe/regs/xe_gpu_commands.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GPU_COMMANDS_H_ -#define _XE_GPU_COMMANDS_H_ - -#include "regs/xe_reg_defs.h" - -#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) -#define SRC_ACCESS_TYPE_SHIFT 21 -#define DST_ACCESS_TYPE_SHIFT 20 -#define CCS_SIZE_MASK GENMASK(17, 8) -#define XE2_CCS_SIZE_MASK GENMASK(18, 9) -#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26) -#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28) -#define NUM_CCS_BYTES_PER_BLOCK 256 -#define NUM_BYTES_PER_CCS_BYTE(_xe) (GRAPHICS_VER(_xe) >= 20 ? 512 : 256) - -#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) -#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) -#define XY_FAST_COLOR_BLT_DW 16 -#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22) -#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24) -#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 - -#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) -#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24) -#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31) -#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30) -#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20) - -#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22) -#define PVC_MEM_SET_CMD_LEN_DW 7 -#define PVC_MEM_SET_MATRIX REG_BIT(17) -#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24) -/* Bspec lists field as [6:0], but index alone is from [6:1] */ -#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1) -#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3) - -#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) - -#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ - -#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) -#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) -#define PIPE_CONTROL_AMFS_FLUSH (1<<25) -#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) -#define PIPE_CONTROL_LRI_POST_SYNC BIT(23) -#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) -#define PIPE_CONTROL_CS_STALL (1<<20) -#define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET (1<<19) -#define PIPE_CONTROL_TLB_INVALIDATE BIT(18) -#define PIPE_CONTROL_PSD_SYNC (1<<17) -#define PIPE_CONTROL_QW_WRITE (1<<14) -#define PIPE_CONTROL_DEPTH_STALL (1<<13) -#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) -#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) -#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) -#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) - -#endif diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 37e056fde95d..a13e0b3a169e 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -6,7 +6,6 @@ #include "xe_bb.h" #include "instructions/xe_mi_commands.h" -#include "regs/xe_gpu_commands.h" #include "xe_assert.h" #include "xe_device.h" #include 
"xe_exec_queue_types.h" diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f8eb477f359d..4165e1347371 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -16,7 +16,7 @@ #include #include "display/xe_display.h" -#include "regs/xe_gpu_commands.h" +#include "instructions/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index dece2785933c..e9dee1e14fef 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -9,7 +9,6 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index ef954cd5cd68..9b0a4078add3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -11,7 +11,6 @@ #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_gfx_state_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 9f6e9b7f11c8..36db5ed1a572 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -14,8 +14,8 @@ #include +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gtt_defs.h" #include "tests/xe_test.h" #include "xe_assert.h" diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d42b3f33bd7a..a3ca718456f6 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -7,9 +7,9 @@ #include +#include "instructions/xe_gpu_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" -#include "regs/xe_gpu_commands.h" #include "regs/xe_gt_regs.h" #include "regs/xe_lrc_layout.h" #include "xe_exec_queue_types.h" -- cgit From c3203ca3b8a6aab7c5a5dc3f5e165a53410461f6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 8 May 2024 19:10:00 +0200 Subject: drm/xe: Rename few xe_args.h macros To minimize the risk of future name collisions, rename macros to always include the ARG or ARGS tag: DROP_FIRST to DROP_FIRST_ARG PICK_FIRST to FIRST_ARG PICK_LAST to LAST_ARG Suggested-by: Andy Shevchenko Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Andy Shevchenko #v2 Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240508171000.1864-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/xe_args_test.c | 101 +++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_args.h | 72 +++++++++++++++-------- drivers/gpu/drm/xe/xe_rtp_helpers.h | 12 ++-- 3 files changed, 119 insertions(+), 66 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_args_test.c b/drivers/gpu/drm/xe/tests/xe_args_test.c index 9b44c1ab6364..f3fb23aa5d2e 100644 --- a/drivers/gpu/drm/xe/tests/xe_args_test.c +++ b/drivers/gpu/drm/xe/tests/xe_args_test.c @@ -21,10 +21,10 @@ static void call_args_example(struct kunit *test) #undef buz } -static void drop_first_example(struct kunit *test) +static void drop_first_arg_example(struct kunit *test) { #define foo X, Y, Z, Q -#define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) +#define bar CALL_ARGS(COUNT_ARGS, 
DROP_FIRST_ARG(foo)) KUNIT_EXPECT_EQ(test, bar, 3); @@ -32,12 +32,12 @@ static void drop_first_example(struct kunit *test) #undef bar } -static void pick_first_example(struct kunit *test) +static void first_arg_example(struct kunit *test) { int X = 1; #define foo X, Y, Z, Q -#define bar PICK_FIRST(foo) +#define bar FIRST_ARG(foo) KUNIT_EXPECT_EQ(test, bar, X); KUNIT_EXPECT_STREQ(test, __stringify(bar), "X"); @@ -46,12 +46,12 @@ static void pick_first_example(struct kunit *test) #undef bar } -static void pick_last_example(struct kunit *test) +static void last_arg_example(struct kunit *test) { int Q = 1; #define foo X, Y, Z, Q -#define bar PICK_LAST(foo) +#define bar LAST_ARG(foo) KUNIT_EXPECT_EQ(test, bar, Q); KUNIT_EXPECT_STREQ(test, __stringify(bar), "Q"); @@ -60,11 +60,29 @@ static void pick_last_example(struct kunit *test) #undef bar } +static void pick_arg_example(struct kunit *test) +{ + int Y = 1, Z = 2; + +#define foo X, Y, Z, Q +#define bar PICK_ARG(2, foo) +#define buz PICK_ARG3(foo) + + KUNIT_EXPECT_EQ(test, bar, Y); + KUNIT_EXPECT_STREQ(test, __stringify(bar), "Y"); + KUNIT_EXPECT_EQ(test, buz, Z); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Z"); + +#undef foo +#undef bar +#undef buz +} + static void sep_comma_example(struct kunit *test) { #define foo(f) f(X) f(Y) f(Z) f(Q) -#define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) -#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) +#define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify)) +#define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA))) static const char * const a[] = { bar }; @@ -123,61 +141,74 @@ static void call_args_test(struct kunit *test) KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, MAX_ARGS), 12); } -static void drop_first_test(struct kunit *test) +static void drop_first_arg_test(struct kunit *test) { int Y = -2, Z = -3, Q = -4; - int a[] = { DROP_FIRST(FOO_ARGS) }; + int a[] = { DROP_FIRST_ARG(FOO_ARGS) }; - KUNIT_EXPECT_EQ(test, DROP_FIRST(0, -1), -1); - KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(0, -1, -2)), -2); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(0, -1), -1); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(0, -1, -2)), -2); - KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST(FOO_ARGS)), 3); - KUNIT_EXPECT_EQ(test, DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS))), -4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(FOO_ARGS)), 3); + KUNIT_EXPECT_EQ(test, DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))), -4); KUNIT_EXPECT_EQ(test, a[0], -2); KUNIT_EXPECT_EQ(test, a[1], -3); KUNIT_EXPECT_EQ(test, a[2], -4); - KUNIT_EXPECT_STREQ(test, __stringify(DROP_FIRST(DROP_FIRST(DROP_FIRST(FOO_ARGS)))), "Q"); + +#define foo DROP_FIRST_ARG(FOO_ARGS) +#define bar DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS)) +#define buz DROP_FIRST_ARG(DROP_FIRST_ARG(DROP_FIRST_ARG(FOO_ARGS))) + + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, foo), 3); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, bar), 2); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, buz), 1); + KUNIT_EXPECT_STREQ(test, __stringify(buz), "Q"); + +#undef foo +#undef bar +#undef buz } -static void pick_first_test(struct kunit *test) +static void first_arg_test(struct kunit *test) { int X = -1; - int a[] = { PICK_FIRST(FOO_ARGS) }; + int a[] = { FIRST_ARG(FOO_ARGS) }; - KUNIT_EXPECT_EQ(test, PICK_FIRST(-1, -2), -1); + KUNIT_EXPECT_EQ(test, FIRST_ARG(-1, -2), -1); - KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_FIRST(FOO_ARGS)), 1); - KUNIT_EXPECT_EQ(test, PICK_FIRST(FOO_ARGS), -1); + KUNIT_EXPECT_EQ(test, 
CALL_ARGS(COUNT_ARGS, FIRST_ARG(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, FIRST_ARG(FOO_ARGS), -1); KUNIT_EXPECT_EQ(test, a[0], -1); - KUNIT_EXPECT_STREQ(test, __stringify(PICK_FIRST(FOO_ARGS)), "X"); + KUNIT_EXPECT_STREQ(test, __stringify(FIRST_ARG(FOO_ARGS)), "X"); } -static void pick_last_test(struct kunit *test) +static void last_arg_test(struct kunit *test) { int Q = -4; - int a[] = { PICK_LAST(FOO_ARGS) }; + int a[] = { LAST_ARG(FOO_ARGS) }; - KUNIT_EXPECT_EQ(test, PICK_LAST(-1, -2), -2); + KUNIT_EXPECT_EQ(test, LAST_ARG(-1, -2), -2); - KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, PICK_LAST(FOO_ARGS)), 1); - KUNIT_EXPECT_EQ(test, PICK_LAST(FOO_ARGS), -4); + KUNIT_EXPECT_EQ(test, CALL_ARGS(COUNT_ARGS, LAST_ARG(FOO_ARGS)), 1); + KUNIT_EXPECT_EQ(test, LAST_ARG(FOO_ARGS), -4); KUNIT_EXPECT_EQ(test, a[0], -4); - KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(FOO_ARGS)), "Q"); + KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(FOO_ARGS)), "Q"); - KUNIT_EXPECT_EQ(test, PICK_LAST(MAX_ARGS), -12); - KUNIT_EXPECT_STREQ(test, __stringify(PICK_LAST(MAX_ARGS)), "-12"); + KUNIT_EXPECT_EQ(test, LAST_ARG(MAX_ARGS), -12); + KUNIT_EXPECT_STREQ(test, __stringify(LAST_ARG(MAX_ARGS)), "-12"); } static struct kunit_case args_tests[] = { KUNIT_CASE(count_args_test), KUNIT_CASE(call_args_example), KUNIT_CASE(call_args_test), - KUNIT_CASE(drop_first_example), - KUNIT_CASE(drop_first_test), - KUNIT_CASE(pick_first_example), - KUNIT_CASE(pick_first_test), - KUNIT_CASE(pick_last_example), - KUNIT_CASE(pick_last_test), + KUNIT_CASE(drop_first_arg_example), + KUNIT_CASE(drop_first_arg_test), + KUNIT_CASE(first_arg_example), + KUNIT_CASE(first_arg_test), + KUNIT_CASE(last_arg_example), + KUNIT_CASE(last_arg_test), + KUNIT_CASE(pick_arg_example), KUNIT_CASE(sep_comma_example), {} }; diff --git a/drivers/gpu/drm/xe/xe_args.h b/drivers/gpu/drm/xe/xe_args.h index 40b9eb4151d8..4dbc7e53c624 100644 --- a/drivers/gpu/drm/xe/xe_args.h +++ b/drivers/gpu/drm/xe/xe_args.h @@ -35,7 +35,7 @@ #define __CALL_ARGS(f, args...) f(args) /** - * DROP_FIRST - Returns all arguments except the first one. + * DROP_FIRST_ARG - Returns all arguments except the first one. * @args: arguments * * This helper macro allows manipulation the argument list before passing it @@ -44,15 +44,15 @@ * Example: * * #define foo X,Y,Z,Q - * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo)) + * #define bar CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo)) * * With above definitions bar expands to 3. */ -#define DROP_FIRST(args...) __DROP_FIRST(args) -#define __DROP_FIRST(a, b...) b +#define DROP_FIRST_ARG(args...) __DROP_FIRST_ARG(args) +#define __DROP_FIRST_ARG(a, b...) b /** - * PICK_FIRST - Returns the first argument. + * FIRST_ARG - Returns the first argument. * @args: arguments * * This helper macro allows manipulation the argument list before passing it @@ -61,15 +61,15 @@ * Example: * * #define foo X,Y,Z,Q - * #define bar PICK_FIRST(foo) + * #define bar FIRST_ARG(foo) * * With above definitions bar expands to X. */ -#define PICK_FIRST(args...) __PICK_FIRST(args) -#define __PICK_FIRST(a, b...) a +#define FIRST_ARG(args...) __FIRST_ARG(args) +#define __FIRST_ARG(a, b...) a /** - * PICK_LAST - Returns the last argument. + * LAST_ARG - Returns the last argument. * @args: arguments * * This helper macro allows manipulation the argument list before passing it @@ -80,24 +80,46 @@ * Example: * * #define foo X,Y,Z,Q - * #define bar PICK_LAST(foo) + * #define bar LAST_ARG(foo) * * With above definitions bar expands to Q. */ -#define PICK_LAST(args...) 
__PICK_ARG(COUNT_ARGS(args), args) +#define LAST_ARG(args...) __LAST_ARG(args) +#define __LAST_ARG(args...) PICK_ARG(COUNT_ARGS(args), args) + +/** + * PICK_ARG - Returns the n-th argument. + * @n: argument number to be returned + * @args: arguments + * + * This helper macro allows manipulation the argument list before passing it + * to the next level macro. + * + * Like COUNT_ARGS() this macro supports n up to 12. + * Specialized macros PICK_ARG1() to PICK_ARG12() are also available. + * + * Example: + * + * #define foo X,Y,Z,Q + * #define bar PICK_ARG(2, foo) + * #define buz PICK_ARG3(foo) + * + * With above definitions bar expands to Y and buz expands to Z. + */ +#define PICK_ARG(n, args...) __PICK_ARG(n, args) #define __PICK_ARG(n, args...) CALL_ARGS(CONCATENATE(PICK_ARG, n), args) -#define PICK_ARG1(args...) PICK_FIRST(args) -#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST(args)) -#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST(args)) -#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST(args)) -#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST(args)) -#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST(args)) -#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST(args)) -#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST(args)) -#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST(args)) -#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST(args)) -#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST(args)) -#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST(args)) +#define PICK_ARG1(args...) FIRST_ARG(args) +#define PICK_ARG2(args...) PICK_ARG1(DROP_FIRST_ARG(args)) +#define PICK_ARG3(args...) PICK_ARG2(DROP_FIRST_ARG(args)) +#define PICK_ARG4(args...) PICK_ARG3(DROP_FIRST_ARG(args)) +#define PICK_ARG5(args...) PICK_ARG4(DROP_FIRST_ARG(args)) +#define PICK_ARG6(args...) PICK_ARG5(DROP_FIRST_ARG(args)) +#define PICK_ARG7(args...) PICK_ARG6(DROP_FIRST_ARG(args)) +#define PICK_ARG8(args...) PICK_ARG7(DROP_FIRST_ARG(args)) +#define PICK_ARG9(args...) PICK_ARG8(DROP_FIRST_ARG(args)) +#define PICK_ARG10(args...) PICK_ARG9(DROP_FIRST_ARG(args)) +#define PICK_ARG11(args...) PICK_ARG10(DROP_FIRST_ARG(args)) +#define PICK_ARG12(args...) PICK_ARG11(DROP_FIRST_ARG(args)) /** * ARGS_SEP_COMMA - Definition of a comma character. @@ -109,8 +131,8 @@ * Example: * * #define foo(f) f(X) f(Y) f(Z) f(Q) - * #define bar DROP_FIRST(foo(ARGS_SEP_COMMA __stringify)) - * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST(foo(ARGS_SEP_COMMA))) + * #define bar DROP_FIRST_ARG(foo(ARGS_SEP_COMMA __stringify)) + * #define buz CALL_ARGS(COUNT_ARGS, DROP_FIRST_ARG(foo(ARGS_SEP_COMMA))) * * With above definitions bar expands to * "X", "Y", "Z", "Q" diff --git a/drivers/gpu/drm/xe/xe_rtp_helpers.h b/drivers/gpu/drm/xe/xe_rtp_helpers.h index 8129d6d9ef37..7735f217ba71 100644 --- a/drivers/gpu/drm/xe/xe_rtp_helpers.h +++ b/drivers/gpu/drm/xe/xe_rtp_helpers.h @@ -17,7 +17,7 @@ */ #define _XE_ESC(...) __VA_ARGS__ -#define _XE_TUPLE_TAIL(...) (DROP_FIRST(__VA_ARGS__)) +#define _XE_TUPLE_TAIL(...) 
(DROP_FIRST_ARG(__VA_ARGS__)) #define _XE_RTP_CONCAT(a, b) CONCATENATE(XE_RTP_, CONCATENATE(a, b)) @@ -54,10 +54,10 @@ * XE_RTP_TEST_FOO BANANA XE_RTP_TEST_BAR */ #define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_RTP_CONCAT(PASTE_, COUNT_ARGS args_)(prefix_, sep_, args_) -#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) -#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) -#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, PICK_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) +#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) +#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) /* * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer @@ -70,6 +70,6 @@ * * { .a = 10 } */ -#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST _XE_ESC __VA_ARGS__) +#define XE_RTP_DROP_CAST(...) _XE_ESC(DROP_FIRST_ARG _XE_ESC __VA_ARGS__) #endif -- cgit From 304aa805ee8e82adc30159ff43038cd96eb69eb9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:15 +0200 Subject: drm/xe: Fix xe_gt_throttle_sysfs.h We don't need to include drm/drm_managed.h here. We don't need to comment final #endif. Also remove empty line at the end. Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h index 6c61e6f228a8..335c402b51a6 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -6,11 +6,8 @@ #ifndef _XE_GT_THROTTLE_SYSFS_H_ #define _XE_GT_THROTTLE_SYSFS_H_ -#include - struct xe_gt; int xe_gt_throttle_sysfs_init(struct xe_gt *gt); -#endif /* _XE_GT_THROTTLE_SYSFS_H_ */ - +#endif -- cgit From c5d9c6690ed4cefe9390f8d73ad690332dbc7142 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:16 +0200 Subject: drm/xe: Fix xe_guc_ads.h We don't need to include xe_guc_ads_types.h here. Use forward declaration instead. 
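For illustration only, a minimal sketch of the pattern applied by this series of header cleanups ("foo" is a placeholder, not an xe type): a header that only passes pointers around can rely on an incomplete type, so its includers stop pulling in the full definition.

        /* before: every includer of foo.h also parses foo_types.h */
        #include "foo_types.h"

        int foo_init(struct foo *f);

        /* after: a forward declaration suffices for pointer parameters */
        struct foo;

        int foo_init(struct foo *f);

This shortens include chains and incremental rebuild times without changing any generated code.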
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_ads.h b/drivers/gpu/drm/xe/xe_guc_ads.h index 2e2531779122..2e6674c760ff 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.h +++ b/drivers/gpu/drm/xe/xe_guc_ads.h @@ -6,7 +6,7 @@ #ifndef _XE_GUC_ADS_H_ #define _XE_GUC_ADS_H_ -#include "xe_guc_ads_types.h" +struct xe_guc_ads; int xe_guc_ads_init(struct xe_guc_ads *ads); int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads); -- cgit From 38830bfe287f9ad97be87a844237beb8e7e64f4a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:17 +0200 Subject: drm/xe: Fix xe_lrc.h Prefer forward declarations over #include xe_lrc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_lrc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 06a95a598736..e0e841963c23 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -5,14 +5,17 @@ #ifndef _XE_LRC_H_ #define _XE_LRC_H_ -#include "xe_lrc_types.h" +#include struct drm_printer; struct xe_bb; struct xe_device; struct xe_exec_queue; enum xe_engine_class; +struct xe_gt; struct xe_hw_engine; +struct xe_lrc; +struct xe_lrc_snapshot; struct xe_vm; #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) -- cgit From 664de50cbfae048d08e9f3c1c0da377d1269e6d1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 13 May 2024 10:42:18 +0200 Subject: drm/xe: Fix xe_reg_sr.h Prefer forward declarations over #include xe_reg_sr_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513084218.2084-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_reg_sr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index e3197c33afe2..51fbba423e27 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -6,8 +6,6 @@ #ifndef _XE_REG_SR_ #define _XE_REG_SR_ -#include "xe_reg_sr_types.h" - /* * Reg save/restore bookkeeping */ @@ -15,6 +13,8 @@ struct xe_device; struct xe_gt; struct xe_hw_engine; +struct xe_reg_sr; +struct xe_reg_sr_entry; struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); -- cgit From 61549a2ee5940af4d49ba227d7e7798ccf6f04a5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:47 -0700 Subject: drm/xe: Drop __engine_mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not really used, it's just a copy of engine_mask, which already reads the fuses to mark engines as available/not-available. 
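For illustration only, an abridged before/after sketch of the change described above (comments are paraphrases, not the driver's exact layout): two copies of the same mask must be kept in sync by hand, while a single field seeded directly from the PCI descriptor cannot go stale.

        /* before: two masks to keep in sync */
        struct info {
                u64 engine_mask;        /* fused-off engines already cleared */
                u64 __engine_mask;      /* raw copy from xe_pci.c */
        };

        /* after: xe_pci.c writes the one authoritative mask directly */
        struct info {
                u64 engine_mask;
        };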
Reviewed-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 3 --- drivers/gpu/drm/xe/xe_gt_types.h | 6 ------ drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- drivers/gpu/drm/xe/xe_pci.c | 6 +++--- 4 files changed, 5 insertions(+), 14 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 36c7b1631fa6..3b3418eb7bc4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -518,9 +518,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) if (err) goto out_fw; - /* XXX: Fake that we pull the engine mask from hwconfig blob */ - gt->info.engine_mask = gt->info.__engine_mask; - out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); out: diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8dc203413a27..5a114fc9dde7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -114,12 +114,6 @@ struct xe_gt { u32 reference_clock; /** @info.engine_mask: mask of engines present on GT */ u64 engine_mask; - /** - * @info.__engine_mask: mask of engines present on GT read from - * xe_pci.c, used to fake reading the engine_mask from the - * hwconfig blob. - */ - u64 __engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; /** @info.id: Unique ID of this GT within the PCI Device */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 36db5ed1a572..2b12f3621f9e 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -936,8 +936,8 @@ static bool has_service_copy_support(struct xe_gt *gt) * all of the actual service copy engines (BCS1-BCS8) have been fused * off. */ - return gt->info.__engine_mask & GENMASK(XE_HW_ENGINE_BCS8, - XE_HW_ENGINE_BCS1); + return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_BCS1); } static u32 emit_clear_cmd_len(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 83e662a36dfa..fc29eb8e99c5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -663,9 +663,9 @@ static int xe_info_init(struct xe_device *xe, gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; - gt->info.__engine_mask = graphics_desc->hw_engine_mask; + gt->info.engine_mask = graphics_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 && media_desc) - gt->info.__engine_mask |= media_desc->hw_engine_mask; + gt->info.engine_mask |= media_desc->hw_engine_mask; if (MEDIA_VER(xe) < 13 || !media_desc) continue; @@ -681,7 +681,7 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; - gt->info.__engine_mask = media_desc->hw_engine_mask; + gt->info.engine_mask = media_desc->hw_engine_mask; gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH; -- cgit From 402c014cbcc7c9ada6d62ee646b2b359d4793ce2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:48 -0700 Subject: drm/xe: Drop useless forcewake get/put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forcewake used to be needed in xe_gt_init_early() since it was calling xe_gt_topology_init(). 
That call was dropped in commit 4c47049d93b7 ("drm/xe/guc: Fix missing topology init"), but the forcewake calls were left behind. Remove them. Cc: Zhanjun Dong Reviewed-by: Michał Winiarski Reviewed-by: Zhanjun Dong Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3b3418eb7bc4..05b77214f996 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -318,14 +318,6 @@ int xe_gt_init_early(struct xe_gt *gt) return err; } - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - if (err) - return err; - xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt)); err = xe_wa_init(gt); -- cgit From 65c4de2a9148385114b3ff1121143ef1af805a1a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:49 -0700 Subject: drm/xe: Move xe_gt_init_early() where it belongs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Early shall be early enough, stop doing other things with gt before it. Now that xe_gt_init_early() doesn't need forcewake and doesn't depend on the fake engine_mask initialization, move it where it belongs: it doesn't need to be after hwconfig anymore. Reviewed-by: Michał Winiarski Reviewed-by: Vinay Belgaumkar Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 4165e1347371..9c32bd157ecf 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -567,6 +567,12 @@ int xe_device_probe(struct xe_device *xe) xe_ttm_sys_mgr_init(xe); + for_each_gt(gt, xe, id) { + err = xe_gt_init_early(gt); + if (err) + return err; + } + for_each_gt(gt, xe, id) xe_force_wake_init_gt(gt, gt_to_fw(gt)); @@ -605,12 +611,6 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; - for_each_gt(gt, xe, id) { - err = xe_gt_init_early(gt); - if (err) - goto err_irq_shutdown; - } - err = xe_device_set_has_flat_ccs(xe); if (err) goto err_irq_shutdown; -- cgit From 45b9066ec351518657cd09599872f737ceb25f09 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:50 -0700 Subject: drm/xe: Move xe_force_wake_init_gt() inside gt initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_force_wake_init_gt() is a software-only initialization and doesn't need to be called from xe_device_probe(). Move it to initialize together with the gt.
Reviewed-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 3 --- drivers/gpu/drm/xe/xe_gt.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9c32bd157ecf..ad18c48cab99 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -573,9 +573,6 @@ int xe_device_probe(struct xe_device *xe) return err; } - for_each_gt(gt, xe, id) - xe_force_wake_init_gt(gt, gt_to_fw(gt)); - for_each_tile(tile, xe, id) { err = xe_ggtt_init_early(tile->mem.ggtt); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 05b77214f996..e05899691a8c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -328,6 +328,8 @@ int xe_gt_init_early(struct xe_gt *gt) xe_wa_process_oob(gt); xe_tuning_process_gt(gt); + xe_force_wake_init_gt(gt, gt_to_fw(gt)); + return 0; } -- cgit From d1855d284e9f6580c0eaba66bfa04722f4d4dc9b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 13 May 2024 14:37:51 -0700 Subject: drm/xe: Move sw-only pcode initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move it to xe_gt_init_early(), which initializes the sw-only part for each gt. Reviewed-by: Rodrigo Vivi Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20240513213751.1017791-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 3 --- drivers/gpu/drm/xe/xe_gt.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index ad18c48cab99..bedf55928aa4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -597,9 +597,6 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - for_each_gt(gt, xe, id) - xe_pcode_init(gt); - err = xe_display_init_noirq(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e05899691a8c..11870ad2caf6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -44,6 +44,7 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" +#include "xe_pcode.h" #include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" @@ -329,6 +330,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_tuning_process_gt(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); + xe_pcode_init(gt); return 0; } -- cgit From c81858eb52266b3d6ba28ca4f62a198231a10cdc Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 8 May 2024 20:52:15 +0530 Subject: drm/xe: Change pcode timeout to 50msec while polling again Polling is initially attempted with preemption enabled for up to timeout_base_ms, and if it exceeds this timeframe, another attempt is made with preemption disabled, allowing an additional 50 ms before timing out.
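For illustration only, a hedged sketch of that two-phase pattern (poll_status() is a stand-in name, not a real driver function): the first bounded attempt stays preemptible, and only the retry pays for a preemption-disabled section.

        ret = poll_status(gt, request, timeout_base_ms * 1000 /* us */);
        if (ret == -ETIMEDOUT) {
                /* Retry atomically with the fixed 50 ms budget. */
                preempt_disable();
                ret = poll_status(gt, request, 50 * 1000 /* us */);
                preempt_enable();
        }

Keeping the first attempt preemptible avoids monopolizing the CPU in the common case, while the non-preemptible retry guards against the poll loop itself being starved.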
v2 - Rebase v3 - Move warnings to separate patch (Lucas) Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.") Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-2-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index c010ef16fbf5..a5e7da8cf944 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -191,7 +191,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - true, timeout_base_ms * 1000, true); + true, 50 * 1000, true); preempt_enable(); out: -- cgit From 4c0be90e6874b8af30541c37689780fc7c8276c9 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 8 May 2024 20:52:16 +0530 Subject: drm/xe: Fix the warning conditions The maximum timeout the display code uses in xe_pcode_request is 3 msec; add a warning in case the function is misused with higher timeouts. Add a warning if pcode_try_request is not passed a timeout parameter greater than 0. Cc: Lucas De Marchi Cc: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-3-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pcode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index a5e7da8cf944..9c4eefdf6642 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -10,6 +10,7 @@ #include +#include "xe_assert.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_mmio.h" @@ -124,6 +125,8 @@ static int pcode_try_request(struct xe_gt *gt, u32 mbox, { int slept, wait = 10; + xe_gt_assert(gt, timeout_us > 0); + for (slept = 0; slept < timeout_us; slept += wait) { if (locked) *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, @@ -169,6 +172,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, u32 status; int ret; + xe_gt_assert(gt, timeout_base_ms <= 3); + mutex_lock(&gt->pcode.lock); ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, @@ -188,7 +193,6 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, */ drm_err(&gt_to_xe(gt)->drm, "PCODE timeout, retrying with preemption disabled\n"); - drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, true, 50 * 1000, true); -- cgit From 04f4a70a183a688a60fe3882d6e4236ea02cfc67 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 15 Apr 2024 12:04:53 -0700 Subject: drm/xe: Only use reserved BCS instances for usm migrate exec queue The GuC context scheduling queue is 2 entries deep, thus it is possible for a migration job to be stuck behind a fault if the migration exec queue shares engines with user jobs. This can deadlock as the migrate exec queue is required to service page faults. Avoid the deadlock by only using reserved BCS instances for the usm migrate exec queue.
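Paraphrasing the change below rather than adding new driver code, the logical mask for the migrate queue is now built solely from copy engines reserved for USM, so a faulting user job can never sit ahead of the page-fault service job in the two-entry GuC queue:

        u32 logical_mask = 0;

        for_each_hw_engine(hwe, gt, id) {
                if (hwe->class != XE_ENGINE_CLASS_COPY)
                        continue;
                /* Reserved instances never run user jobs. */
                if (xe_gt_is_usm_hwe(gt, hwe))
                        logical_mask |= BIT(hwe->logical_instance);
        }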
Fixes: a043fbab7af5 ("drm/xe/pvc: Use fast copy engines as migrate engine on PVC") Cc: Matt Roper Cc: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240415190453.696553-2-matthew.brost@intel.com Reviewed-by: Brian Welty --- drivers/gpu/drm/xe/xe_migrate.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2b12f3621f9e..bacb23de411b 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -34,7 +34,6 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_vm.h" -#include "xe_wa.h" /** * struct xe_migrate - migrate context. @@ -300,10 +299,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } /* - * Due to workaround 16017236439, odd instance hardware copy engines are - * faster than even instance ones. - * This function returns the mask involving all fast copy engines and the - * reserved copy engine to be used as logical mask for migrate engine. * Including the reserved copy engine is required to avoid deadlocks due to * migrate jobs servicing the faults gets stuck behind the job that faulted. */ @@ -317,8 +312,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) if (hwe->class != XE_ENGINE_CLASS_COPY) continue; - if (!XE_WA(gt, 16017236439) || - xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) + if (xe_gt_is_usm_hwe(gt, hwe)) logical_mask |= BIT(hwe->logical_instance); } @@ -369,6 +363,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (!hwe || !logical_mask) return ERR_PTR(-EINVAL); + /* + * XXX: Currently only reserving 1 (likely slow) BCS instance on + * PVC, may want to revisit if performance is needed. + */ m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | -- cgit From 3df01f5c72b0b4ecdca2b3da88a4b6fac809986b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 10 May 2024 22:38:09 +0200 Subject: drm/xe/uc: Reorder post hwconfig uC initialization step We want to move the GuC submission initialization to the post hwconfig step, but now this step is done too late as migration initialization uses an exec_queue that would crash due to unset exec_queue_ops. We can easily fix that with a small function reorder.
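A hedged sketch of the ordering constraint the reorder enforces (abridged; error handling and unrelated init steps are omitted, and that exec_queue_ops is the relevant dependency is an assumption here, the diff below is authoritative):

        err = xe_uc_init_post_hwconfig(&gt->uc);  /* installs exec_queue_ops */
        if (err)
                goto err_force_wake;

        /* only now may migrate init safely create its exec_queue */
        tile->migrate = xe_migrate_init(tile);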
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510203810.1952-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 11870ad2caf6..e69a03ddd255 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -430,6 +430,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + err = xe_uc_init_post_hwconfig(&gt->uc); + if (err) + goto err_force_wake; + if (!xe_gt_is_media_type(gt)) { /* * USM has its only SA pool to non-block behind user operations @@ -456,10 +460,6 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - err = xe_uc_init_post_hwconfig(&gt->uc); - if (err) - goto err_force_wake; - err = xe_uc_init_hw(&gt->uc); if (err) goto err_force_wake; -- cgit From 4071e0872fcad846381f86f5164236827f7e91c8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 10 May 2024 22:38:10 +0200 Subject: drm/xe/uc: Move GuC submission init to post hwconfig step We shouldn't need anything from the GuC submission code until we finish GuC initialization in post hwconfig step. While around, add a diagnostic message if we fail uC init. Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510203810.1952-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 9 +++++++++ drivers/gpu/drm/xe/xe_uc.c | 10 +++------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0c9938e0ab8c..b1bb94914028 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -21,6 +21,7 @@ #include "xe_gt_printk.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" +#include "xe_guc_db_mgr.h" #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" @@ -356,6 +357,14 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) guc_init_params_post_hwconfig(guc); + ret = xe_guc_submit_init(guc); + if (ret) + return ret; + + ret = xe_guc_db_mgr_init(&guc->dbm, ~0); + if (ret) + return ret; + ret = xe_guc_pc_init(&guc->pc); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 45035e38388b..0186eafc947d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -10,10 +10,9 @@ #include "xe_gsc.h" #include "xe_gsc_proxy.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc.h" -#include "xe_guc_db_mgr.h" #include "xe_guc_pc.h" -#include "xe_guc_submit.h" #include "xe_huc.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -58,13 +57,10 @@ int xe_uc_init(struct xe_uc *uc) if (ret) goto err; - ret = xe_guc_submit_init(&uc->guc); - if (ret) - goto err; - - ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0); + return 0; err: + xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; } -- cgit From 1564d411e17f51e2f64655b4e4da015be1ba7eaa Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 10 May 2024 12:45:38 -0700 Subject: drm/xe/xe_guc_submit: Fix exec queue stop race condition Reorder the xe_sched_tdr_queue_imm and set_exec_queue_banned calls in guc_exec_queue_stop.
This prevents a possible race condition between the two events in which it's possible for xe_sched_tdr_queue_imm to wake the ufence waiter before the exec queue is banned, causing the ufence waiter to miss the banned state. Suggested-by: Matthew Brost Signed-off-by: Jonathan Cavitt Reviewed-by: Matthew Brost Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510194540.3246991-1-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fde527d34f58..3a8e501f2bc2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1491,8 +1491,8 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) !xe_sched_job_completed(job)) || xe_sched_invalidate_job(job, 2)) { trace_xe_sched_job_ban(job); - xe_sched_tdr_queue_imm(&q->guc->sched); set_exec_queue_banned(q); + xe_sched_tdr_queue_imm(&q->guc->sched); } } } -- cgit From abdea2847acfe41313620a5359940522990018e3 Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 10 May 2024 12:45:39 -0700 Subject: drm/xe/xe_guc_submit: Allow lr exec queues to be banned LR queues currently don't get banned during a GT/GuC reset because they lack a job. Though they don't have a job to detect the reset status of, it's still possible to tell when they should be banned by looking at the LRC: if the LRC head and tail don't match, then the exec queue should be banned and cleaned up. This also requires swapping the usage of xe_sched_tdr_queue_imm with xe_guc_exec_queue_trigger_cleanup, as the former is specific to non-lr exec queues. Suggested-by: Matthew Brost Signed-off-by: Jonathan Cavitt Reviewed-by: Matthew Brost Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510194540.3246991-2-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3a8e501f2bc2..bd507a916c1c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1485,15 +1485,23 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) */ if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { struct xe_sched_job *job = xe_sched_first_pending_job(sched); + bool ban = false; if (job) { if ((xe_sched_job_started(job) && !xe_sched_job_completed(job)) || xe_sched_invalidate_job(job, 2)) { trace_xe_sched_job_ban(job); - set_exec_queue_banned(q); - xe_sched_tdr_queue_imm(&q->guc->sched); + ban = true; } + } else if (xe_exec_queue_is_lr(q) && + (xe_lrc_ring_head(q->lrc) != xe_lrc_ring_tail(q->lrc))) { + ban = true; + } + + if (ban) { + set_exec_queue_banned(q); + xe_guc_exec_queue_trigger_cleanup(q); } } } -- cgit From b31cfb47b27ae02ea7fb3f956b99c79356730e2a Mon Sep 17 00:00:00 2001 From: Jonathan Cavitt Date: Fri, 10 May 2024 12:45:40 -0700 Subject: drm/xe/xe_guc_submit: Declare reset if banned or killed or wedged Add an additional condition to the reset_status guc_exec_queue_op that returns true if the exec queue has been banned or killed or wedged. 
The reset_status op is only used for exiting any xe_wait_user_fence_ioctl that waits on an exec queue without timing out, so doing this will exit the ioctl early in cases where the exec queue can no longer function, such as after a GuC stop during a reset. Suggested-by: Matthew Brost Signed-off-by: Jonathan Cavitt Reviewed-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240510194540.3246991-3-jonathan.cavitt@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index bd507a916c1c..4efb88e3e056 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1432,7 +1432,7 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) { - return exec_queue_reset(q); + return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); } /* -- cgit From c8ff26b82c5b0f589516edcf7628704e3a6bc426 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 May 2024 16:23:25 -0700 Subject: drm/xe: Only zap PTEs as needed If PTEs are already invalidated no need to invalidate again. Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240514232325.84508-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 87975e45622a..11dd0988ffda 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -812,8 +812,9 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) .tile = tile, }; struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; + u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - if (!(vma->tile_present & BIT(tile->id))) + if (!(pt_mask & BIT(tile->id))) return false; (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), -- cgit From 75fe5f347167aceb8b78b9f6ad0ba01a38a34e16 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 7 May 2024 18:57:57 +0200 Subject: drm/xe/pf: Don't advertise support to enable VFs if not ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even if we have not enabled SR-IOV support using the platform specific has_sriov flag, the hardware may still report SR-IOV capability and the PCI layer may wrongly advertise driver support to enable VFs. Explicitly reset the number of supported VFs to zero to avoid confusion. Applications may read the /sys/bus/pci/devices/.../sriov_totalvfs prior to enabling VFs using the sriov_numvfs to check if such an operation is possible. 
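For illustration only (hypothetical user space code, not part of this patch), such a check could look like:

    #include <stdio.h>

    /* Read .../sriov_totalvfs for a device; 0 means enabling VFs via
     * sriov_numvfs is not possible, -1 means the attribute could not
     * be read at all.
     */
    static int read_sriov_totalvfs(const char *sysfs_path)
    {
            FILE *f = fopen(sysfs_path, "r");
            int total = -1;

            if (f) {
                    if (fscanf(f, "%d", &total) != 1)
                            total = -1;
                    fclose(f);
            }
            return total;
    }

With this patch, such applications will see 0 on platforms where the driver does not support SR-IOV, instead of the raw hardware capability.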
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240507165757.2835-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 1c3fa84b6adb..a274a5fb1401 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -53,6 +53,7 @@ static bool test_is_vf(struct xe_device *xe) */ void xe_sriov_probe_early(struct xe_device *xe) { + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; bool has_sriov = xe->info.has_sriov; @@ -61,6 +62,16 @@ void xe_sriov_probe_early(struct xe_device *xe) mode = XE_SRIOV_MODE_VF; else if (xe_sriov_pf_readiness(xe)) mode = XE_SRIOV_MODE_PF; + } else if (pci_sriov_get_totalvfs(pdev)) { + /* + * Even if we have not enabled SR-IOV support using the + * platform specific has_sriov flag, the hardware may still + * report SR-IOV capability and the PCI layer may wrongly + * advertise driver support to enable VFs. Explicitly reset + * the number of supported VFs to zero to avoid confusion. + */ + drm_info(&xe->drm, "Support for SR-IOV is not available\n"); + pci_sriov_set_totalvfs(pdev, 0); } xe_assert(xe, !xe->sriov.__mode); -- cgit From 9aa8586063a465da986a39ef55e3e5c12140cde5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 6 May 2024 20:41:21 +0200 Subject: drm/xe/pf: Implement pci_driver.sriov_configure callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCI subsystem already exposes the "sriov_numvfs" attribute that users can use to enable or disable SR-IOV VFs. Add custom implementation of the .sriov_configure callback defined by the pci_driver to perform additional steps, including fair VFs provisioning with the resources, as required by our platforms. 
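For reference, the calling contract can be sketched as below (an approximation only; the real logic lives in the PCI core's sriov_numvfs_store()):

    /* Writing N to sriov_numvfs ends up invoking the bound driver's
     * callback, which returns the number of VFs actually enabled or
     * a negative errno.
     */
    static int sriov_configure_contract(struct pci_dev *pdev, int num_vfs)
    {
            if (!pdev->driver || !pdev->driver->sriov_configure)
                    return -ENOENT;

            return pdev->driver->sriov_configure(pdev, num_vfs);
    }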
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Cc: Badal Nilawar Cc: Rodrigo Vivi Reviewed-by: Piotr Piórkowski #v2 Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240506184121.2615-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_pci.c | 4 ++ drivers/gpu/drm/xe/xe_pci_sriov.c | 143 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pci_sriov.h | 13 ++++ 4 files changed, 161 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_pci_sriov.c create mode 100644 drivers/gpu/drm/xe/xe_pci_sriov.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index a67977edff5b..6acde66f0827 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -169,6 +169,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt.o \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ + xe_pci_sriov.o \ xe_sriov_pf.o # include helpers for tests even when XE is built-in diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index fc29eb8e99c5..b1c8050b7bf5 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -23,6 +23,7 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_pci_sriov.h" #include "xe_pci_types.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -960,6 +961,9 @@ static struct pci_driver xe_pci_driver = { .probe = xe_pci_probe, .remove = xe_pci_remove, .shutdown = xe_pci_shutdown, +#ifdef CONFIG_PCI_IOV + .sriov_configure = xe_pci_sriov_configure, +#endif #ifdef CONFIG_PM_SLEEP .driver.pm = &xe_pm_ops, #endif diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c new file mode 100644 index 000000000000..06d0fceb5114 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_pci_sriov.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + int result = 0, err; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); + result = result ?: err; + } + + return result; +} + +static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs) +{ + struct xe_gt *gt; + unsigned int id; + unsigned int n; + + for_each_gt(gt, xe, id) + for (n = 1; n <= num_vfs; n++) + xe_gt_sriov_pf_config_release(gt, n, true); +} + +static int pf_enable_vfs(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int total_vfs = xe_sriov_pf_get_totalvfs(xe); + int err; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, num_vfs > 0); + xe_assert(xe, num_vfs <= total_vfs); + xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + + /* + * We must hold additional reference to the runtime PM to keep PF in D0 + * during VFs lifetime, as our VFs do not implement the PM capability. + * + * With PF being in D0 state, all VFs will also behave as in D0 state. + * This will also keep GuC alive with all VFs' configurations. + * + * We will release this additional PM reference in pf_disable_vfs(). 
+ */ + xe_pm_runtime_get_noresume(xe); + + err = pf_provision_vfs(xe, num_vfs); + if (err < 0) + goto failed; + + err = pci_enable_sriov(pdev, num_vfs); + if (err < 0) + goto failed; + + xe_sriov_info(xe, "Enabled %u of %u VF%s\n", + num_vfs, total_vfs, str_plural(total_vfs)); + return num_vfs; + +failed: + pf_unprovision_vfs(xe, num_vfs); + xe_pm_runtime_put(xe); + + xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", + num_vfs, str_plural(num_vfs), ERR_PTR(err)); + return err; +} + +static int pf_disable_vfs(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + u16 num_vfs = pci_num_vf(pdev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_sriov_dbg(xe, "disabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + + if (!num_vfs) + return 0; + + pci_disable_sriov(pdev); + + pf_unprovision_vfs(xe, num_vfs); + + /* not needed anymore - see pf_enable_vfs() */ + xe_pm_runtime_put(xe); + + xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs)); + return 0; +} + +/** + * xe_pci_sriov_configure - Configure SR-IOV (enable/disable VFs). + * @pdev: the &pci_dev + * @num_vfs: number of VFs to enable or zero to disable all VFs + * + * This is the Xe implementation of struct pci_driver.sriov_configure callback. + * + * This callback will be called by the PCI subsystem to enable or disable SR-IOV + * Virtual Functions (VFs) as requested by the used via the PCI sysfs interface. + * + * Return: number of configured VFs or a negative error code on failure. + */ +int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + int ret; + + if (!IS_SRIOV_PF(xe)) + return -ENODEV; + + if (num_vfs < 0) + return -EINVAL; + + if (num_vfs > xe_sriov_pf_get_totalvfs(xe)) + return -ERANGE; + + if (num_vfs && pci_num_vf(pdev)) + return -EBUSY; + + xe_pm_runtime_get(xe); + if (num_vfs > 0) + ret = pf_enable_vfs(xe, num_vfs); + else + ret = pf_disable_vfs(xe); + xe_pm_runtime_put(xe); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h new file mode 100644 index 000000000000..3b8bfbf7e1d9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_pci_sriov.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PCI_SRIOV_H_ +#define _XE_PCI_SRIOV_H_ + +struct pci_dev; + +int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); + +#endif -- cgit From e6946ea8fcb5625c46754435fef5523f12659c11 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:08 +0200 Subject: drm/xe/guc: Add more KLV helper macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches we will want to generate some of the KLV keys from other macros. Add MAKE_GUC_KLV_{KEY|LEN} macros for that and make sure they will correctly expand provided TAG parameter. Also fix PREP_GUC_KLV_TAG to also work correctly within other macros. 
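For illustration, with made-up demo definitions (these macros exist only for this example):

    #define GUC_KLV_DEMO_KEY        0x1234
    #define GUC_KLV_DEMO_LEN        2u
    #define MY_TAG                  DEMO

    /* MAKE_GUC_KLV_KEY(MY_TAG) expands MY_TAG to DEMO before pasting
     * and yields GUC_KLV_DEMO_KEY (0x1234), because CONCATENATE()
     * macro-expands its arguments first. Direct pasting with
     * GUC_KLV_##TAG##_KEY would instead produce the undefined token
     * GUC_KLV_MY_TAG_KEY, which is why PREP_GUC_KLV_TAG is reworked
     * to go through these helpers.
     */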
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_helpers.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h index b835e0ebe6db..c676d21c173b 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h @@ -6,6 +6,7 @@ #ifndef _XE_GUC_KLV_HELPERS_H_ #define _XE_GUC_KLV_HELPERS_H_ +#include #include struct drm_printer; @@ -37,6 +38,18 @@ int xe_guc_klv_count(const u32 *klvs, u32 num_dwords); (FIELD_PREP_CONST(GUC_KLV_0_KEY, (key)) | \ FIELD_PREP_CONST(GUC_KLV_0_LEN, (len))) +/** + * MAKE_GUC_KLV_KEY - Prepare KLV KEY name based on unique KLV definition tag. + * @TAG: unique tag of the KLV definition + */ +#define MAKE_GUC_KLV_KEY(TAG) CONCATENATE(CONCATENATE(GUC_KLV_, TAG), _KEY) + +/** + * MAKE_GUC_KLV_LEN - Prepare KLV LEN name based on unique KLV definition tag. + * @TAG: unique tag of the KLV definition + */ +#define MAKE_GUC_KLV_LEN(TAG) CONCATENATE(CONCATENATE(GUC_KLV_, TAG), _LEN) + /** * PREP_GUC_KLV_TAG - Prepare KLV header value based on unique KLV definition tag. * @TAG: unique tag of the KLV definition @@ -46,6 +59,6 @@ int xe_guc_klv_count(const u32 *klvs, u32 num_dwords); * Return: value of the KLV header (u32). */ #define PREP_GUC_KLV_TAG(TAG) \ - PREP_GUC_KLV_CONST(GUC_KLV_##TAG##_KEY, GUC_KLV_##TAG##_LEN) + PREP_GUC_KLV_CONST(MAKE_GUC_KLV_KEY(TAG), MAKE_GUC_KLV_LEN(TAG)) #endif -- cgit From b1ce52fbf6ebfc3815773045856c695ce86ca679 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:09 +0200 Subject: drm/xe/guc: Introduce GuC KLV thresholds set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GuC firmware monitors VF's activity and notifies the PF driver once any configured threshold related to such activity is exceeded. The available thresholds are defined in the GuC ABI as part of the GuC VF Configuration KLVs. Threshold configurations performed by the PF driver and notifications sent by the GuC rely on the KLV keys, which are not zero-based and might not guarantee continuity. To simplify the driver code and eliminate the need to repeat very similar code for each threshold, introduce the threshold set macro that allows to generate required code based on unique threshold tag. 
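This is the classic X-macro pattern; a stripped-down sketch with demo names and just two entries shows how a single list can drive both an enum and a name table:

    #define DEMO_THRESHOLDS_SET(define) \
            define(CAT_ERR, cat_error_count) \
            define(ENGINE_RESET, engine_reset_count)

    enum demo_threshold_index {
    #define demo_enum(TAG, NAME) DEMO_THRESHOLD_INDEX_##TAG,
            DEMO_THRESHOLDS_SET(demo_enum)
    #undef demo_enum
            DEMO_NUM_THRESHOLDS
    };

    static const char * const demo_threshold_names[] = {
    #define demo_name(TAG, NAME) #NAME,
            DEMO_THRESHOLDS_SET(demo_name)
    #undef demo_name
    };

Adding a new entry to the list updates every generated user at once, which is the property the thresholds set introduced below relies on.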
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h | 64 ++++++++++++++++++++ .../gpu/drm/xe/xe_guc_klv_thresholds_set_types.h | 68 ++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h create mode 100644 drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h new file mode 100644 index 000000000000..da0fedbbdbaf --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_KLV_THRESHOLDS_SET_H_ +#define _XE_GUC_KLV_THRESHOLDS_SET_H_ + +#include "abi/guc_klvs_abi.h" +#include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set_types.h" + +/** + * MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY - Prepare the name of the KLV key constant. + * @TAG: unique tag of the GuC threshold KLV key. + */ +#define MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG) \ + MAKE_GUC_KLV_KEY(CONCATENATE(VF_CFG_THRESHOLD_, TAG)) + +/** + * xe_guc_klv_threshold_key_to_index - Find index of the tracked GuC threshold. + * @key: GuC threshold KLV key. + * + * This translation is automatically generated using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + * Return: index of the GuC threshold KLV or -1 if not found. + */ +static inline int xe_guc_klv_threshold_key_to_index(u32 key) +{ + switch (key) { +#define define_xe_guc_klv_threshold_key_to_index_case(TAG, ...) \ + \ + case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \ + return MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG); + + /* private: auto-generated case statements */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_xe_guc_klv_threshold_key_to_index_case) + } + return -1; +#undef define_xe_guc_klv_threshold_key_to_index_case +} + +/** + * xe_guc_klv_threshold_index_to_key - Get tracked GuC threshold KLV key. + * @index: GuC threshold KLV index. + * + * This translation is automatically generated using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + * Return: key of the GuC threshold KLV or 0 on malformed index. + */ +static inline u32 xe_guc_klv_threshold_index_to_key(enum xe_guc_klv_threshold_index index) +{ + switch (index) { +#define define_xe_guc_klv_threshold_index_to_key_case(TAG, ...) \ + \ + case MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG): \ + return MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG); + + /* private: auto-generated case statements */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_xe_guc_klv_threshold_index_to_key_case) + } + return 0; /* unreachable */ +#undef define_xe_guc_klv_threshold_index_to_key_case +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h new file mode 100644 index 000000000000..0a028c94756d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_KLV_THRESHOLDS_SET_TYPES_H_ +#define _XE_GUC_KLV_THRESHOLDS_SET_TYPES_H_ + +#include "xe_args.h" + +/** + * MAKE_XE_GUC_KLV_THRESHOLDS_SET - Generate various GuC thresholds definitions. + * @define: name of the inner macro to expand. 
+ * + * The GuC firmware is able to monitor VF's adverse activity and will notify the + * PF driver once any threshold is exceeded. + * + * This super macro allows various conversions between the GuC adverse event + * threshold KLV definitions and the driver code without repeating similar code + * or risking missing some cases. + * + * For each GuC threshold definition, the inner macro &define will be provided + * with the &TAG, that corresponds to the GuC threshold KLV key name defined by + * ABI and the associated &NAME, that may be used in code or debugfs/sysfs:: + * + * define(TAG, NAME) + */ +#define MAKE_XE_GUC_KLV_THRESHOLDS_SET(define) \ + define(CAT_ERR, cat_error_count) \ + define(ENGINE_RESET, engine_reset_count) \ + define(PAGE_FAULT, page_fault_count) \ + define(H2G_STORM, guc_time_us) \ + define(IRQ_STORM, irq_time_us) \ + define(DOORBELL_STORM, doorbell_time_us) \ + /* end */ + +/** + * XE_GUC_KLV_NUM_THRESHOLDS - Number of GuC thresholds KLVs. + * + * Calculated automatically using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + */ +#define XE_GUC_KLV_NUM_THRESHOLDS \ + (CALL_ARGS(COUNT_ARGS, MAKE_XE_GUC_KLV_THRESHOLDS_SET(ARGS_SEP_COMMA)) - 1) + +/** + * MAKE_XE_GUC_KLV_THRESHOLD_INDEX - Create enumerator name. + * @TAG: unique TAG of the enum xe_guc_klv_threshold_index. + */ +#define MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG) \ + CONCATENATE(XE_GUC_KLV_THRESHOLD_INDEX_, TAG) + +/** + * enum xe_guc_klv_threshold_index - Index of the tracked GuC threshold. + * + * This enum is automatically generated using &MAKE_XE_GUC_KLV_THRESHOLDS_SET. + * All these generated enumerators will only be used by the also generated code. + */ +enum xe_guc_klv_threshold_index { +#define define_xe_guc_klv_threshold_index_enum(TAG, ...) \ + \ + MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG), + + /* private: auto-generated enum definitions */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_xe_guc_klv_threshold_index_enum) +#undef define_xe_guc_klv_threshold_index_enum +}; + +#endif -- cgit From 7aefee83fcdfe5a6a443b87650f3b6cb5721d3ad Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:10 +0200 Subject: drm/xe/guc: Add support for threshold KLVs in to_string() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use MAKE_XE_GUC_KLV_THRESHOLDS_SET to generate missing conversion of threshold KLV keys to string. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_klv_helpers.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c index ceca949932a0..9d99fe266d97 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -8,6 +8,7 @@ #include "abi/guc_klvs_abi.h" #include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set.h" #define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) @@ -48,6 +49,17 @@ const char *xe_guc_klv_key_to_string(u16 key) return "begin_db_id"; case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: return "begin_ctx_id"; + + /* VF CFG threshold keys */ +#define define_threshold_key_to_string_case(TAG, NAME, ...) 
\ + \ + case MAKE_GUC_KLV_VF_CFG_THRESHOLD_KEY(TAG): \ + return #NAME; + + /* private: auto-generated case statements */ + MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_threshold_key_to_string_case) +#undef define_threshold_key_to_string_case + default: return "(unknown)"; } -- cgit From 629df234bfe73dacb4bb0daa4bc2c14824dba159 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:11 +0200 Subject: drm/xe/pf: Introduce functions to configure VF thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GuC firmware monitors VF's activity and notifies the PF driver once any configured threshold related to such activity is exceeded. Add functions to allow configuration of these thresholds per VF. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 87 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 6 ++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 4 ++ 3 files changed, 97 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 7eac01e04cc5..f678cd1ad9c5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -25,6 +25,7 @@ #include "xe_guc_fwif.h" #include "xe_guc_id_mgr.h" #include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set.h" #include "xe_guc_submit.h" #include "xe_lmtt.h" #include "xe_map.h" @@ -208,6 +209,15 @@ static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); } +static int pf_push_vf_cfg_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value) +{ + u32 key = xe_guc_klv_threshold_index_to_key(index); + + xe_gt_assert(gt, key); + return pf_push_vf_cfg_u32(gt, vfid, key, value); +} + static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned int vfid) { xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); @@ -1748,6 +1758,83 @@ static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *c config->preempt_timeout = 0; } +static int pf_provision_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_threshold(gt, vfid, index, value); + if (unlikely(err)) + return err; + + config->thresholds[index] = value; + + return 0; +} + +static int pf_get_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->thresholds[index]; +} + +static const char *threshold_unit(u32 threshold) +{ + return threshold ? "" : "(disabled)"; +} + +/** + * xe_gt_sriov_pf_config_set_threshold - Configure threshold for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @index: the threshold index + * @value: requested value (0 means disabled) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value) +{ + u32 key = xe_guc_klv_threshold_index_to_key(index); + const char *name = xe_guc_klv_key_to_string(key); + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_threshold(gt, vfid, index, value); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, value, + xe_gt_sriov_pf_config_get_threshold(gt, vfid, index), + name, threshold_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_threshold - Get VF's threshold. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @index: the threshold index + * + * This function can only be called on PF. + * + * Return: value of VF's (or PF's) threshold. + */ +u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index) +{ + u32 value; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + value = pf_get_threshold(gt, vfid, index); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return value; +} + static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 5e6b36f00b5b..e8238c1ad06a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -8,6 +8,7 @@ #include +enum xe_guc_klv_threshold_index; struct drm_printer; struct xe_gt; @@ -43,6 +44,11 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index); +int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, + enum xe_guc_klv_threshold_index index, u32 value); + int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force); int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index d3745c355957..7bc66656fcc7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -8,6 +8,8 @@ #include +#include "xe_guc_klv_thresholds_set_types.h" + struct xe_bo; /** @@ -32,6 +34,8 @@ struct xe_gt_sriov_config { u32 exec_quantum; /** @preempt_timeout: preemption timeout in microseconds. */ u32 preempt_timeout; + /** @thresholds: GuC thresholds for adverse events notifications. */ + u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS]; }; /** -- cgit From c4f5ded082bb9433b180dbfbb8352f92e319149b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:12 +0200 Subject: drm/xe/pf: Allow configuration of VF thresholds over debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial values of all thresholds used by the GuC to monitor VF's activity is zero (disabled) and we need to explicitly configure them per each VF. Expose additional attributes over debugfs. Definitions of all attributes are generated so we will not need to make any changes if new thresholds would be added to the set. 
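For one entry of the set, say CAT_ERR/cat_error_count, the generators added below expand to roughly:

    static int cat_error_count_set(void *data, u64 val)
    {
            return set_threshold(data, val, XE_GUC_KLV_THRESHOLD_INDEX_CAT_ERR);
    }

    static int cat_error_count_get(void *data, u64 *val)
    {
            return get_threshold(data, val, XE_GUC_KLV_THRESHOLD_INDEX_CAT_ERR);
    }

    DEFINE_DEBUGFS_ATTRIBUTE(cat_error_count_fops, cat_error_count_get,
                             cat_error_count_set, "%llu\n");

with the resulting file registered as "threshold_cat_error_count" in the pf/ and vfN/ debugfs directories.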
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 72 +++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 5102035faa7e..eb71c2009c34 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -197,6 +197,71 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── pf + * │   │   ├── threshold_cat_error_count + * │   │   ├── threshold_doorbell_time_us + * │   │   ├── threshold_engine_reset_count + * │   │   ├── threshold_guc_time_us + * │   │   ├── threshold_irq_time_us + * │   │   ├── threshold_page_fault_count + * │   ├── vf1 + * │   │   ├── threshold_cat_error_count + * │   │   ├── threshold_doorbell_time_us + * │   │   ├── threshold_engine_reset_count + * │   │   ├── threshold_guc_time_us + * │   │   ├── threshold_irq_time_us + * │   │   ├── threshold_page_fault_count + */ + +static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index index) +{ + struct xe_gt *gt = extract_gt(data); + unsigned int vfid = extract_vfid(data); + struct xe_device *xe = gt_to_xe(gt); + int err; + + if (val > (u32)~0ull) + return -EOVERFLOW; + + xe_pm_runtime_get(xe); + err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val); + xe_pm_runtime_put(xe); + + return err; +} + +static int get_threshold(void *data, u64 *val, enum xe_guc_klv_threshold_index index) +{ + struct xe_gt *gt = extract_gt(data); + unsigned int vfid = extract_vfid(data); + + *val = xe_gt_sriov_pf_config_get_threshold(gt, vfid, index); + return 0; +} + +#define DEFINE_SRIOV_GT_THRESHOLD_DEBUGFS_ATTRIBUTE(THRESHOLD, INDEX) \ + \ +static int THRESHOLD##_set(void *data, u64 val) \ +{ \ + return set_threshold(data, val, INDEX); \ +} \ + \ +static int THRESHOLD##_get(void *data, u64 *val) \ +{ \ + return get_threshold(data, val, INDEX); \ +} \ + \ +DEFINE_DEBUGFS_ATTRIBUTE(THRESHOLD##_fops, THRESHOLD##_get, THRESHOLD##_set, "%llu\n") + +/* generate all threshold attributes */ +#define define_threshold_attribute(TAG, NAME, ...) \ + DEFINE_SRIOV_GT_THRESHOLD_DEBUGFS_ATTRIBUTE(NAME, MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)); +MAKE_XE_GUC_KLV_THRESHOLDS_SET(define_threshold_attribute) +#undef define_threshold_attribute + static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigned int vfid) { xe_gt_assert(gt, gt == extract_gt(parent)); @@ -217,6 +282,13 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &exec_quantum_fops); debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, &preempt_timeout_fops); + + /* register all threshold attributes */ +#define register_threshold_attribute(TAG, NAME, ...) 
\ + debugfs_create_file_unsafe("threshold_" #NAME, 0644, parent, parent, \ + &NAME##_fops); + MAKE_XE_GUC_KLV_THRESHOLDS_SET(register_threshold_attribute) +#undef register_threshold_attribute } /* -- cgit From d5e12fffcc01b3a22157a9cd4a7474ee6355182e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:13 +0200 Subject: drm/xe/guc: Add GUC2PF_ADVERSE_EVENT to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When thresholds used to monitor VFs activities are configured, then GuC may send GUC2PF_ADVERSE_EVENT messages informing the PF driver about exceeded thresholds. Add necessary definitions to our GuC firmware ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index c1ad09b36453..f1aa7f88e217 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -171,6 +171,36 @@ #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN +/** + * DOC: GUC2PF_ADVERSE_EVENT + * + * This message is used by the GuC to notify PF about adverse events. + * + * This G2H message must be sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_GUC2PF_ADVERSE_EVENT` = 0x5104 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **THRESHOLD** - key of the exceeded threshold | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_GUC2PF_ADVERSE_EVENT 0x5104 + +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD GUC_HXG_EVENT_MSG_n_DATAn + /** * DOC: GUC2PF_VF_STATE_NOTIFY * -- cgit From 335d62ade5feaa46082f8da755ffdc569ae51768 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:14 +0200 Subject: drm/xe/pf: Track adverse events notifications from GuC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When thresholds used to monitor VFs activities are configured, then GuC may send GUC2PF_ADVERSE_EVENT messages informing the PF driver about exceeded thresholds. Start handling such messages. 
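Per the GUC2PF_ADVERSE_EVENT layout added in the previous patch, the payload carries only the VF identifier and the key of the exceeded threshold; a minimal decode sketch (origin/MBZ/length validation omitted here) is:

    static void demo_decode_adverse_event(const u32 *msg, u32 *vfid, u32 *key)
    {
            *vfid = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID, msg[1]);
            *key = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD, msg[2]);
    }

The key is then translated back to a tracked threshold index so that the matching per-VF event counter can be incremented.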
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-8-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c | 147 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h | 27 ++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h | 22 ++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 5 + drivers/gpu/drm/xe/xe_guc_ct.c | 4 + 6 files changed, 206 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 6acde66f0827..8fe7bb80501f 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -164,6 +164,7 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ xe_gt_sriov_pf_debugfs.o \ + xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ xe_lmtt.o \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c new file mode 100644 index 000000000000..7d532bded02a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_messages_abi.h" + +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_monitor.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_klv_helpers.h" +#include "xe_guc_klv_thresholds_set.h" + +/** + * xe_gt_sriov_pf_monitor_flr - Cleanup VF data after VF FLR. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * On FLR this function will reset all event data related to the VF. + * This function is for PF only. + */ +void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid) +{ + int e; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_sriov_pf_assert_vfid(gt, vfid); + + for (e = 0; e < XE_GUC_KLV_NUM_THRESHOLDS; e++) + gt->sriov.pf.vfs[vfid].monitor.guc.events[e] = 0; +} + +static void pf_update_event_counter(struct xe_gt *gt, u32 vfid, + enum xe_guc_klv_threshold_index e) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, e < XE_GUC_KLV_NUM_THRESHOLDS); + + gt->sriov.pf.vfs[vfid].monitor.guc.events[e]++; +} + +static int pf_handle_vf_threshold_event(struct xe_gt *gt, u32 vfid, u32 threshold) +{ + char origin[8]; + int e; + + e = xe_guc_klv_threshold_key_to_index(threshold); + xe_sriov_function_name(vfid, origin, sizeof(origin)); + + /* was there a new KEY added that we missed? */ + if (unlikely(e < 0)) { + xe_gt_sriov_notice(gt, "unknown threshold key %#x reported for %s\n", + threshold, origin); + return -ENOTCONN; + } + + xe_gt_sriov_dbg(gt, "%s exceeded threshold %u %s\n", + origin, xe_gt_sriov_pf_config_get_threshold(gt, vfid, e), + xe_guc_klv_key_to_string(threshold)); + + pf_update_event_counter(gt, vfid, e); + + return 0; +} + +/** + * xe_gt_sriov_pf_monitor_process_guc2pf - Handle adverse event notification from the GuC. + * @gt: the &xe_gt + * @msg: G2H event message + * @len: length of the message + * + * This function is intended for PF only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 vfid; + u32 threshold; + + xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + GUC_ACTION_GUC2PF_ADVERSE_EVENT); + + if (unlikely(!IS_SRIOV_PF(xe))) + return -EPROTO; + + if (unlikely(FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_0_MBZ, msg[0]))) + return -EPFNOSUPPORT; + + if (unlikely(len < GUC2PF_ADVERSE_EVENT_EVENT_MSG_LEN)) + return -EPROTO; + + vfid = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_1_VFID, msg[1]); + threshold = FIELD_GET(GUC2PF_ADVERSE_EVENT_EVENT_MSG_2_THRESHOLD, msg[2]); + + if (unlikely(vfid > xe_gt_sriov_pf_get_totalvfs(gt))) + return -EINVAL; + + return pf_handle_vf_threshold_event(gt, vfid, threshold); +} + +/** + * xe_gt_sriov_pf_monitor_print_events - Print adverse events counters. + * @gt: the &xe_gt to print events from + * @p: the &drm_printer + * + * Print adverse events counters for all VFs. + * VFs with no events are not printed. + * + * This function can only be called on PF. + */ +void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt); + const struct xe_gt_sriov_monitor *data; + int e; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + for (n = 1; n <= total_vfs; n++) { + data = >->sriov.pf.vfs[n].monitor; + + for (e = 0; e < XE_GUC_KLV_NUM_THRESHOLDS; e++) + if (data->guc.events[e]) + break; + + /* skip empty unless in debug mode */ + if (e >= XE_GUC_KLV_NUM_THRESHOLDS && + !IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) + continue; + +#define __format(...) "%s:%u " +#define __value(TAG, NAME, ...) , #NAME, data->guc.events[MAKE_XE_GUC_KLV_THRESHOLD_INDEX(TAG)] + + drm_printf(p, "VF%u:\t" MAKE_XE_GUC_KLV_THRESHOLDS_SET(__format) "\n", + n MAKE_XE_GUC_KLV_THRESHOLDS_SET(__value)); + +#undef __format +#undef __value + } +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h new file mode 100644 index 000000000000..7ca9351a271b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_MONITOR_H_ +#define _XE_GT_SRIOV_PF_MONITOR_H_ + +#include +#include + +struct drm_printer; +struct xe_gt; + +void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid); +void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); +#else +static inline int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h new file mode 100644 index 000000000000..e27c0308c5db --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_MONITOR_TYPES_H_ +#define _XE_GT_SRIOV_PF_MONITOR_TYPES_H_ + +#include "xe_guc_klv_thresholds_set_types.h" + +/** + * struct xe_gt_sriov_monitor - GT level per-VF monitoring data. 
+ */ +struct xe_gt_sriov_monitor { + /** @guc: monitoring data related to the GuC. */ + struct { + /** @guc.events: number of adverse events reported by the GuC. */ + unsigned int events[XE_GUC_KLV_NUM_THRESHOLDS]; + } guc; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h index 880754f3e215..40cbaea3ef44 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -9,6 +9,7 @@ #include #include "xe_gt_sriov_pf_config_types.h" +#include "xe_gt_sriov_pf_monitor_types.h" #include "xe_gt_sriov_pf_policy_types.h" #include "xe_gt_sriov_pf_service_types.h" @@ -18,6 +19,10 @@ struct xe_gt_sriov_metadata { /** @config: per-VF provisioning data. */ struct xe_gt_sriov_config config; + + /** @monitor: per-VF monitoring data. */ + struct xe_gt_sriov_monitor monitor; + /** @version: negotiated VF/PF ABI version */ struct xe_gt_sriov_pf_service_version version; }; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 0151d29b3c58..c1f258348f5c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -22,6 +22,7 @@ #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_relay.h" @@ -1071,6 +1072,9 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY: ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); break; + case GUC_ACTION_GUC2PF_ADVERSE_EVENT: + ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); + break; default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } -- cgit From 1c99d3d3edab25617afbb1592564d3ecc233bc5d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 14 May 2024 21:00:15 +0200 Subject: drm/xe/pf: Expose PF monitor details via debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debug purposes we might want to view statistics maintained by the PF driver about VFs activity. 
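Given the __format/__value generators in the monitor code, each VF with recorded events is printed on a single line, for example (illustrative counter values):

    VF1:    cat_error_count:0 engine_reset_count:2 page_fault_count:0 guc_time_us:0 irq_time_us:0 doorbell_time_us:0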
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240514190015.2172-9-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index eb71c2009c34..2290ddaf9594 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" @@ -55,6 +56,7 @@ static unsigned int extract_vfid(struct dentry *d) * │   │   ├── doorbells_provisioned * │   │   ├── runtime_registers * │   │   ├── negotiated_versions + * │   │   ├── adverse_events */ static const struct drm_info_list pf_info[] = { @@ -88,6 +90,11 @@ static const struct drm_info_list pf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_service_print_version, }, + { + "adverse_events", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_pf_monitor_print_events, + }, }; /* -- cgit From e158cf936114661044dface6da794437a91b53c4 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:41 +0200 Subject: drm/xe/guc: Add VF2GUC_MATCH_VERSION to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches we will add a version negotiation between the VF driver and the GuC firmware. Add necessary definitions to our GuC firmware ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 67 ++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index f1aa7f88e217..2642cd337a1f 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -243,6 +243,73 @@ #define GUC_PF_NOTIFY_VF_PAUSE_DONE 3u #define GUC_PF_NOTIFY_VF_FIXUP_DONE 4u +/** + * DOC: VF2GUC_MATCH_VERSION + * + * This action is used to match VF interface version used by VF and GuC. + * + * This message must be sent as `MMIO HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_MATCH_VERSION` = 0x5500 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:24 | **BRANCH** - branch ID of the VF interface | + * | | | (use BRANCH_ANY to request latest version supported by GuC) | + * | +-------+--------------------------------------------------------------+ + * | | 23:16 | **MAJOR** - major version of the VF interface | + * | | | (use MAJOR_ANY to request latest version supported by GuC) | + * | +-------+--------------------------------------------------------------+ + * | | 15:8 | **MINOR** - minor version of the VF interface | + * | | | (use MINOR_ANY to request latest version supported by GuC) | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **MBZ** | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:24 | **BRANCH** - branch ID of the VF interface | + * | +-------+--------------------------------------------------------------+ + * | | 23:16 | **MAJOR** - major version of the VF interface | + * | +-------+--------------------------------------------------------------+ + * | | 15:8 | **MINOR** - minor version of the VF interface | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **PATCH** - patch version of the VF interface | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_MATCH_VERSION 0x5500u + +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH (0xffu << 24) +#define GUC_VERSION_BRANCH_ANY 0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR (0xffu << 16) +#define GUC_VERSION_MAJOR_ANY 0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR (0xffu << 8) +#define GUC_VERSION_MINOR_ANY 0 +#define VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MBZ (0xffu << 0) + +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_LEN (GUC_HXG_RESPONSE_MSG_MIN_LEN + 1u) +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH (0xffu << 24) +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR (0xffu << 16) +#define VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR (0xffu << 8) +#define 
VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH (0xffu << 0) + /** * DOC: PF2GUC_UPDATE_VGT_POLICY * -- cgit From 769551c45c2b66b4d0bbe5a78aab4156f85c6331 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:42 +0200 Subject: drm/xe/guc: Add VF2GUC_VF_RESET to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The version negotiation between the VF driver and the GuC firmware must start with explicit soft reset of the GuC state initiated by the VF driver. Add VF2GUC action definitions to the ABI header. Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 2642cd337a1f..23e21c3f033f 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -464,4 +464,41 @@ #define GUC_PF_TRIGGER_VF_FLR_START 4u #define GUC_PF_TRIGGER_VF_FLR_FINISH 5u +/** + * DOC: VF2GUC_VF_RESET + * + * This action is used by VF to reset GuC's VF state. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_VF_RESET` = 0x5507 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_VF_RESET 0x5507u + +#define VF2GUC_VF_RESET_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_VF_RESET_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 + +#define VF2GUC_VF_RESET_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + #endif -- cgit From c454f1a6b994e44e338ac837981441a298c941b8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:43 +0200 Subject: drm/xe/guc: Add VF2GUC_QUERY_SINGLE_KLV to ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patches we will add support to the VF driver to read its configuration from the GuC using special H2G actions. Add necessary definitions to our GuC firmware ABI header. 
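A hypothetical VF-side helper built on these definitions (reusing the xe_guc_mmio_send_recv() pattern that the VF code later in this series uses; MBZ checks omitted for brevity) might look like:

    static int demo_query_single_klv64(struct xe_guc *guc, u16 key, u64 *value)
    {
            u32 request[VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN] = {
                    FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
                    FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
                    FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
                               GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV),
                    FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, key),
            };
            u32 response[GUC_MAX_MMIO_MSG_LEN];
            u32 length;
            int ret;

            ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response);
            if (ret < 0)
                    return ret;

            length = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH, response[0]);
            if (length > 2)
                    return -EOVERFLOW;      /* value wider than 64 bits */

            *value = length == 2 ? (u64)response[2] << 32 | response[1] :
                     length == 1 ? response[1] : 0;
            return 0;
    }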
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 23e21c3f033f..181180f5945c 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -501,4 +501,60 @@ #define VF2GUC_VF_RESET_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 +/** + * DOC: VF2GUC_QUERY_SINGLE_KLV + * + * This action is used by VF to query value of the single KLV data. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV` = 0x5509 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **KEY** - key for which value is requested | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **LENGTH** - length of data in dwords | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VALUE32** - bits 31:0 of value if **LENGTH** >= 1 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **VALUE64** - bits 63:32 of value if **LENGTH** >= 2 | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **VALUE96** - bits 95:64 of value if **LENGTH** >= 3 | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV 0x5509u + +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_MBZ (0xffffu << 16) +#define VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY (0xffffu << 0) + +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MIN_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MAX_LEN (GUC_HXG_RESPONSE_MSG_MIN_LEN + 3u) +#define 
VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_MBZ (0xfffu << 16) +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH (0xffffu << 0) +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_1_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn +#define VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96 GUC_HXG_REQUEST_MSG_n_DATAn + #endif -- cgit From f2345ed5374ef964ff97e13e82f53b07c827b373 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:44 +0200 Subject: drm/xe/vf: Add support for VF to query its configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VF driver doesn't know which GuC firmware was loaded by the PF driver and must perform GuC ABI version handshake prior to sending any other H2G actions to the GuC to submit workloads. The VF driver also doesn't have access to the fuse registers and must rely on the runtime info, which includes values of the fuse registers, that the PF driver is exposing to the VFs. Add functions to cover that functionality. We will use these functions in upcoming patches. Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 747 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 23 + drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 82 ++++ drivers/gpu/drm/xe/xe_gt_types.h | 3 + 5 files changed, 856 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf.h create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 8fe7bb80501f..e9ba6c753779 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -155,6 +155,7 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ + xe_gt_sriov_vf.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c new file mode 100644 index 000000000000..378dde5ad4f9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -0,0 +1,747 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include +#include + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_communication_mmio_abi.h" +#include "abi/guc_klvs_abi.h" +#include "abi/guc_relay_actions_abi.h" + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_vf.h" +#include "xe_gt_sriov_vf_types.h" +#include "xe_guc.h" +#include "xe_guc_hxg_helpers.h" +#include "xe_guc_relay.h" +#include "xe_sriov.h" + +#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + +static int guc_action_vf_reset(struct xe_guc *guc) +{ + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_VF_RESET), + }; + int ret; + + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int vf_reset_guc_state(struct xe_gt *gt) +{ + struct xe_guc *guc = >->uc.guc; + int err; + + err = guc_action_vf_reset(guc); + if (unlikely(err)) + xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err)); + return err; +} + +static int guc_action_match_version(struct xe_guc *guc, + u32 wanted_branch, u32 wanted_major, u32 wanted_minor, + u32 *branch, u32 *major, u32 *minor, u32 *patch) +{ + u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_VF2GUC_MATCH_VERSION), + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) | + FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor), + }; + u32 response[GUC_MAX_MMIO_MSG_LEN]; + int ret; + + BUILD_BUG_ON(VF2GUC_MATCH_VERSION_RESPONSE_MSG_LEN > GUC_MAX_MMIO_MSG_LEN); + + ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response); + if (unlikely(ret < 0)) + return ret; + + if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0]))) + return -EPROTO; + + *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]); + *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]); + *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]); + *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]); + + return 0; +} + +static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +{ + struct xe_device *xe = gt_to_xe(gt); + + switch (xe->info.platform) { + case XE_TIGERLAKE ... XE_PVC: + /* 1.1 this is current baseline for Xe driver */ + *branch = 0; + *major = 1; + *minor = 1; + break; + default: + /* 1.2 has support for the GMD_ID KLV */ + *branch = 0; + *major = 1; + *minor = 2; + break; + } +} + +static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor) +{ + /* for now it's the same as minimum */ + return vf_minimum_guc_version(gt, branch, major, minor); +} + +static int vf_handshake_with_guc(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; + struct xe_guc *guc = >->uc.guc; + u32 wanted_branch, wanted_major, wanted_minor; + u32 branch, major, minor, patch; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + /* select wanted version - prefer previous (if any) */ + if (guc_version->major || guc_version->minor) { + wanted_branch = guc_version->branch; + wanted_major = guc_version->major; + wanted_minor = guc_version->minor; + } else { + vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); + xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY); + } + + err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor, + &branch, &major, &minor, &patch); + if (unlikely(err)) + goto fail; + + /* we don't support interface version change */ + if ((guc_version->major || guc_version->minor) && + (guc_version->branch != branch || guc_version->major != major || + guc_version->minor != minor)) { + xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n", + branch, major, minor, patch); + xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + err = -EREMCHG; + goto fail; + } + + /* illegal */ + 
if (major > wanted_major) { + err = -EPROTO; + goto unsupported; + } + + /* there's no fallback on major version. */ + if (major != wanted_major) { + err = -ENOPKG; + goto unsupported; + } + + /* check against minimum version supported by us */ + vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor); + xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY); + if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) { + err = -ENOKEY; + goto unsupported; + } + + xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n", + branch, major, minor, patch); + + guc_version->branch = branch; + guc_version->major = major; + guc_version->minor = minor; + guc_version->patch = patch; + return 0; + +unsupported: + xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n", + branch, major, minor, patch, ERR_PTR(err)); +fail: + xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n", + wanted_major, wanted_minor, ERR_PTR(err)); + + /* try again with *any* just to query which version is supported */ + if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY, + GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY, + &branch, &major, &minor, &patch)) + xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n", + branch, major, minor, patch); + return err; +} + +/** + * xe_gt_sriov_vf_bootstrap - Query and setup GuC ABI interface version. + * @gt: the &xe_gt + * + * This function is for VF use only. + * It requires functional `GuC MMIO based communication`_. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) +{ + int err; + + err = vf_reset_guc_state(gt); + if (unlikely(err)) + return err; + + err = vf_handshake_with_guc(gt); + if (unlikely(err)) + return err; + + return 0; +} + +static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, + u32 *value, u32 value_len) +{ + u32 request[VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV), + FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, key), + }; + u32 response[GUC_MAX_MMIO_MSG_LEN]; + u32 length; + int ret; + + BUILD_BUG_ON(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_MAX_LEN > GUC_MAX_MMIO_MSG_LEN); + ret = xe_guc_mmio_send_recv(guc, request, ARRAY_SIZE(request), response); + if (unlikely(ret < 0)) + return ret; + + if (unlikely(FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_MBZ, response[0]))) + return -EPROTO; + + length = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH, response[0]); + if (unlikely(length > value_len)) + return -EOVERFLOW; + if (unlikely(length < value_len)) + return -ENODATA; + + switch (value_len) { + default: + xe_gt_WARN_ON(guc_to_gt(guc), value_len > 3); + fallthrough; + case 3: + value[2] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_3_VALUE96, response[3]); + fallthrough; + case 2: + value[1] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_2_VALUE64, response[2]); + fallthrough; + case 1: + value[0] = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_1_VALUE32, response[1]); + fallthrough; + case 0: + break; + } + + return 0; +} + +static int guc_action_query_single_klv32(struct xe_guc *guc, u32 key, u32 *value32) +{ + return guc_action_query_single_klv(guc, key, value32, hxg_sizeof(u32)); +} + +static int guc_action_query_single_klv64(struct xe_guc *guc, u32 key, u64 *value64) +{ + u32 value[2]; + int 
err; + + err = guc_action_query_single_klv(guc, key, value, hxg_sizeof(value)); + if (unlikely(err)) + return err; + + *value64 = make_u64_from_u32(value[1], value[0]); + return 0; +} + +static int vf_get_ggtt_info(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_guc *guc = >->uc.guc; + u64 start, size; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); + if (unlikely(err)) + return err; + + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_SIZE_KEY, &size); + if (unlikely(err)) + return err; + + if (config->ggtt_size && config->ggtt_size != size) { + xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", + size / SZ_1K, config->ggtt_size / SZ_1K); + return -EREMCHG; + } + + xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", + start, start + size - 1, size / SZ_1K); + + config->ggtt_base = start; + config->ggtt_size = size; + + return config->ggtt_size ? 0 : -ENODATA; +} + +static int vf_get_lmem_info(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_guc *guc = >->uc.guc; + char size_str[10]; + u64 size; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, &size); + if (unlikely(err)) + return err; + + if (config->lmem_size && config->lmem_size != size) { + xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n", + size / SZ_1M, config->lmem_size / SZ_1M); + return -EREMCHG; + } + + string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str)); + xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str); + + config->lmem_size = size; + + return config->lmem_size ? 0 : -ENODATA; +} + +static int vf_get_submission_cfg(struct xe_gt *gt) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_guc *guc = >->uc.guc; + u32 num_ctxs, num_dbs; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY, &num_ctxs); + if (unlikely(err)) + return err; + + err = guc_action_query_single_klv32(guc, GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY, &num_dbs); + if (unlikely(err)) + return err; + + if (config->num_ctxs && config->num_ctxs != num_ctxs) { + xe_gt_sriov_err(gt, "Unexpected CTXs reassignment: %u != %u\n", + num_ctxs, config->num_ctxs); + return -EREMCHG; + } + if (config->num_dbs && config->num_dbs != num_dbs) { + xe_gt_sriov_err(gt, "Unexpected DBs reassignment: %u != %u\n", + num_dbs, config->num_dbs); + return -EREMCHG; + } + + xe_gt_sriov_dbg_verbose(gt, "CTXs %u DBs %u\n", num_ctxs, num_dbs); + + config->num_ctxs = num_ctxs; + config->num_dbs = num_dbs; + + return config->num_ctxs ? 0 : -ENODATA; +} + +/** + * xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_vf_query_config(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int err; + + err = vf_get_ggtt_info(gt); + if (unlikely(err)) + return err; + + if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + err = vf_get_lmem_info(gt); + if (unlikely(err)) + return err; + } + + err = vf_get_submission_cfg(gt); + if (unlikely(err)) + return err; + + return 0; +} + +static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) +{ + u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VF2PF_HANDSHAKE), + FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, *major) | + FIELD_PREP(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, *minor), + }; + u32 response[VF2PF_HANDSHAKE_RESPONSE_MSG_LEN]; + int ret; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + ret = xe_guc_relay_send_to_pf(>->uc.guc.relay, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + if (unlikely(ret < 0)) + return ret; + + if (unlikely(ret != VF2PF_HANDSHAKE_RESPONSE_MSG_LEN)) + return -EPROTO; + + if (unlikely(FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_0_MBZ, response[0]))) + return -EPROTO; + + *major = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MAJOR, response[1]); + *minor = FIELD_GET(VF2PF_HANDSHAKE_RESPONSE_MSG_1_MINOR, response[1]); + + return 0; +} + +static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + gt->sriov.vf.pf_version.major = major; + gt->sriov.vf.pf_version.minor = minor; +} + +static void vf_disconnect_pf(struct xe_gt *gt) +{ + vf_connect_pf(gt, 0, 0); +} + +static int vf_handshake_with_pf(struct xe_gt *gt) +{ + u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; + u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; + u32 major = major_wanted, minor = minor_wanted; + int err; + + err = relay_action_handshake(gt, &major, &minor); + if (unlikely(err)) + goto failed; + + if (!major && !minor) { + err = -ENODATA; + goto failed; + } + + xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); + vf_connect_pf(gt, major, minor); + return 0; + +failed: + xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", + major, minor, ERR_PTR(err)); + vf_disconnect_pf(gt); + return err; +} + +/** + * xe_gt_sriov_vf_connect - Establish connection with the PF driver. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_vf_connect(struct xe_gt *gt) +{ + int err; + + err = vf_handshake_with_pf(gt); + if (unlikely(err)) + goto failed; + + return 0; + +failed: + xe_gt_sriov_err(gt, "Failed to get version info (%pe)\n", ERR_PTR(err)); + return err; +} + +static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) +{ + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + return major == gt->sriov.vf.pf_version.major && + minor <= gt->sriov.vf.pf_version.minor; +} + +static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) +{ + struct vf_runtime_reg *regs = gt->sriov.vf.runtime.regs; + unsigned int regs_size = round_up(num_regs, 4); + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + if (regs) { + if (num_regs <= gt->sriov.vf.runtime.regs_size) { + memset(regs, 0, num_regs * sizeof(*regs)); + gt->sriov.vf.runtime.num_regs = num_regs; + return 0; + } + + drmm_kfree(&xe->drm, regs); + gt->sriov.vf.runtime.regs = NULL; + gt->sriov.vf.runtime.num_regs = 0; + gt->sriov.vf.runtime.regs_size = 0; + } + + regs = drmm_kcalloc(&xe->drm, regs_size, sizeof(*regs), GFP_KERNEL); + if (unlikely(!regs)) + return -ENOMEM; + + gt->sriov.vf.runtime.regs = regs; + gt->sriov.vf.runtime.num_regs = num_regs; + gt->sriov.vf.runtime.regs_size = regs_size; + return 0; +} + +static int vf_query_runtime_info(struct xe_gt *gt) +{ + u32 request[VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN]; + u32 response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 32]; /* up to 16 regs */ + u32 limit = (ARRAY_SIZE(response) - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2; + u32 count, remaining, num, i; + u32 start = 0; + int ret; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_gt_assert(gt, limit); + + /* this is part of the 1.0 PF/VF ABI */ + if (!vf_is_negotiated(gt, 1, 0)) + return -ENOPKG; + + request[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, + GUC_RELAY_ACTION_VF2PF_QUERY_RUNTIME) | + FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_0_LIMIT, limit); + +repeat: + request[1] = FIELD_PREP(VF2PF_QUERY_RUNTIME_REQUEST_MSG_1_START, start); + ret = xe_guc_relay_send_to_pf(>->uc.guc.relay, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + if (unlikely(ret < 0)) + goto failed; + + if (unlikely(ret < VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN)) { + ret = -EPROTO; + goto failed; + } + if (unlikely((ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) % 2)) { + ret = -EPROTO; + goto failed; + } + + num = (ret - VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN) / 2; + count = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_0_COUNT, response[0]); + remaining = FIELD_GET(VF2PF_QUERY_RUNTIME_RESPONSE_MSG_1_REMAINING, response[1]); + + xe_gt_sriov_dbg_verbose(gt, "count=%u num=%u ret=%d start=%u remaining=%u\n", + count, num, ret, start, remaining); + + if (unlikely(count != num)) { + ret = -EPROTO; + goto failed; + } + + if (start == 0) { + ret = vf_prepare_runtime_info(gt, num + remaining); + if (unlikely(ret < 0)) + goto failed; + } else if (unlikely(start + num > gt->sriov.vf.runtime.num_regs)) { + ret = -EPROTO; + goto failed; + } + + for (i = 0; i < num; ++i) { + struct vf_runtime_reg *reg = >->sriov.vf.runtime.regs[start + i]; + + reg->offset = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i]; + reg->value = response[VF2PF_QUERY_RUNTIME_RESPONSE_MSG_MIN_LEN + 2 * i + 1]; + } + + if (remaining) { + start += num; + goto repeat; + } + + return 0; + +failed: + 
vf_prepare_runtime_info(gt, 0); + return ret; +} + +static void vf_show_runtime_info(struct xe_gt *gt) +{ + struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; + unsigned int size = gt->sriov.vf.runtime.num_regs; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + for (; size--; vf_regs++) + xe_gt_sriov_dbg(gt, "runtime(%#x) = %#x\n", + vf_regs->offset, vf_regs->value); +} + +/** + * xe_gt_sriov_vf_query_runtime - Query SR-IOV runtime data. + * @gt: the &xe_gt + * + * This function is for VF use only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt) +{ + int err; + + err = vf_query_runtime_info(gt); + if (unlikely(err)) + goto failed; + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + vf_show_runtime_info(gt); + + return 0; + +failed: + xe_gt_sriov_err(gt, "Failed to get runtime info (%pe)\n", + ERR_PTR(err)); + return err; +} + +/** + * xe_gt_sriov_vf_print_config - Print VF self config. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; + struct xe_device *xe = gt_to_xe(gt); + char buf[10]; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + drm_printf(p, "GGTT range:\t%#llx-%#llx\n", + config->ggtt_base, + config->ggtt_base + config->ggtt_size - 1); + + string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf); + + if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); + } + + drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); + drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs); +} + +/** + * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) +{ + struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; + unsigned int size = gt->sriov.vf.runtime.num_regs; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + for (; size--; vf_regs++) + drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value); +} + +/** + * xe_gt_sriov_vf_print_version - Print VF ABI versions. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * This function is for VF use only. 
+ */ +void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_gt_sriov_vf_guc_version *guc_version = >->sriov.vf.guc_version; + struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; + u32 branch, major, minor; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + drm_printf(p, "GuC ABI:\n"); + + vf_minimum_guc_version(gt, &branch, &major, &minor); + drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor); + + vf_wanted_guc_version(gt, &branch, &major, &minor); + drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor); + + drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n", + guc_version->branch, guc_version->major, + guc_version->minor, guc_version->patch); + + drm_printf(p, "PF ABI:\n"); + + drm_printf(p, "\tbase:\t%u.%u\n", + GUC_RELAY_VERSION_BASE_MAJOR, GUC_RELAY_VERSION_BASE_MINOR); + drm_printf(p, "\twanted:\t%u.%u\n", + GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR); + drm_printf(p, "\thandshake:\t%u.%u\n", + pf_version->major, pf_version->minor); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h new file mode 100644 index 000000000000..997cb7541036 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_VF_H_ +#define _XE_GT_SRIOV_VF_H_ + +#include + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt); +int xe_gt_sriov_vf_query_config(struct xe_gt *gt); +int xe_gt_sriov_vf_connect(struct xe_gt *gt); +int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); + +void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h new file mode 100644 index 000000000000..519492f4b7d0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_VF_TYPES_H_ +#define _XE_GT_SRIOV_VF_TYPES_H_ + +#include + +/** + * struct xe_gt_sriov_vf_guc_version - GuC ABI version details. + */ +struct xe_gt_sriov_vf_guc_version { + /** @branch: branch version. */ + u8 branch; + /** @major: major version. */ + u8 major; + /** @minor: minor version. */ + u8 minor; + /** @patch: patch version. */ + u8 patch; +}; + +/** + * struct xe_gt_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_gt_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_gt_sriov_vf_selfconfig - VF configuration data. + */ +struct xe_gt_sriov_vf_selfconfig { + /** @ggtt_base: assigned base offset of the GGTT region. */ + u64 ggtt_base; + /** @ggtt_size: assigned size of the GGTT region. */ + u64 ggtt_size; + /** @lmem_size: assigned size of the LMEM. */ + u64 lmem_size; + /** @num_ctxs: assigned number of GuC submission context IDs. */ + u16 num_ctxs; + /** @num_dbs: assigned number of GuC doorbells IDs. */ + u16 num_dbs; +}; + +/** + * struct xe_gt_sriov_vf_runtime - VF runtime data. + */ +struct xe_gt_sriov_vf_runtime { + /** @regs_size: size of runtime register array. */ + u32 regs_size; + /** @num_regs: number of runtime registers in the array. 
*/ + u32 num_regs; + /** @regs: pointer to array of register offset/value pairs. */ + struct vf_runtime_reg { + /** @regs.offset: register offset. */ + u32 offset; + /** @regs.value: register value. */ + u32 value; + } *regs; +}; + +/** + * struct xe_gt_sriov_vf - GT level VF virtualization data. + */ +struct xe_gt_sriov_vf { + /** @guc_version: negotiated GuC ABI version. */ + struct xe_gt_sriov_vf_guc_version guc_version; + /** @self_config: resource configurations. */ + struct xe_gt_sriov_vf_selfconfig self_config; + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_gt_sriov_vf_relay_version pf_version; + /** @runtime: runtime data retrieved from the PF. */ + struct xe_gt_sriov_vf_runtime runtime; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 5a114fc9dde7..475fb58882f1 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -9,6 +9,7 @@ #include "xe_force_wake_types.h" #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" +#include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_reg_sr_types.h" @@ -143,6 +144,8 @@ struct xe_gt { union { /** @sriov.pf: PF data. Valid only if driver is running as PF */ struct xe_gt_sriov_pf pf; + /** @sriov.vf: VF data. Valid only if driver is running as VF */ + struct xe_gt_sriov_vf vf; } sriov; /** -- cgit From 25275c8a4f1be38494caae25cfbac116280a5351 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:45 +0200 Subject: drm/xe/vf: Custom hardware config load step if VF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VF drivers may immediately communicate with the GuC to obtain the hardware config since the firmware shall already be running. With the GuC communication established, VFs can also obtain the values of the runtime registers (fuses) from the PF driver. 
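The configuration queries above are built on the VF2GUC_QUERY_SINGLE_KLV layout documented at the start of this series. As a minimal sketch (illustration only, not code from the patches) of the encode/decode, assuming the ABI macros from guc_actions_sriov_abi.h and <linux/bitfield.h>:

#include <linux/bitfield.h>
#include <linux/errno.h>

#include "abi/guc_actions_sriov_abi.h"

/* Sketch: encode a VF2GUC_QUERY_SINGLE_KLV request for one key. */
static void example_encode_query_single_klv(u32 *req, u32 klv_key)
{
	req[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
			    GUC_ACTION_VF2GUC_QUERY_SINGLE_KLV);
	req[1] = FIELD_PREP(VF2GUC_QUERY_SINGLE_KLV_REQUEST_MSG_1_KEY, klv_key);
}

/*
 * Sketch: decode a 64-bit value; LENGTH comes from response dword 0,
 * the low/high halves from dwords 1 and 2 (VALUE32/VALUE64 above).
 */
static int example_decode_query_single_klv64(const u32 *resp, u64 *value)
{
	u32 len = FIELD_GET(VF2GUC_QUERY_SINGLE_KLV_RESPONSE_MSG_0_LENGTH,
			    resp[0]);

	if (len != 2)
		return -EPROTO;

	*value = ((u64)resp[2] << 32) | resp[1];
	return 0;
}

This mirrors what guc_action_query_single_klv64() in the patch above does via xe_guc_mmio_send_recv().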
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index b1bb94914028..e52b544ac690 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -19,6 +19,7 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_vf.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" #include "xe_guc_db_mgr.h" @@ -547,6 +548,38 @@ out: return 0 /* FIXME: ret, don't want to stop load currently */; } +static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = xe_gt_sriov_vf_bootstrap(gt); + if (ret) + return ret; + + ret = xe_gt_sriov_vf_query_config(gt); + if (ret) + return ret; + + ret = xe_guc_hwconfig_init(guc); + if (ret) + return ret; + + ret = xe_guc_enable_communication(guc); + if (ret) + return ret; + + ret = xe_gt_sriov_vf_connect(gt); + if (ret) + return ret; + + ret = xe_gt_sriov_vf_query_runtime(gt); + if (ret) + return ret; + + return 0; +} + /** * xe_guc_min_load_for_hwconfig - load minimal GuC and read hwconfig table * @guc: The GuC object @@ -562,6 +595,9 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) { int ret; + if (IS_SRIOV_VF(guc_to_xe(guc))) + return vf_guc_min_load_for_hwconfig(guc); + xe_guc_ads_populate_minimal(&guc->ads); /* Raise GT freq to speed up HuC/GuC load */ -- cgit From 63d8cb8fe3ddf74627003f99ad085887baf91e60 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 13:05:46 +0200 Subject: drm/xe/vf: Expose SR-IOV VF attributes to GT debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For debug purposes we might want to view actual VF configuration (including GGTT range, LMEM size, number of GuC contexts IDs or doorbells) and the negotiated ABI versions (with GuC and PF). 
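As a usage sketch (not part of the patch), the new attributes are plain text files readable from userspace; the paths assume card 0 and gt0, matching the layout comment in the patch below:

#include <stdio.h>

int main(void)
{
	static const char * const files[] = {
		"/sys/kernel/debug/dri/0/gt0/vf/self_config",
		"/sys/kernel/debug/dri/0/gt0/vf/abi_versions",
		"/sys/kernel/debug/dri/0/gt0/vf/runtime_regs", /* debug builds */
	};
	char line[256];
	unsigned int i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f = fopen(files[i], "r");

		if (!f)
			continue; /* not running as a VF, or not exposed */

		printf("== %s ==\n", files[i]);
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}

	return 0;
}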
Reviewed-by: Piotr Piórkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240516110546.2216-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 3 ++ drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c | 72 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h | 14 ++++++ 4 files changed, 90 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c create mode 100644 drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e9ba6c753779..71b5c35ee4c2 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -156,6 +156,7 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ + xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c5e562e143fd..66f897a9b6ca 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -16,6 +16,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" +#include "xe_gt_sriov_vf_debugfs.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -306,4 +307,6 @@ void xe_gt_debugfs_register(struct xe_gt *gt) if (IS_SRIOV_PF(xe)) xe_gt_sriov_pf_debugfs_register(gt, root); + else if (IS_SRIOV_VF(xe)) + xe_gt_sriov_vf_debugfs_register(gt, root); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c new file mode 100644 index 000000000000..f3ddcbefc6bc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include + +#include "xe_gt_debugfs.h" +#include "xe_gt_sriov_vf.h" +#include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_types.h" +#include "xe_sriov.h" + +/* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf + * │   │   ├── self_config + * │   │   ├── abi_versions + * │   │   ├── runtime_regs + */ + +static const struct drm_info_list vf_info[] = { + { + "self_config", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_vf_print_config, + }, + { + "abi_versions", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_vf_print_version, + }, +#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV) + { + "runtime_regs", + .show = xe_gt_debugfs_simple_show, + .data = xe_gt_sriov_vf_print_runtime, + }, +#endif +}; + +/** + * xe_gt_sriov_vf_debugfs_register - Register SR-IOV VF specific entries in GT debugfs. + * @gt: the &xe_gt to register + * @root: the &dentry that represents the GT directory + * + * Register SR-IOV VF entries that are GT related and must be shown under GT debugfs. 
+ */ +void xe_gt_sriov_vf_debugfs_register(struct xe_gt *gt, struct dentry *root) +{ + struct xe_device *xe = gt_to_xe(gt); + struct drm_minor *minor = xe->drm.primary; + struct dentry *vfdentry; + + xe_assert(xe, IS_SRIOV_VF(xe)); + xe_assert(xe, root->d_inode->i_private == gt); + + /* + * /sys/kernel/debug/dri/0/ + * ├── gt0 + * │   ├── vf + */ + vfdentry = debugfs_create_dir("vf", root); + if (IS_ERR(vfdentry)) + return; + vfdentry->d_inode->i_private = gt; + + drm_debugfs_create_files(vf_info, ARRAY_SIZE(vf_info), vfdentry, minor); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h new file mode 100644 index 000000000000..b2cff7ef5c78 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_VF_DEBUGFS_H_ +#define _XE_GT_SRIOV_VF_DEBUGFS_H_ + +struct xe_gt; +struct dentry; + +void xe_gt_sriov_vf_debugfs_register(struct xe_gt *gt, struct dentry *root); + +#endif -- cgit From 844f3228d225d25af8a21a7e1554d78c20823a37 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 10 May 2024 08:01:08 -0700 Subject: drm/xe: Replace RING_START_UDW by u64 RING_START MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other u64 registers are printed in a single line so RING_START needs to follow that too. As there is no upstream decoder tool parsing RING_START this will not break any decoder application. Cc: Niranjana Vishwanathapura Cc: Matt Roper Signed-off-by: José Roberto de Souza Reviewed-by: Niranjana Vishwanathapura Link: https://patchwork.freedesktop.org/patch/msgid/20240510150108.80679-1-jose.souza@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 10 +++++----- drivers/gpu/drm/xe/xe_hw_engine_types.h | 4 +--- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 45f582a7caaa..e19af179af33 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -908,11 +908,13 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { + val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + snapshot->reg.ring_start |= val << 32; + } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); - snapshot->reg.ring_start_udw = - hw_engine_mmio_read32(hwe, RING_START_UDW(0)); } snapshot->reg.ring_head = @@ -1003,9 +1005,7 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->reg.ring_execlist_status); drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", snapshot->reg.ring_execlist_sq_contents); - drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); - drm_printf(p, "\tRING_START_UDW: 0x%08x\n", - snapshot->reg.ring_start_udw); + drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start); drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h 
b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 5f4b67acba99..b2f64b92a636 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -188,9 +188,7 @@ struct xe_hw_engine_snapshot { /** @reg.ring_hws_pga: RING_HWS_PGA */ u32 ring_hws_pga; /** @reg.ring_start: RING_START */ - u32 ring_start; - /** @reg.ring_start_udw: RING_START_UDW */ - u32 ring_start_udw; + u64 ring_start; /** @reg.ring_head: RING_HEAD */ u32 ring_head; /** @reg.ring_tail: RING_TAIL */ -- cgit From ab689514b6ac518ef6e88afa245b834b0dae15a5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:03 -0700 Subject: drm/xe: Promote xe_hw_engine_class_to_str() Move it out of the sysfs compilation unit so it can be re-used in other places. Reviewed-by: Nirmoy Das Reviewed-by: Oak Zeng Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 18 ++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine.h | 2 ++ drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 18 ------------------ 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index e19af179af33..b71e90c555fa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1099,3 +1099,21 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe) return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY && hwe->instance == gt->usm.reserved_bcs_instance; } + +const char *xe_hw_engine_class_to_str(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return "rcs"; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return "vcs"; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return "vecs"; + case XE_ENGINE_CLASS_COPY: + return "bcs"; + case XE_ENGINE_CLASS_COMPUTE: + return "ccs"; + default: + return NULL; + } +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 71968ee2f600..843de159e47c 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -67,4 +67,6 @@ static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) return hwe->name; } +const char *xe_hw_engine_class_to_str(enum xe_engine_class class); + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 844ec68cbbb8..efce6c7dd2a2 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -618,24 +618,6 @@ static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } -static const char *xe_hw_engine_class_to_str(enum xe_engine_class class) -{ - switch (class) { - case XE_ENGINE_CLASS_RENDER: - return "rcs"; - case XE_ENGINE_CLASS_VIDEO_DECODE: - return "vcs"; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - return "vecs"; - case XE_ENGINE_CLASS_COPY: - return "bcs"; - case XE_ENGINE_CLASS_COMPUTE: - return "ccs"; - default: - return NULL; - } -} - /** * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT. * @gt: Xe GT. -- cgit From bd49e50d81b543e678965118a86958d87c045c73 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:04 -0700 Subject: drm/xe: Add XE_ENGINE_CLASS_OTHER to str conversion XE_ENGINE_CLASS_OTHER was missing from the str conversion. Add it and remove the default handling so it's protected by -Wswitch. 
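To illustrate the -Wswitch point (a generic sketch, not code from this patch): when a switch over an enum has no default label, gcc and clang warn about any enumerator that lacks a case, so future additions cannot be silently missed:

/* Sketch: exhaustive switch kept warning-clean without a default. */
enum example_class { EXAMPLE_A, EXAMPLE_B, EXAMPLE_MAX };

static const char *example_to_str(enum example_class c)
{
	switch (c) {
	case EXAMPLE_A:
		return "a";
	case EXAMPLE_B:
		return "b";
	case EXAMPLE_MAX:
		break;
	/*
	 * No default: adding EXAMPLE_C to the enum without a case here
	 * triggers "enumeration value 'EXAMPLE_C' not handled in switch".
	 */
	}

	return NULL;
}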
Currently the only user is xe_hw_engine_class_sysfs_init(), which already skips XE_ENGINE_CLASS_OTHER, so there's no change in behavior. Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b71e90c555fa..942fca8f1eb9 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1111,9 +1111,13 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class) return "vecs"; case XE_ENGINE_CLASS_COPY: return "bcs"; + case XE_ENGINE_CLASS_OTHER: + return "other"; case XE_ENGINE_CLASS_COMPUTE: return "ccs"; - default: - return NULL; + case XE_ENGINE_CLASS_MAX: + break; } + + return NULL; } -- cgit From 9b090d57746d965684f53a1aefcb363bab653ad3 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 17 May 2024 13:43:05 -0700 Subject: drm/xe/lrc: Add helper to capture context timestamp Add a helper to capture CTX_TIMESTAMP from the context image so it can be used to calculate the runtime. v2: Add kernel-doc to clarify expectation from caller Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 1 + drivers/gpu/drm/xe/xe_lrc.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_lrc_types.h | 3 +++ 4 files changed, 30 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index e6ca8bbda8f4..045dfd09db99 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,6 +11,7 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_TIMESTAMP (0x22 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9b0a4078add3..f679cb9aaea7 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -844,6 +844,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; + lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -898,6 +899,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1576,3 +1579,12 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) xe_bo_put(snapshot->lrc_bo); kfree(snapshot); } + +u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +{ + *old_ts = lrc->ctx_timestamp; + + lrc->ctx_timestamp = xe_lrc_read_ctx_reg(lrc, CTX_TIMESTAMP); + + return lrc->ctx_timestamp; +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e0e841963c23..b9da1031083b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -66,4 +66,18 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void 
xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); +/** + * xe_lrc_update_timestamp - readout LRC timestamp and update cached value + * @lrc: logical ring context for this exec queue + * @old_ts: pointer where to save the previous timestamp + * + * Read the current timestamp for this LRC and update the cached value. The + * previous cached value is also returned in @old_ts so the caller can calculate + * the delta between 2 updates. Note that this is not intended to be called from + * any place, but just by the paths updating the drm client utilization. + * + * Returns the current LRC timestamp + */ +u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); + #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index cdbf03faef15..0fa055da6b27 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -45,6 +45,9 @@ struct xe_lrc { /** @fence_ctx: context for hw fence */ struct xe_hw_fence_ctx fence_ctx; + + /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ + u32 ctx_timestamp; }; struct xe_lrc_snapshot; -- cgit From f2f6b667c67daee6fe2c51b5cec3bb0f1b4c1ce0 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:06 -0700 Subject: drm/xe: Add helper to capture engine timestamp Just like CTX_TIMESTAMP is used to calculate runtime, add a helper to get the timestamp for the engine so it can be used to calculate the "engine time" with the same unit as the runtime is recorded. Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 5 +++++ drivers/gpu/drm/xe/xe_hw_engine.h | 1 + 2 files changed, 6 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 942fca8f1eb9..de1aefaa2335 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1121,3 +1121,8 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class) return NULL; } + +u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe) +{ + return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base)); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 843de159e47c..7f2d27c0ba1a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -68,5 +68,6 @@ static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) } const char *xe_hw_engine_class_to_str(enum xe_engine_class class); +u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); #endif -- cgit From 6109f24f87d75122cf6de50901115cbee4285ce2 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 17 May 2024 13:43:07 -0700 Subject: drm/xe: Add helper to accumulate exec queue runtime Add a helper to accumulate per-client runtime of all its exec queues. This is called every time a sched job is finished. v2: - Use guc_exec_queue_free_job() and execlist_job_free() to accumulate runtime when job is finished since xe_sched_job_completed() is not a notification that job finished. 
- Stop trying to update runtime from xe_exec_queue_fini() - that is redundant and may happen after xef is closed, leading to a use-after-free - Do not special case the first timestamp read: the default LRC sets CTX_TIMESTAMP to zero, so even the first sample should be a valid one. - Handle the parallel submission case by multiplying the runtime by width. v3: Update comments Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_exec_queue.c | 37 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 1 + drivers/gpu/drm/xe/xe_execlist.c | 1 + drivers/gpu/drm/xe/xe_guc_submit.c | 2 ++ 5 files changed, 44 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0af739981ebf..13da7a079c5f 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -559,6 +559,9 @@ struct xe_file { struct mutex lock; } exec_queue; + /** @runtime: hw engine class runtime in ticks for this drm client */ + u64 runtime[XE_ENGINE_CLASS_MAX]; + /** @client: drm client */ struct xe_drm_client *client; }; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 395de93579fa..fa6dc996eca8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -769,6 +769,43 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) q->lrc[0].fence_ctx.next_seqno - 1; } +/** + * xe_exec_queue_update_runtime() - Update runtime for this exec queue from hw + * @q: The exec queue + * + * Update the timestamp saved by HW for this exec queue and save runtime + * calculated by using the delta from last update. On multi-lrc case, only the + * first is considered. + */ +void xe_exec_queue_update_runtime(struct xe_exec_queue *q) +{ + struct xe_file *xef; + struct xe_lrc *lrc; + u32 old_ts, new_ts; + + /* + * Jobs that are run during driver load may use an exec_queue, but are + * not associated with a user xe file, so avoid accumulating busyness + * for kernel specific work. + */ + if (!q->vm || !q->vm->xef) + return; + + xef = q->vm->xef; + + /* + * Only sample the first LRC. For parallel submission, all of them are + * scheduled together and we compensate that below by multiplying by + * width - this may introduce errors if that premise is not true and + * they don't exit 100% aligned. On the other hand, looping through + * the LRCs and reading them in different time could also introduce + * errors. 
+ */ + lrc = &q->lrc[0]; + new_ts = xe_lrc_update_timestamp(lrc, &old_ts); + xef->runtime[q->class] += (new_ts - old_ts) * q->width; +} + void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 48f6da53a292..e0f07d28ee1a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -75,5 +75,6 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); +void xe_exec_queue_update_runtime(struct xe_exec_queue *q); #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index e9dee1e14fef..bd7f27efe0e0 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -306,6 +306,7 @@ static void execlist_job_free(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); + xe_exec_queue_update_runtime(job->q); xe_sched_job_put(job); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 4efb88e3e056..ad2b8067d071 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -749,6 +749,8 @@ static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); + xe_exec_queue_update_runtime(job->q); + trace_xe_sched_job_free(job); xe_sched_job_put(job); } -- cgit From baa14865529bf1f3c12dc6145bd9109ef289e038 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:08 -0700 Subject: drm/xe: Cache data about user-visible engines gt->info.engine_mask used to indicate the available engines, but that is not always true anymore: some engines are reserved to kernel and some may be exposed as a single engine (e.g. with ccs_mode). Runtime changes only happen when no clients exist, so it's safe to cache the list of engines in the gt and update that when it's needed. This will help implementing per client engine utilization so this (mostly constant) information doesn't need to be re-calculated on every query. 
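In code terms, a hedged sketch of how a query path can consume the cached data (the structure names follow the patch below; this is not code from the series):

/* Sketch: per-class capacity becomes a table lookup, not an engine walk. */
static u8 example_user_engine_capacity(struct xe_gt *gt,
				       enum xe_engine_class class)
{
	/*
	 * gt->user_engines is refreshed by xe_gt_record_user_engines()
	 * whenever the exposed engine set changes (e.g. via ccs_mode).
	 */
	return gt->user_engines.instances_per_class[class];
}

static bool example_user_engine_visible(struct xe_gt *gt,
					enum xe_hw_engine_id id)
{
	return gt->user_engines.mask & BIT_ULL(id);
}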
Reviewed-by: Jonathan Cavitt Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt.h | 13 +++++++++++++ drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 1 + drivers/gpu/drm/xe/xe_gt_types.h | 21 ++++++++++++++++++++- 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e69a03ddd255..5194a3d38e76 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -560,9 +560,32 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + xe_gt_record_user_engines(gt); + return drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); } +void xe_gt_record_user_engines(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + gt->user_engines.mask = 0; + memset(gt->user_engines.instances_per_class, 0, + sizeof(gt->user_engines.instances_per_class)); + + for_each_hw_engine(hwe, gt, id) { + if (xe_hw_engine_is_reserved(hwe)) + continue; + + gt->user_engines.mask |= BIT_ULL(id); + gt->user_engines.instances_per_class[hwe->class]++; + } + + xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask) + == gt->info.engine_mask); +} + static int do_gt_reset(struct xe_gt *gt) { int err; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 8474c50b1b30..1d010bf4a756 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -38,6 +38,19 @@ int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); + +/** + * xe_gt_record_user_engines - save data related to engines available to + * usersapce + * @gt: GT structure + * + * Walk the available HW engines from gt->info.engine_mask and calculate data + * related to those engines that may be used by userspace. To be used whenever + * available engines change in runtime (e.g. with ccs_mode) or during + * initialization + */ +void xe_gt_record_user_engines(struct xe_gt *gt); + void xe_gt_suspend_prepare(struct xe_gt *gt); int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index a34c9a24dafc..c36218f4f6c8 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -134,6 +134,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, if (gt->ccs_mode != num_engines) { xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); gt->ccs_mode = num_engines; + xe_gt_record_user_engines(gt); xe_gt_reset_async(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 475fb58882f1..10a9a9529377 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -113,7 +113,11 @@ struct xe_gt { enum xe_gt_type type; /** @info.reference_clock: clock frequency */ u32 reference_clock; - /** @info.engine_mask: mask of engines present on GT */ + /** + * @info.engine_mask: mask of engines present on GT. Some of + * them may be reserved in runtime and not available for user. 
+ * See @user_engines.mask + */ u64 engine_mask; /** @info.gmdid: raw GMD_ID value from hardware */ u32 gmdid; @@ -368,6 +372,21 @@ struct xe_gt { /** @wa_active.oob: bitmap with active OOB workaroudns */ unsigned long *oob; } wa_active; + + /** @user_engines: engines present in GT and available to userspace */ + struct { + /** + * @user_engines.mask: like @info->engine_mask, but take in + * consideration only engines available to userspace + */ + u64 mask; + + /** + * @user_engines.instances_per_class: aggregate per class the + * number of engines available to userspace + */ + u8 instances_per_class[XE_ENGINE_CLASS_MAX]; + } user_engines; }; #endif -- cgit From 6aa18d7436b0c11f7e62fd6cdb707eaeab1dc473 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:09 -0700 Subject: drm/xe: Add helper to return any available hw engine Get the first available engine from a gt, which helps in the case any engine serves as a context, like when reading RING_TIMESTAMP. Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-8-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_gt.h | 7 +++++++ 2 files changed, 18 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 5194a3d38e76..3432fef56486 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -833,3 +833,14 @@ struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, return NULL; } + +struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + return hwe; + + return NULL; +} diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 1d010bf4a756..9073ac68a777 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -67,6 +67,13 @@ void xe_gt_remove(struct xe_gt *gt); struct xe_hw_engine * xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, enum xe_engine_class class); +/** + * xe_gt_any_hw_engine - scan the list of engines and return the + * first available + * @gt: GT structure + */ +struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt); + struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, enum xe_engine_class class, u16 instance, -- cgit From 188ced1e0ff892f0948f20480e2e0122380ae46d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 17 May 2024 13:43:10 -0700 Subject: drm/xe/client: Print runtime to fdinfo Print the accumulated runtime for client when printing fdinfo. Each time a query is done it first does 2 things: 1) loop through all the exec queues for the current client and accumulate the runtime, per engine class. CTX_TIMESTAMP is used for that, being read from the context image. 2) Read a "GPU timestamp" that can be used for considering "how much GPU time has passed" and that has the same unit/refclock as the one recording the runtime. RING_TIMESTAMP is used for that via MMIO. Since for all current platforms RING_TIMESTAMP follows the same refclock, just read it once, using any first engine available. 
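A hedged sketch of that single read, using the helpers added earlier in this series (error handling trimmed; the fdinfo code in this patch does effectively the same):

static u64 example_read_gpu_timestamp(struct xe_device *xe)
{
	struct xe_hw_engine *hwe;
	struct xe_gt *gt;
	unsigned long gt_id;
	u64 ts = 0;

	for_each_gt(gt, xe, gt_id) {
		hwe = xe_gt_any_hw_engine(gt);
		if (!hwe)
			continue;

		xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		ts = xe_hw_engine_read_timestamp(hwe);
		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
		break;
	}

	return ts;
}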
This is exported to userspace as 2 numbers in fdinfo: drm-cycles-: drm-total-cycles-: Userspace is expected to collect at least 2 samples, which allows to know the client engine busyness as per: RUNTIME1 - RUNTIME0 busyness = --------------------- T1 - T0 Since drm-cycles- always starts at 0, it's also possible to know if and engine was ever used by a client. It's expected that userspace will read any 2 samples every few seconds. Given the update frequency of the counters involved and that CTX_TIMESTAMP is 32-bits, the counter for each exec_queue can wrap around (assuming 100% utilization) after ~200s. The wraparound is not perceived by userspace since it's just accumulated for all the exec_queues in a 64-bit counter) but the measurement will not be accurate if the samples are too far apart. This could be mitigated by adding a workqueue to accumulate the counters every so often, but it's additional complexity for something that is done already by userspace every few seconds in tools like gputop (from igt), htop, nvtop, etc, with none of them really defaulting to 1 sample per minute or more. Reviewed-by: Umesh Nerlige Ramappa Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240517204310.88854-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- Documentation/gpu/drm-usage-stats.rst | 21 ++++- Documentation/gpu/xe/index.rst | 1 + Documentation/gpu/xe/xe-drm-usage-stats.rst | 10 +++ drivers/gpu/drm/xe/xe_drm_client.c | 121 +++++++++++++++++++++++++++- 4 files changed, 150 insertions(+), 3 deletions(-) create mode 100644 Documentation/gpu/xe/xe-drm-usage-stats.rst (limited to 'drivers/gpu') diff --git a/Documentation/gpu/drm-usage-stats.rst b/Documentation/gpu/drm-usage-stats.rst index 6dc299343b48..a80f95ca1b2f 100644 --- a/Documentation/gpu/drm-usage-stats.rst +++ b/Documentation/gpu/drm-usage-stats.rst @@ -112,6 +112,19 @@ larger value within a reasonable period. Upon observing a value lower than what was previously read, userspace is expected to stay with that larger previous value until a monotonic update is seen. +- drm-total-cycles-: + +Engine identifier string must be the same as the one specified in the +drm-cycles- tag and shall contain the total number cycles for the given +engine. + +This is a timestamp in GPU unspecified unit that matches the update rate +of drm-cycles-. For drivers that implement this interface, the engine +utilization can be calculated entirely on the GPU clock domain, without +considering the CPU sleep time between 2 samples. + +A driver may implement either this key or drm-maxfreq-, but not both. + - drm-maxfreq-: [Hz|MHz|KHz] Engine identifier string must be the same as the one specified in the @@ -121,6 +134,9 @@ percentage utilization of the engine, whereas drm-engine- only reflects time active without considering what frequency the engine is operating as a percentage of its maximum frequency. +A driver may implement either this key or drm-total-cycles-, but not +both. + Memory ^^^^^^ @@ -168,5 +184,6 @@ be documented above and where possible, aligned with other drivers. Driver specific implementations ------------------------------- -:ref:`i915-usage-stats` -:ref:`panfrost-usage-stats` +* :ref:`i915-usage-stats` +* :ref:`panfrost-usage-stats` +* :ref:`xe-usage-stats` diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index c224ecaee81e..3f07aa3b5432 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -23,3 +23,4 @@ DG2, etc is provided to prototype the driver. 
xe_firmware xe_tile xe_debugging + xe-drm-usage-stats.rst diff --git a/Documentation/gpu/xe/xe-drm-usage-stats.rst b/Documentation/gpu/xe/xe-drm-usage-stats.rst new file mode 100644 index 000000000000..482d503ae68a --- /dev/null +++ b/Documentation/gpu/xe/xe-drm-usage-stats.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. _xe-usage-stats: + +======================================== +Xe DRM client usage stats implementation +======================================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_drm_client.c + :doc: DRM Client usage stats diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 08f0b7c95901..af404c9e5cc0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -2,6 +2,7 @@ /* * Copyright © 2023 Intel Corporation */ +#include "xe_drm_client.h" #include <drm/drm_print.h> #include <drm/xe_drm.h> @@ -12,9 +13,66 @@ #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" -#include "xe_drm_client.h" +#include "xe_exec_queue.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_hw_engine.h" +#include "xe_pm.h" #include "xe_trace.h" +/** + * DOC: DRM Client usage stats + * + * The drm/xe driver implements the DRM client usage stats specification as + * documented in :ref:`drm-client-usage-stats`. + * + * Example of the output showing the implemented key value pairs and the + * entirety of the currently possible format options: + * + * :: + * + * pos: 0 + * flags: 0100002 + * mnt_id: 26 + * ino: 685 + * drm-driver: xe + * drm-client-id: 3 + * drm-pdev: 0000:03:00.0 + * drm-total-system: 0 + * drm-shared-system: 0 + * drm-active-system: 0 + * drm-resident-system: 0 + * drm-purgeable-system: 0 + * drm-total-gtt: 192 KiB + * drm-shared-gtt: 0 + * drm-active-gtt: 0 + * drm-resident-gtt: 192 KiB + * drm-total-vram0: 23992 KiB + * drm-shared-vram0: 16 MiB + * drm-active-vram0: 0 + * drm-resident-vram0: 23992 KiB + * drm-total-stolen: 0 + * drm-shared-stolen: 0 + * drm-active-stolen: 0 + * drm-resident-stolen: 0 + * drm-cycles-rcs: 28257900 + * drm-total-cycles-rcs: 7655183225 + * drm-cycles-bcs: 0 + * drm-total-cycles-bcs: 7655183225 + * drm-cycles-vcs: 0 + * drm-total-cycles-vcs: 7655183225 + * drm-engine-capacity-vcs: 2 + * drm-cycles-vecs: 0 + * drm-total-cycles-vecs: 7655183225 + * drm-engine-capacity-vecs: 2 + * drm-cycles-ccs: 0 + * drm-total-cycles-ccs: 7655183225 + * drm-engine-capacity-ccs: 4 + * + * Possible `drm-cycles-` key names are: `rcs`, `ccs`, `bcs`, `vcs`, `vecs` and + * "other".
+ */ + /** * xe_drm_client_alloc() - Allocate drm client * @void: No arg @@ -179,6 +237,66 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) } } +static void show_runtime(struct drm_printer *p, struct drm_file *file) +{ + unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; + struct xe_file *xef = file->driver_priv; + struct xe_device *xe = xef->xe; + struct xe_gt *gt; + struct xe_hw_engine *hwe; + struct xe_exec_queue *q; + u64 gpu_timestamp; + + xe_pm_runtime_get(xe); + + /* Accumulate all the exec queues from this client */ + mutex_lock(&xef->exec_queue.lock); + xa_for_each(&xef->exec_queue.xa, i, q) + xe_exec_queue_update_runtime(q); + mutex_unlock(&xef->exec_queue.lock); + + /* Get the total GPU cycles */ + for_each_gt(gt, xe, gt_id) { + hwe = xe_gt_any_hw_engine(gt); + if (!hwe) + continue; + + xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + break; + } + + xe_pm_runtime_put(xe); + + if (unlikely(!hwe)) + return; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { + const char *class_name; + + for_each_gt(gt, xe, gt_id) + capacity[class] += gt->user_engines.instances_per_class[class]; + + /* + * Engines may be fused off or not exposed to userspace. Don't + * return anything if this entire class is not available + */ + if (!capacity[class]) + continue; + + class_name = xe_hw_engine_class_to_str(class); + drm_printf(p, "drm-cycles-%s:\t%llu\n", + class_name, xef->runtime[class]); + drm_printf(p, "drm-total-cycles-%s:\t%llu\n", + class_name, gpu_timestamp); + + if (capacity[class] > 1) + drm_printf(p, "drm-engine-capacity-%s:\t%lu\n", + class_name, capacity[class]); + } +} + /** * xe_drm_client_fdinfo() - Callback for fdinfo interface * @p: The drm_printer ptr @@ -192,5 +310,6 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) { show_meminfo(p, file); + show_runtime(p, file); } #endif -- cgit
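The busyness computation described in the commit message above can be illustrated with a minimal userspace sketch. This is not part of the patch: the fdinfo path is a made-up example, read_u64_key() is an assumed helper, and error handling is trimmed.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed helper: scan a fdinfo file for "<key>:" and return its value */
static unsigned long long read_u64_key(const char *path, const char *key)
{
	char line[256];
	unsigned long long val = 0;
	FILE *f = fopen(path, "r");

	if (!f)
		return 0;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, key, strlen(key)))
			sscanf(line + strlen(key), ": %llu", &val);
	fclose(f);
	return val;
}

int main(void)
{
	/* Hypothetical fdinfo path of the client being monitored */
	const char *path = "/proc/1234/fdinfo/5";
	unsigned long long c0, c1, t0, t1;

	c0 = read_u64_key(path, "drm-cycles-rcs");
	t0 = read_u64_key(path, "drm-total-cycles-rcs");
	sleep(2); /* any two samples a few seconds apart will do */
	c1 = read_u64_key(path, "drm-cycles-rcs");
	t1 = read_u64_key(path, "drm-total-cycles-rcs");

	if (t1 > t0) /* busyness = (RUNTIME1 - RUNTIME0) / (T1 - T0) */
		printf("rcs busyness: %.1f%%\n",
		       100.0 * (double)(c1 - c0) / (double)(t1 - t0));
	return 0;
}

Sampling every few seconds, as the commit message suggests, also keeps the drm-cycles- delta well within one 32-bit CTX_TIMESTAMP wrap period (~200s at full utilization).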
From 995f7dafd110eecbeef1e02846d897d64839d838 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Tue, 16 Apr 2024 14:50:37 +0000 Subject: drm/xe/uapi: Expose the L3 bank mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L3 bank mask is already generated and stored internally with the rest of the GT topology. In user space, the compute runtime now needs this information to be added to the device properties, so the topology mask query is extended to provide a new mask which represents the L3 banks enabled on the GT. The changes in the compute runtime are ready and approved, see link below. A userspace sketch of consuming the new mask follows this patch. v2: Rewrite commit message and add a link to the compute runtime PR (Francois Dugast) Cc: Matt Roper Cc: Robert Krzemien Cc: Mateusz Jablonski Link: https://github.com/intel/compute-runtime/pull/722 Signed-off-by: Francois Dugast Acked-by: Mateusz Jablonski Reviewed-by: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240416145037.7-2-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_query.c | 9 ++++++++- include/uapi/drm/xe_drm.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 29f847debb5c..995effcb904b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -455,9 +455,10 @@ static int query_hwconfig(struct xe_device *xe, static size_t calc_topo_query_size(struct xe_device *xe) { return xe->info.gt_count * - (3 * sizeof(struct drm_xe_query_topology_mask) + + (4 * sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); } @@ -511,6 +512,12 @@ static int query_gt_topology(struct xe_device *xe, if (err) return err; + topo.type = DRM_XE_TOPO_L3_BANK; + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, + sizeof(gt->fuse_topo.l3_bank_mask)); + if (err) + return err; + topo.type = DRM_XE_TOPO_EU_PER_DSS; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1446c3bae515..d7b0903c22b2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -508,6 +508,7 @@ struct drm_xe_query_gt_list { * containing the following in mask: * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` * means 32 DSS are available for compute. + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: @@ -520,6 +521,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_GEOMETRY 1 #define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 /** @type: type of mask */ __u16 type; -- cgit
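For reference, a hedged sketch of how userspace might consume the new DRM_XE_TOPO_L3_BANK entries using the standard two-call query pattern. The open DRM fd and the uapi include path are assumptions (the path depends on the libdrm installation), and error handling is trimmed.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static void print_l3_bank_masks(int fd)
{
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_GT_TOPOLOGY,
	};
	uint8_t *buf, *p;

	/* First call with size == 0 asks the kernel for the buffer size */
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return;

	buf = calloc(1, query.size);
	query.data = (uintptr_t)buf;
	if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) {
		free(buf);
		return;
	}

	/* The buffer is a sequence of variable-size topology mask entries */
	for (p = buf; p < buf + query.size;) {
		struct drm_xe_query_topology_mask *topo = (void *)p;

		if (topo->type == DRM_XE_TOPO_L3_BANK) {
			printf("gt%u L3 bank mask:", topo->gt_id);
			for (uint32_t i = 0; i < topo->num_bytes; i++)
				printf(" %02x", topo->mask[i]);
			printf("\n");
		}
		p += sizeof(*topo) + topo->num_bytes;
	}
	free(buf);
}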
From 735940f99966a5d510c43d05c932da536b33715a Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 21 May 2024 12:36:23 +0200 Subject: drm/xe: Add warn when level cannot be zero At xe_pt_zap_ptes_entry() and xe_pt_stage_unbind_entry(), the level cannot be 0. Therefore, add an independent check for the level. Since the level cannot be zero at this point, there is no need to check for `is_compact`, so remove that instead. Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240521103623.11645-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_pt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 11dd0988ffda..cd60c009b679 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -763,7 +763,7 @@ static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, pgoff_t end_offset; XE_WARN_ON(!*child); - XE_WARN_ON(!level && xe_child->is_compact); + XE_WARN_ON(!level); /* * Note that we're called from an entry callback, and we're dealing @@ -1445,7 +1445,7 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); XE_WARN_ON(!*child); - XE_WARN_ON(!level && xe_child->is_compact); + XE_WARN_ON(!level); xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); -- cgit
From 01d71dff61c7e1efae1d7f11b71dfa4549c172bb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 21 May 2024 12:27:15 +0200 Subject: drm/xe/tests: Use uninterruptible VM lock An interruptible lock can return an error and needs a return value check. This test should finish quickly enough, so use an uninterruptible lock instead. Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240521102715.22700-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index b6e7f80c3774..962f6438e219 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -344,7 +344,7 @@ static int migrate_test_run_device(struct xe_device *xe) struct xe_migrate *m = tile->migrate; kunit_info(test, "Testing tile id %d.\n", id); - xe_vm_lock(m->q->vm, true); + xe_vm_lock(m->q->vm, false); xe_migrate_sanity_test(m, test); xe_vm_unlock(m->q->vm); } -- cgit
From bdc9abed51b52965557f9c46d541b5ca3fc66da3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:25 +0200 Subject: drm/xe: Fix xe_uc.h Prefer forward declaration over #include xe_uc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_uc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 5dfa7725483d..11856f24e6f9 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -6,7 +6,7 @@ #ifndef _XE_UC_H_ #define _XE_UC_H_ -#include "xe_uc_types.h" +struct xe_uc; int xe_uc_init(struct xe_uc *uc); int xe_uc_init_hwconfig(struct xe_uc *uc); -- cgit
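This fix and the three that follow apply the same header hygiene pattern. A generic sketch with hypothetical xe_foo names, not taken from the tree: when a header only passes pointers around, a forward declaration of the opaque struct replaces the *_types.h include and spares every includer a rebuild when the type layout changes.

/* xe_foo.h: hypothetical header following the same pattern */
#ifndef _XE_FOO_H_
#define _XE_FOO_H_

/*
 * No #include "xe_foo_types.h" here: the functions below only take
 * pointers, so an opaque forward declaration is sufficient.
 */
struct xe_foo;

int xe_foo_init(struct xe_foo *foo);
void xe_foo_sanitize(struct xe_foo *foo);

#endif /* _XE_FOO_H_ */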
From 2291c091107d0635f10269098152900c0a12fd00 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:26 +0200 Subject: drm/xe: Fix xe_gsc.h Prefer forward declaration over #include xe_gsc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gsc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index dd16e9b8b894..1c7a623faf11 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -6,8 +6,9 @@ #ifndef _XE_GSC_H_ #define _XE_GSC_H_ -#include "xe_gsc_types.h" +#include <linux/types.h> +struct xe_gsc; struct xe_gt; struct xe_hw_engine; -- cgit
From de1429a99fd37f706e6bdbf5e9ad318e1523442c Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:27 +0200 Subject: drm/xe: Fix xe_huc.h Prefer forward declaration over #include xe_huc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_huc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h index 3ab56cc14b00..fa1c45e70443 100644 --- a/drivers/gpu/drm/xe/xe_huc.h +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -6,9 +6,10 @@ #ifndef _XE_HUC_H_ #define _XE_HUC_H_ -#include "xe_huc_types.h" +#include <linux/types.h> struct drm_printer; +struct xe_huc; enum xe_huc_auth_types { XE_HUC_AUTH_VIA_GUC = 0, -- cgit
From a6bc7cda37d1ad52cdc59a8d4c4d654836f8c238 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 21 May 2024 12:28:28 +0200 Subject: drm/xe: Fix xe_guc_pc.h Prefer forward declaration over #include xe_guc_pc_types.h Signed-off-by: Michal Wajdeczko Reviewed-by: Francois Dugast Link: https://patchwork.freedesktop.org/patch/msgid/20240521102828.668-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index d3680d89490e..532cac985a6d 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -6,7 +6,9 @@ #ifndef _XE_GUC_PC_H_ #define _XE_GUC_PC_H_ -#include "xe_guc_pc_types.h" +#include <linux/types.h> + +struct xe_guc_pc; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); @@ -27,4 +29,5 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); + #endif /* _XE_GUC_PC_H_ */ -- cgit
From 31a278b5a11e6785db7f4976419d2b284591720e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 20 May 2024 20:18:12 +0200 Subject: drm/i915/display: Add missing include to intel_vga.c This compilation unit uses the udelay() function without including its header file. Fix that to break the dependency on other code. Signed-off-by: Michal Wajdeczko Cc: Jani Nikula Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240520181814.2392-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/display/intel_vga.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index 4b98833bfa8c..0b5916c15307 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/delay.h> #include <linux/vgaarb.h> #include <video/vga.h>
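The rule this last fix enforces can be shown with a small hypothetical fragment, not from the patch: a compilation unit that calls udelay() should include <linux/delay.h> itself rather than rely on another header pulling it in transitively.

#include <linux/delay.h> /* udelay(); do not depend on transitive includes */

/* Hypothetical helper; the delay value is illustrative only */
static void example_vga_settle(void)
{
	udelay(100);
}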