From 036e348fdccf74db83f18c466123553e12bd35b9 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 7 Jul 2023 20:02:41 -0400 Subject: drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3 Implement the similarities as GC v9.4.2, and the difference for GC v9.4.3 HW spec, i.e. xcc instance. Signed-off-by: Jonathan Kim Signed-off-by: Eric Huang Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index fff3ccc04fa9..24083db44724 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -466,7 +466,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, watch_address_mask, *watch_id, watch_mode, - pdd->dev->vm_info.last_vmid_kfd); + pdd->dev->vm_info.last_vmid_kfd, + 0); amdgpu_gfx_off_ctrl(pdd->dev->adev, true); if (!pdd->dev->kfd->shared_resources.enable_mes) -- cgit From 7a93cc579c1e63d956d9ec124100a36d9798ffe8 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 8 Mar 2023 14:44:22 -0500 Subject: drm/amdkfd: enable watch points globally for gfx943 Set watch points for all xcc instances on GFX943. 
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Eric Huang Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 24083db44724..190b03efe5ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -446,7 +446,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, uint32_t *watch_id, uint32_t watch_mode) { - int r = kfd_dbg_get_dev_watch_id(pdd, watch_id); + int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id); + uint32_t xcc_mask = pdd->dev->xcc_mask; if (r) return r; @@ -460,14 +461,15 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, } amdgpu_gfx_off_ctrl(pdd->dev->adev, false); - pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch( + for_each_inst(xcc_id, xcc_mask) + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch( pdd->dev->adev, watch_address, watch_address_mask, *watch_id, watch_mode, pdd->dev->vm_info.last_vmid_kfd, - 0); + xcc_id); amdgpu_gfx_off_ctrl(pdd->dev->adev, true); if (!pdd->dev->kfd->shared_resources.enable_mes) -- cgit From cef600e1fd63c338bfe19ee3e1adeff8801ba14d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 12 Jul 2023 16:32:29 -0400 Subject: drm/amdkfd: fix trap handling work around for debugging Update the list of devices that require the cwsr trap handling workaround for debugging use cases. 
Signed-off-by: Jonathan Kim Acked-by: Ruili Ji Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 5 ++--- drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 6 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++---- 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 190b03efe5ff..ccfc81f085ce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable) if (!q) return 0; - if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || - KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0)) + if (!kfd_dbg_has_cwsr_workaround(q->device)) return 0; if (enable && q->properties.is_user_cu_masked) @@ -349,7 +348,7 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd) { uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; uint32_t flags = pdd->process->dbg_flags; - bool sq_trap_en = !!spi_dbg_cntl; + bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev); if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index ba616ed17dee..586d7f886712 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -101,6 +101,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev) KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1)); } +static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev) +{ + return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3); +} + static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev) { if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1) diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 31cac1fd0d58..761963ad6154 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -226,8 +226,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.paging = false; queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; - queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) || - KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3); + queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; queue_type = convert_to_mes_queue_type(q->properties.type); @@ -1827,8 +1826,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, */ q->properties.is_evicted = !!qpd->evicted; q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && - KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) && - KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3); + kfd_dbg_has_cwsr_workaround(q->device); if (qd) mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, -- cgit From 7a1c5c6753858cbbf0b073eaa9b53d8f56ee0927 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 12 Jul 2023 16:58:55 -0400 Subject: drm/amdkfd: enable cooperative groups for gfx11 MES can concurrently schedule queues on the device that require exclusive device access if marked exclusively_scheduled without the requirement of GWS. Similar to the F32 HWS, MES will manage quality of service for these queues. Use this for cooperative groups since cooperative groups are device occupancy limited. Since some GFX11 devices can only be debugged with partial CUs, do not allow the debugging of cooperative groups on these devices as the CU occupancy limit will change on attach. 
In addition, zero initialize the MES add queue submission vector for MES initialization tests as we do not want these to be cooperative dispatches. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 6 +++++- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 ++----- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 12 ++++++++---- drivers/gpu/drm/amd/include/mes_v11_api_def.h | 4 +++- 9 files changed, 27 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f808841310fd..72ab6a838bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -642,6 +642,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, unsigned long flags; int r; + memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); + /* allocate the mes queue buffer */ queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); if (!queue) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d6ac30b7135..2053954a235c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -224,6 +224,7 @@ struct mes_add_queue_input { uint32_t is_kfd_process; uint32_t is_aql_queue; uint32_t queue_size; + uint32_t exclusively_scheduled; }; struct mes_remove_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 1bdaa00c0b46..8e67e965f7ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -214,6 +214,8 @@ 
static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; mes_add_queue_pkt.gds_size = input->queue_size; + mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled; + return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 76b41b5de281..9307f303c7fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1487,7 +1487,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, goto out_unlock; } - if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) { + if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) || + kfd_dbg_has_cwsr_workaround(dev))) { retval = -EBUSY; goto out_unlock; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index ccfc81f085ce..1f82caea59ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -753,7 +753,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd, if (!KFD_IS_SOC15(pdd->dev)) return -ENODEV; - if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws) + if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) || + kfd_dbg_has_cwsr_workaround(pdd->dev))) return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0b3dc754e06b..ebc9674d3ce1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -508,6 +508,7 @@ static int kfd_gws_init(struct kfd_node *node) { int ret = 0; struct kfd_dev *kfd = node->kfd; + uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) return 0; @@ -524,7 +525,10 @@ static int kfd_gws_init(struct 
kfd_node *node) (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) - && kfd->mec2_fw_version >= 0x6b)))) + && kfd->mec2_fw_version >= 0x6b) || + (KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0) + && KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0) + && mes_rev >= 68)))) ret = amdgpu_amdkfd_alloc_gws(node->adev, node->adev->gds.gws_size, &node->gws); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 761963ad6154..71b7f16c0173 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -237,10 +237,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, } queue_input.queue_type = (uint32_t)queue_type; - if (q->gws) { - queue_input.gws_base = 0; - queue_input.gws_size = qpd->num_gws; - } + queue_input.exclusively_scheduled = q->properties.is_gws; amdgpu_mes_lock(&adev->mes); r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); @@ -250,7 +247,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, q->properties.doorbell_off); pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); kfd_hws_hang(dqm); -} + } return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index ba9d69054119..60e6b37b43ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -123,7 +123,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) { + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) { if (gws) ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, gws, &mem); @@ 
-136,7 +136,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } else { /* * Intentionally set GWS to a non-NULL value - * for GFX 9.4.3. + * for devices that do not use GWS for global wave + * synchronization but require the formality + * of setting GWS for cooperative groups. */ pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL; } @@ -173,7 +175,8 @@ void pqm_uninit(struct process_queue_manager *pqm) list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q && pqn->q->gws && - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !pqn->q->device->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); kfd_procfs_del_queue(pqn->q); @@ -455,7 +458,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q->gws) { - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3)) + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process( pqm->process->kgd_process_info, pqn->q->gws); diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 0997e999416a..b1db2b190187 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -275,7 +275,9 @@ union MESAPI__ADD_QUEUE { uint32_t trap_en : 1; uint32_t is_aql_queue : 1; uint32_t skip_process_ctx_clear : 1; - uint32_t reserved : 19; + uint32_t map_legacy_kq : 1; + uint32_t exclusively_scheduled : 1; + uint32_t reserved : 17; }; struct MES_API_STATUS api_status; uint64_t tma_addr; -- cgit From fc7f1d9697bcd3f7a4c64fb00a0e9bb050eaaa2a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 12 Jun 2023 11:31:07 -0400 Subject: drm/amdkfd: fix and enable ttmp setup for gfx11 The MES cached process context must be cleared on adding any queue for 
the first time. For proper debug support, the MES will clear its cached process context on the first call to SET_SHADER_DEBUGGER. This allows TTMPs to be persistently enabled in a safe manner. Signed-off-by: Jonathan Kim Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 ++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 19 +++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 11 ++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 12 +++++------- 6 files changed, 39 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index 77ca5cbfb601..d67d003bada2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev, { uint32_t data = 0; - data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 35b36cbe5aa2..aef8e12df61f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2759,6 +2759,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug, if (pdd->qpd.queue_count) return -EEXIST; + + /* + * Setup TTMPs by default. 
+ * Note that this call must remain here for MES ADD QUEUE to + * skip_process_ctx_clear unconditionally as the first call to + * SET_SHADER_DEBUGGER clears any stale process context data + * saved in MES. + */ + if (pdd->dev->kfd->shared_resources.enable_mes) + kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev)); } p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; @@ -2852,7 +2862,8 @@ static int runtime_disable(struct kfd_process *p) if (!pdd->dev->kfd->shared_resources.enable_mes) debug_refresh_runlist(pdd->dev->dqm); else - kfd_dbg_set_mes_debug_mode(pdd); + kfd_dbg_set_mes_debug_mode(pdd, + !kfd_dbg_has_cwsr_workaround(pdd->dev)); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 1f82caea59ba..9ec750666382 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -344,11 +344,10 @@ unwind: return r; } -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd) +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en) { uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode; uint32_t flags = pdd->process->dbg_flags; - bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev); if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) return 0; @@ -432,7 +431,7 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_map_and_unlock(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); kfd_dbg_clear_dev_watch_id(pdd, watch_id); @@ -474,7 +473,7 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_map_and_unlock(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); /* HWS is broken so no point in HW rollback but release the watchpoint 
anyways */ if (r) @@ -516,7 +515,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) { target->dbg_flags = prev_flags; @@ -539,7 +538,7 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) if (!pdd->dev->kfd->shared_resources.enable_mes) debug_refresh_runlist(pdd->dev->dqm); else - kfd_dbg_set_mes_debug_mode(pdd); + kfd_dbg_set_mes_debug_mode(pdd, true); } } @@ -601,7 +600,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind if (!pdd->dev->kfd->shared_resources.enable_mes) debug_refresh_runlist(pdd->dev->dqm); else - kfd_dbg_set_mes_debug_mode(pdd); + kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev)); } kfd_dbg_set_workaround(target, false); @@ -717,7 +716,7 @@ int kfd_dbg_trap_activate(struct kfd_process *target) if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) { target->runtime_info.runtime_state = @@ -851,7 +850,7 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) break; @@ -883,7 +882,7 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, if (!pdd->dev->kfd->shared_resources.enable_mes) r = debug_refresh_runlist(pdd->dev->dqm); else - r = kfd_dbg_set_mes_debug_mode(pdd); + r = kfd_dbg_set_mes_debug_mode(pdd, true); if (r) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 586d7f886712..fd0ff64d4184 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -126,5 +126,14 @@ static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev) return true; } -int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd); +int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en); + +static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev) +{ + return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) || + (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) && + (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70); +} #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 71b7f16c0173..ccaf85fc12c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -228,6 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.tma_addr = qpd->tma_addr; queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled; + queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled || + kfd_dbg_has_ttmps_always_setup(q->device); queue_type = convert_to_mes_queue_type(q->properties.type); if (queue_type < 0) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index eeedc3ddffeb..3b0749390388 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -38,6 +38,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" #include "kfd_svm.h" +#include "kfd_debug.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" #include "amdgpu.h" @@ -1931,6 +1932,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED | 
HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED; + if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) + dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3)) dev->node_props.debug_prop |= @@ -1941,10 +1945,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; - if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 4, 2)) - dev->node_props.debug_prop |= - HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; @@ -1952,9 +1952,7 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(11, 0, 0)) - dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; - else + if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; } -- cgit