aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2022-09-01 11:27:15 -0400
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 12:35:52 -0400
commit69a8c3ae2dea84a6d571e4c1aad306f630f3ccfd (patch)
tree17a0ba33f8f739f1cfeb7971438b55d32849e41b /drivers/gpu/drm/amd/amdkfd/kfd_debug.c
parent218895820e6fccade42a7c3ab9c0a44dec0a1ebc (diff)
drm/amdkfd: apply trap workaround for gfx11
Due to a HW bug, waves in only half the shader arrays can enter trap. When starting a debug session, relocate all waves to the first shader array of each shader engine and mask off the 2nd shader array as unavailable. When ending a debug session, re-enable the 2nd shader array per shader engine. User CU masking per queue cannot be guaranteed to remain functional if requested during debugging (e.g. user cu mask requests only 2nd shader array as an available resource leading to zero HW resources available) nor can runtime be alerted of any of these changes during execution. Make user CU masking and debugging mutual exclusive with respect to availability. If the debugger tries to attach to a process with a user cu masked queue, return the runtime status as enabled but busy. If the debugger tries to attach and fails to reallocate queue waves to the first shader array of each shader engine, return the runtime status as enabled but with an error. In addition, like any other mutli-process debug supported devices, disable trap temporary setup per-process to avoid performance impact from setup overhead. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c57
1 files changed, 57 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 73b07b5f17f1..5e2ee2d1acc4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -24,6 +24,57 @@
#include "kfd_device_queue_manager.h"
#include <linux/file.h>
+static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
+{
+ struct mqd_update_info minfo = {0};
+ int err;
+
+ if (!q)
+ return 0;
+
+ if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
+ KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
+ return 0;
+
+ if (enable && q->properties.is_user_cu_masked)
+ return -EBUSY;
+
+ minfo.update_flag = enable ? UPDATE_FLAG_DBG_WA_ENABLE : UPDATE_FLAG_DBG_WA_DISABLE;
+
+ q->properties.is_dbg_wa = enable;
+ err = q->device->dqm->ops.update_queue(q->device->dqm, q, &minfo);
+ if (err)
+ q->properties.is_dbg_wa = false;
+
+ return err;
+}
+
+static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
+{
+ struct process_queue_manager *pqm = &target->pqm;
+ struct process_queue_node *pqn;
+ int r = 0;
+
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ r = kfd_dbg_set_queue_workaround(pqn->q, enable);
+ if (enable && r)
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list)
+ kfd_dbg_set_queue_workaround(pqn->q, false);
+
+ if (enable)
+ target->runtime_info.runtime_state = r == -EBUSY ?
+ DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+ DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+ return r;
+}
+
static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
@@ -77,6 +128,8 @@ static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int
else
kfd_dbg_set_mes_debug_mode(pdd);
}
+
+ kfd_dbg_set_workaround(target, false);
}
int kfd_dbg_trap_disable(struct kfd_process *target)
@@ -111,6 +164,10 @@ static int kfd_dbg_trap_activate(struct kfd_process *target)
{
int i, r = 0;
+ r = kfd_dbg_set_workaround(target, true);
+ if (r)
+ return r;
+
for (i = 0; i < target->n_pdds; i++) {
struct kfd_process_device *pdd = target->pdds[i];