aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorYong Zhao <yong.zhao@amd.com>2018-07-11 22:33:05 -0400
committerOded Gabbay <oded.gabbay@gmail.com>2018-07-11 22:33:05 -0400
commit0e9a860c72ec387140a0feb4b8d9a6d0004e9316 (patch)
tree4b3df7d853733bfd541427fdbff2f956310dde55 /drivers
parenta29ec470b19e58044005973301f233e0b20ed8c4 (diff)
drm/amdkfd: Introduce KFD module parameter halt_if_hws_hang
This avoids triggering a GPU reset or otherwise changing the HW state. Instead KFD will hang, which allows HW debugging tools to analyze the problem. Signed-off-by: Yong Zhao <yong.zhao@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h5
3 files changed, 16 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 32e93b53e7e8..5d05d125c3c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1217,6 +1217,13 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
while (*fence_addr != fence_value) {
if (time_after(jiffies, end_jiffies)) {
pr_err("qcm fence wait loop timeout expired\n");
+ /* In HWS case, this is used to halt the driver thread
+ * in order not to mess up CP states before doing
+ * scandumps for FW debugging.
+ */
+ while (halt_if_hws_hang)
+ schedule();
+
return -ETIME;
}
schedule();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index ee7bf07db472..3a8c15ad0c64 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -92,6 +92,10 @@ MODULE_PARM_DESC(noretry,
static int amdkfd_init_completed;
+int halt_if_hws_hang;
+module_param(halt_if_hws_hang, int, 0644);
+MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
int kgd2kfd_init(unsigned int interface_version,
const struct kgd2kfd_calls **g2f)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d9bf70b52857..8473e7b3dcc2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -144,6 +144,11 @@ extern int ignore_crat;
*/
extern int vega10_noretry;
+/*
+ * Halt if HWS hang is detected
+ */
+extern int halt_if_hws_hang;
+
/**
* enum kfd_sched_policy
*