aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
author Felix Kuehling <Felix.Kuehling@amd.com> 2020-06-11 23:19:37 -0400
committer Alex Deucher <alexander.deucher@amd.com> 2020-07-01 01:59:21 -0400
commit b205795677c034a1975e75a466ad158950fde4b4 (patch)
tree 131a04e30eb868e0ff6c925631d7c500f8a65c69 /drivers/gpu/drm
parent e3569fab49886123c81f18d675fb1719d73cf27d (diff)
drm/amdkfd: Add eviction debug messages
Use WARN to print messages with backtrace when evictions are triggered.
This can help determine the root cause of evictions and help spot driver
bugs triggering evictions unintentionally, or help with performance tuning
by avoiding conditions that cause evictions in a specific workload.

The messages are controlled by a new module parameter that can be changed
at runtime:

  echo Y > /sys/module/amdgpu/parameters/debug_evictions
  echo N > /sys/module/amdgpu/parameters/debug_evictions

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h 5
5 files changed, 20 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 905cf0bac100..3d2625beacf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -186,8 +186,10 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
+extern bool debug_evictions;
#else
static const int sched_policy = KFD_SCHED_POLICY_HWS;
+static const bool debug_evictions; /* = false */
#endif
extern int amdgpu_tmz;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 75bcd1789185..653a377dd342 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false =
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
+
+/**
+ * DOC: debug_evictions(bool)
+ * Enable extra debug messages to help determine the cause of evictions
+ */
+bool debug_evictions;
+module_param(debug_evictions, bool, 0644);
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
#endif
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index b87ca171986a..072f0e1185a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
continue;
}
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
r = amdgpu_sync_fence(sync, f, false);
if (r)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 314c4b99671d..7f6d0958ed62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -935,6 +935,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
if (!p)
return -ESRCH;
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
r = kfd_process_evict_queues(p);
kfd_unref_process(p);
@@ -1002,6 +1003,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
/* During process initialization eviction_work.dwork is initialized
* to kfd_evict_bo_worker
*/
+ WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
+ p->lead_thread->pid, delay_jiffies);
schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
kfd_unref_process(p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 3a4fbb6a9aca..308e96f1dab5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -177,6 +177,11 @@ extern bool hws_gws_support;
*/
extern int queue_preemption_timeout_ms;
+/*
+ * Enable eviction debug messages
+ */
+extern bool debug_evictions;
+
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent