aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorYunxiang Li <Yunxiang.Li@amd.com>2024-04-22 14:59:02 -0400
committerAlex Deucher <alexander.deucher@amd.com>2024-05-02 15:40:44 -0400
commitf4322b9f8ad5f9f62add288c785d2e10bb6a5efe (patch)
tree16568819259068d87856a8dc5cde125945735d2d /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parentf5007c67fc77ec555cf824fb8c2038a834201b38 (diff)
drm/amdgpu: Fix two reset triggered in a row
Some times a hang GPU causes multiple reset sources to schedule resets. The second source will be able to trigger an unnecessary reset if they schedule after we call amdgpu_device_stop_pending_resets. Move amdgpu_device_stop_pending_resets to after the reset is done. Since at this point the GPU is supposedly in a good state, any reset scheduled after this point would be a legitimate reset. Remove unnecessary and incorrect checks for amdgpu_in_reset that was kinda serving this purpose. Signed-off-by: Yunxiang Li <Yunxiang.Li@amd.com> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c19
1 files changed, 10 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 861ccff78af9..8befd10bf007 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5070,8 +5070,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
retry:
amdgpu_amdkfd_pre_reset(adev);
- amdgpu_device_stop_pending_resets(adev);
-
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
@@ -5823,13 +5821,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
r, adev_to_drm(tmp_adev)->unique);
tmp_adev->asic_reset_res = r;
}
-
- if (!amdgpu_sriov_vf(tmp_adev))
- /*
- * Drop all pending non scheduler resets. Scheduler resets
- * were already dropped during drm_sched_stop
- */
- amdgpu_device_stop_pending_resets(tmp_adev);
}
/* Actual ASIC resets if needed.*/
@@ -5851,6 +5842,16 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
goto retry;
}
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /*
+ * Drop any pending non scheduler resets queued before reset is done.
+ * Any reset scheduled after this point would be valid. Scheduler resets
+ * were already dropped during drm_sched_stop and no new ones can come
+ * in before drm_sched_start.
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
+ }
+
skip_hw_reset:
/* Post ASIC reset for all devs .*/