diff options
author | Yunxiang Li <Yunxiang.Li@amd.com> | 2024-04-22 14:44:38 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-05-02 15:40:50 -0400 |
commit | 25c01191c2555351922e5515b6b6d31357975031 (patch) | |
tree | 6ae9d6296928adda447c2bb6cb2bbf16284559eb /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | f4322b9f8ad5f9f62add288c785d2e10bb6a5efe (diff) |
drm/amdgpu: Add reset_context flag for host FLR
There are other reset sources that pass NULL as the job pointer, such as
amdgpu_amdkfd_reset_work. Therefore, using the job pointer to check if
the FLR comes from the host does not work.
Add a flag in reset_context to explicitly mark host triggered reset, and
set this flag when we receive host reset notification.
Signed-off-by: Yunxiang Li <Yunxiang.Li@amd.com>
Reviewed-by: Emily Deng <Emily.Deng@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8befd10bf007..33c889c027a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5055,13 +5055,13 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu_device pointer - * @from_hypervisor: request from hypervisor + * @reset_context: amdgpu reset context pointer * * do VF FLR and reinitialize Asic * return 0 means succeeded otherwise failed */ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, - bool from_hypervisor) + struct amdgpu_reset_context *reset_context) { int r; struct amdgpu_hive_info *hive = NULL; @@ -5070,12 +5070,15 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); - if (from_hypervisor) + if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) { + clear_bit(AMDGPU_HOST_FLR, &reset_context->flags); r = amdgpu_virt_request_full_gpu(adev, true); - else + } else { r = amdgpu_virt_reset_gpu(adev); + } if (r) return r; + amdgpu_ras_set_fed(adev, false); amdgpu_irq_gpu_reset_resume_helper(adev); @@ -5826,7 +5829,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { - r = amdgpu_device_reset_sriov(adev, job ? false : true); + r = amdgpu_device_reset_sriov(adev, reset_context); if (r) adev->asic_reset_res = r; |