diff options
author | Hawking Zhang <[email protected]> | 2024-09-06 16:06:13 +0800 |
---|---|---|
committer | Alex Deucher <[email protected]> | 2024-09-06 17:55:05 -0400 |
commit | 7eafe7a730bad6f0efb62b2d260f1ae3f2d09b95 (patch) | |
tree | 1bb420fba58a6d74661d18020fcaf8feb60c60ff | |
parent | 101025e94b537e8b5426c73a985b26fc95c199cb (diff) |
drm/amdkfd: Select reset method for poison handling
Driver mode-2 is only supported by relative new
smc firmware.
Signed-off-by: Hawking Zhang <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 40 |
1 files changed, 32 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index fecdbbab9894..d46a13156ee9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -167,11 +167,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SE3SH: case SOC15_IH_CLIENTID_UTCL2: block = AMDGPU_RAS_BLOCK__GFX; - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; - else + if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x00557300 and onwards */ + if (dev->adev->pm.fw_version < 0x00557300) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x05550C00 and onwards */ + if (dev->adev->pm.fw_version < 0x05550C00) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } break; case SOC15_IH_CLIENTID_VMC: case SOC15_IH_CLIENTID_VMC1: @@ -184,11 +196,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: block = AMDGPU_RAS_BLOCK__SDMA; - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; - else + if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x00557300 and onwards */ + if (dev->adev->pm.fw_version < 0x00557300) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x05550C00 and onwards */ + if (dev->adev->pm.fw_version < 0x05550C00) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } break; default: dev_warn(dev->adev->dev, |