aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com> 2022-05-19 14:09:46 +1000
committer: Dave Airlie <airlied@redhat.com> 2022-05-19 14:09:54 +1000
commit: 00df0514ab13813655a6fbaba85425f8f4780be2 (patch)
tree: 85e9e8908b702575ff4a7e4a58cf36dcca93c204 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent: f8122500a039abeabfff41b0ad8b6a2c94c1107d (diff)
parent: 0223e516470aa0589da6c03e6d177c10594cabbd (diff)
Merge tag 'amd-drm-next-5.19-2022-05-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.19-2022-05-18:

amdgpu:
- Misc code cleanups
- Additional SMU 13.x enablement
- Smartshift fixes
- GFX11 fixes
- Support for SMU 13.0.4
- SMU mutex fix
- Suspend/resume fix

amdkfd:
- static checker fix
- Doorbell/MMIO resource handling fix

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220518205621.5741-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 47
1 files changed, 29 insertions, 18 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 7e126dff004f..035891ec59d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1538,33 +1538,42 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
struct amdgpu_iv_entry *entry)
{
- bool poison_stat = true, need_reset = true;
+ bool poison_stat = false;
struct amdgpu_device *adev = obj->adev;
struct ras_err_data err_data = {0, 0, 0, NULL};
struct amdgpu_ras_block_object *block_obj =
amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
- if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_poison_handler(adev, &err_data, false);
-
- /* both query_poison_status and handle_poison_consumption are optional */
- if (block_obj && block_obj->hw_ops) {
- if (block_obj->hw_ops->query_poison_status) {
- poison_stat = block_obj->hw_ops->query_poison_status(adev);
- if (!poison_stat)
- dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
- block_obj->ras_comm.name);
- }
+ if (!block_obj || !block_obj->hw_ops)
+ return;
- if (poison_stat && block_obj->hw_ops->handle_poison_consumption) {
- poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
- need_reset = poison_stat;
+ /* both query_poison_status and handle_poison_consumption are optional,
+ * but at least one of them should be implemented if we need poison
+ * consumption handler
+ */
+ if (block_obj->hw_ops->query_poison_status) {
+ poison_stat = block_obj->hw_ops->query_poison_status(adev);
+ if (!poison_stat) {
+ /* Not poison consumption interrupt, no need to handle it */
+ dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+ block_obj->ras_comm.name);
+
+ return;
}
}
- /* gpu reset is fallback for all failed cases */
- if (need_reset)
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ amdgpu_umc_poison_handler(adev, &err_data, false);
+
+ if (block_obj->hw_ops->handle_poison_consumption)
+ poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+
+ /* gpu reset is fallback for failed and default cases */
+ if (poison_stat) {
+ dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
amdgpu_ras_reset_gpu(adev);
+ }
}
static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
@@ -2516,7 +2525,9 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
return 0;
ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
- if (ras_obj->ras_cb) {
+ if (ras_obj->ras_cb || (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_poison_status ||
+ ras_obj->hw_ops->handle_poison_consumption))) {
r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
if (r)
goto cleanup;