diff options
author | Dave Airlie <airlied@redhat.com> | 2023-01-25 12:07:53 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-01-25 12:07:53 +1000 |
commit | 7dd1be30f02f7115002fe00f1f6802bbcf79f857 (patch) | |
tree | 5711a6081243f510f5f4df2e8e372617a63644ae /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | b8f55f24bc82ed7064645cc7f6675430609314a5 (diff) | |
parent | b4a9b36e69e935104e52e561aa9a82d39b5efc36 (diff) |
Merge tag 'amd-drm-next-6.3-2023-01-20' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.3-2023-01-20:
amdgpu:
- Secure display fixes
- Fix scaling
- Misc code cleanups
- Display BW alloc logic updates
- DCN 3.2 fixes
- Fix power reporting on certain firmwares for CZN/RN
- SR-IOV fixes
- Link training cleanup and code rework
- HDCP fixes
- Reserved VMID fix
- Documentation updates
- Colorspace fixes
- RAS updates
- GC11.0 fixes
- VCN instance harvesting fixes
- DCN 3.1.4/5 workarounds for S/G displays
- Add PCIe info to the INFO IOCTL
amdkfd:
- XNACK fix
UAPI:
- Add PCIe gen/lanes info to the amdgpu INFO IOCTL
Nesa ultimately plans to use this to make decisions about buffer placement optimizations
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20790
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230120234523.7610-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 28 |
1 files changed, 21 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d06beb884a16..6e543558386d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -920,9 +920,6 @@ static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_de if (block >= AMDGPU_RAS_BLOCK__LAST) return NULL; - if (!amdgpu_ras_is_supported(adev, block)) - return NULL; - list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); @@ -1620,14 +1617,14 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); - if (!block_obj || !block_obj->hw_ops) + if (!block_obj) return; /* both query_poison_status and handle_poison_consumption are optional, * but at least one of them should be implemented if we need poison * consumption handler */ - if (block_obj->hw_ops->query_poison_status) { + if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) { poison_stat = block_obj->hw_ops->query_poison_status(adev); if (!poison_stat) { /* Not poison consumption interrupt, no need to handle it */ @@ -1641,7 +1638,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * if (!adev->gmc.xgmi.connected_to_cpu) amdgpu_umc_poison_handler(adev, false); - if (block_obj->hw_ops->handle_poison_consumption) + if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); /* gpu reset is fallback for failed and default cases */ @@ -1649,6 +1646,8 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n", block_obj->ras_comm.name); amdgpu_ras_reset_gpu(adev); + } else { + amdgpu_gfx_poison_consumption_handler(adev, entry); } } @@ -3023,11 +3022,26 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block) { + int ret = 0; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (block >= AMDGPU_RAS_BLOCK_COUNT) return 0; - return ras && (adev->ras_enabled & (1 << block)); + + ret = ras && (adev->ras_enabled & (1 << block)); + + /* For the special asic with mem ecc enabled but sram ecc + * not enabled, even if the ras block is not supported on + * .ras_enabled, if the asic supports poison mode and the + * ras block has ras configuration, it can be considered + * that the ras block supports ras function. + */ + if (!ret && + amdgpu_ras_is_poison_mode_supported(adev) && + amdgpu_ras_get_ras_block(adev, block, 0)) + ret = 1; + + return ret; } int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) |