aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-04-17 15:48:59 +1000
committerDave Airlie <airlied@redhat.com>2024-04-17 15:48:59 +1000
commit34633158b8eb8fca145c9a73f8fe4f98c7275b06 (patch)
treea8e0e2d55dff19f68a1c6842142255e9deaf2d7d /drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
parent6e1f415e7129f7cd4c2394af83b35cdcdd40baf7 (diff)
parentab956ed95b8bc4a65c913d7057075866d5fc3724 (diff)
Merge tag 'amd-drm-next-6.10-2024-04-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.10-2024-04-13: amdgpu: - HDCP fixes - ODM fixes - RAS fixes - Devcoredump improvements - Misc code cleanups - Expose VCN activity via sysfs - SMY 13.0.x updates - Enable fast updates on DCN 3.1.4 - Add dclk and vclk reporting on additional devices - Add ACA RAS infrastructure - Implement TLB flush fence - EEPROM handling fixes - SMUIO 14.0.2 support - SMU 14.0.1 Updates - Sync page table freeing with TLB flushes - DML2 refactor - DC debug improvements - SR-IOV fixes - Suspend and Resume fixes - DCN 3.5.x Updates - Z8 fixes - UMSCH fixes - GPU reset fixes - HDP fix for second GFX pipe on GC 10.x - Enable secondary GFX pipe on GC 10.3 - Refactor and clean up BACO/BOCO/BAMACO handling - VCN partitioning fix - DC DWB fixes - VSC SDP fixes - DCN 3.1.6 fix - GC 11.5 fixes - Remove invalid TTM resource start check - DCN 1.0 fixes amdkfd: - MQD handling cleanup - Preemption handling fixes for XCDs - TLB flush fix for GC 9.4.2 - Properly clean up workqueue during module unload - Fix memory leak process create failure - Range check CP bad op exception targets to avoid reporting invalid exceptions to userspace radeon: - Misc code cleanups From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240413213708.3427038-1-alexander.deucher@amd.com Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c32
1 files changed, 18 insertions, 14 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 24ad4b97177b..0734490347db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -210,22 +210,26 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
return -EOPNOTSUPP;
}
-static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry)
+static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
+ struct ras_query_context *qctx)
{
- dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
- dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_STATUS]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_ADDR]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_MISC0]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_IPID]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_SYND]);
+ u64 event_id = qctx->event_id;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_STATUS]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_ADDR]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_MISC0]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_IPID]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_SYND]);
}
-int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data)
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
{
struct amdgpu_smuio_mcm_config_info mcm_info;
struct ras_err_addr err_addr = {0};
@@ -244,7 +248,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
list_for_each_entry(node, &mca_set.list, node) {
entry = &node->entry;
- amdgpu_mca_smu_mca_bank_dump(adev, i++, entry);
+ amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx);
count = 0;
ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);