diff options
author | Dave Airlie <airlied@redhat.com> | 2024-04-17 15:48:59 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-04-17 15:48:59 +1000 |
commit | 34633158b8eb8fca145c9a73f8fe4f98c7275b06 (patch) | |
tree | a8e0e2d55dff19f68a1c6842142255e9deaf2d7d /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | |
parent | 6e1f415e7129f7cd4c2394af83b35cdcdd40baf7 (diff) | |
parent | ab956ed95b8bc4a65c913d7057075866d5fc3724 (diff) |
Merge tag 'amd-drm-next-6.10-2024-04-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.10-2024-04-13:
amdgpu:
- HDCP fixes
- ODM fixes
- RAS fixes
- Devcoredump improvements
- Misc code cleanups
- Expose VCN activity via sysfs
- SMU 13.0.x updates
- Enable fast updates on DCN 3.1.4
- Add dclk and vclk reporting on additional devices
- Add ACA RAS infrastructure
- Implement TLB flush fence
- EEPROM handling fixes
- SMUIO 14.0.2 support
- SMU 14.0.1 Updates
- Sync page table freeing with TLB flushes
- DML2 refactor
- DC debug improvements
- SR-IOV fixes
- Suspend and Resume fixes
- DCN 3.5.x Updates
- Z8 fixes
- UMSCH fixes
- GPU reset fixes
- HDP fix for second GFX pipe on GC 10.x
- Enable secondary GFX pipe on GC 10.3
- Refactor and clean up BACO/BOCO/BAMACO handling
- VCN partitioning fix
- DC DWB fixes
- VSC SDP fixes
- DCN 3.1.6 fix
- GC 11.5 fixes
- Remove invalid TTM resource start check
- DCN 1.0 fixes
amdkfd:
- MQD handling cleanup
- Preemption handling fixes for XCDs
- TLB flush fix for GC 9.4.2
- Properly clean up workqueue during module unload
- Fix memory leak on process create failure
- Range check CP bad op exception targets to avoid reporting invalid exceptions to userspace
radeon:
- Misc code cleanups
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240413213708.3427038-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 20d51f6c9bb8..dd2ec48cf5c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1035,15 +1035,16 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
-						struct aca_bank_report *report, void *data)
+static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				       enum aca_smu_type type, void *data)
 {
 	struct amdgpu_device *adev = handle->adev;
+	struct aca_bank_info info;
 	const char *error_str;
-	u64 status;
+	u64 status, count;
 	int ret, ext_error_code;
 
-	ret = aca_bank_info_decode(bank, &report->info);
+	ret = aca_bank_info_decode(bank, &info);
 	if (ret)
 		return ret;
 
@@ -1055,15 +1056,28 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc
 	if (error_str)
 		dev_info(adev->dev, "%s detected\n", error_str);
 
-	if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) ||
-	    (type == ACA_ERROR_TYPE_CE && ext_error_code == 6))
-		report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+	count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
 
-	return 0;
+	switch (type) {
+	case ACA_SMU_TYPE_UE:
+		if (ext_error_code != 0 && ext_error_code != 9)
+			count = 0ULL;
+
+		ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
+		break;
+	case ACA_SMU_TYPE_CE:
+		count = ext_error_code == 6 ? count : 0ULL;
+		ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
 }
 
 static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
-	.aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report,
+	.aca_bank_parser = xgmi_v6_4_0_aca_bank_parser,
 };
 
 static const struct aca_info xgmi_v6_4_0_aca_info = {