diff options
author | Yang Wang <kevinyang.wang@amd.com> | 2023-09-05 11:39:10 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-09-20 12:25:01 -0400 |
commit | 7ff607e27233861b3f83e658317b3fb18b047229 (patch) | |
tree | 81f4495243384d6539bd446bba30678739d10e07 /drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | |
parent | 615585d09b33e609bef5eb7986f5df2b4e053ac3 (diff) |
drm/amdgpu: add amdgpu smu mca dump feature support
add amdgpu smu mca dump feature support.
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 997a073e2409..be3189bad97f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -21,6 +21,26 @@
 #ifndef __AMDGPU_MCA_H__
 #define __AMDGPU_MCA_H__
 
+#include "amdgpu_ras.h"
+
+#define MCA_MAX_REGS_COUNT (16)
+
+enum amdgpu_mca_ip {
+	AMDGPU_MCA_IP_UNKNOW = -1,
+	AMDGPU_MCA_IP_PSP = 0,
+	AMDGPU_MCA_IP_SDMA,
+	AMDGPU_MCA_IP_GC,
+	AMDGPU_MCA_IP_SMU,
+	AMDGPU_MCA_IP_MP5,
+	AMDGPU_MCA_IP_UMC,
+	AMDGPU_MCA_IP_COUNT,
+};
+
+enum amdgpu_mca_error_type {
+	AMDGPU_MCA_ERROR_TYPE_UE = 0,
+	AMDGPU_MCA_ERROR_TYPE_CE,
+};
+
 struct amdgpu_mca_ras_block {
 	struct amdgpu_ras_block_object ras_block;
 };
@@ -34,6 +54,36 @@ struct amdgpu_mca {
 	struct amdgpu_mca_ras mp0;
 	struct amdgpu_mca_ras mp1;
 	struct amdgpu_mca_ras mpio;
+	const struct amdgpu_mca_smu_funcs *mca_funcs;
+};
+
+struct mca_bank_info {
+	int socket_id;
+	int aid;
+	int hwid;
+	int mcatype;
+};
+
+struct mca_bank_entry {
+	int idx;
+	enum amdgpu_mca_error_type type;
+	enum amdgpu_mca_ip ip;
+	struct mca_bank_info info;
+	uint64_t regs[MCA_MAX_REGS_COUNT];
+};
+
+struct amdgpu_mca_smu_funcs {
+	int max_ue_count;
+	int max_ce_count;
+	int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
+	int (*mca_get_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+			enum amdgpu_mca_error_type type, uint32_t *count);
+	int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+			uint32_t *count);
+	int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+			int idx, struct mca_bank_entry *entry);
+	int (*mca_get_ras_mca_idx_array)(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+			enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size);
 };
 
 void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
@@ -53,4 +103,13 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
 int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
 int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
 int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count);
+int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+		enum amdgpu_mca_error_type type, uint32_t *count);
+int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+		int idx, struct mca_bank_entry *entry);
+
 #endif