aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
diff options
context:
space:
mode:
authorYang Wang <kevinyang.wang@amd.com>2023-09-05 11:39:10 +0800
committerAlex Deucher <alexander.deucher@amd.com>2023-09-20 12:25:01 -0400
commit7ff607e27233861b3f83e658317b3fb18b047229 (patch)
tree81f4495243384d6539bd446bba30678739d10e07 /drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
parent615585d09b33e609bef5eb7986f5df2b4e053ac3 (diff)
drm/amdgpu: add amdgpu smu mca dump feature support
add amdgpu smu mca dump feature support. Signed-off-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h59
1 files changed, 59 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 997a073e2409..be3189bad97f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -21,6 +21,26 @@
#ifndef __AMDGPU_MCA_H__
#define __AMDGPU_MCA_H__
+#include "amdgpu_ras.h"
+
+#define MCA_MAX_REGS_COUNT (16)
+
+enum amdgpu_mca_ip {
+ AMDGPU_MCA_IP_UNKNOW = -1,
+ AMDGPU_MCA_IP_PSP = 0,
+ AMDGPU_MCA_IP_SDMA,
+ AMDGPU_MCA_IP_GC,
+ AMDGPU_MCA_IP_SMU,
+ AMDGPU_MCA_IP_MP5,
+ AMDGPU_MCA_IP_UMC,
+ AMDGPU_MCA_IP_COUNT,
+};
+
+enum amdgpu_mca_error_type {
+ AMDGPU_MCA_ERROR_TYPE_UE = 0,
+ AMDGPU_MCA_ERROR_TYPE_CE,
+};
+
struct amdgpu_mca_ras_block {
struct amdgpu_ras_block_object ras_block;
};
@@ -34,6 +54,36 @@ struct amdgpu_mca {
struct amdgpu_mca_ras mp0;
struct amdgpu_mca_ras mp1;
struct amdgpu_mca_ras mpio;
+ const struct amdgpu_mca_smu_funcs *mca_funcs;
+};
+
+struct mca_bank_info {
+ int socket_id;
+ int aid;
+ int hwid;
+ int mcatype;
+};
+
+struct mca_bank_entry {
+ int idx;
+ enum amdgpu_mca_error_type type;
+ enum amdgpu_mca_ip ip;
+ struct mca_bank_info info;
+ uint64_t regs[MCA_MAX_REGS_COUNT];
+};
+
+struct amdgpu_mca_smu_funcs {
+ int max_ue_count;
+ int max_ce_count;
+ int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*mca_get_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, uint32_t *count);
+ int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ uint32_t *count);
+ int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry);
+ int (*mca_get_ras_mca_idx_array)(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size);
};
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
@@ -53,4 +103,13 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count);
+int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, uint32_t *count);
+int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry);
+
#endif