diff options
author | YiPeng Chai <YiPeng.Chai@amd.com> | 2024-01-15 11:02:52 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-01-22 17:13:25 -0500 |
commit | 0795b5d234902269fc5182dd8e46f21bdafbcd13 (patch) | |
tree | a38ca06430a55eea7a07c8fa5752649192e085bb /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | afb617f38f221e88dc5b3f3fc2d87cc749175609 (diff) |
drm/amdgpu:Support retiring multiple MCA error address pages
Support retiring multiple MCA error address pages in
one in-band query for umc v12_0.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 |
1 files changed, 35 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 349c810d2399..3c1106cf9d80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3920,8 +3920,7 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct } static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr) + struct amdgpu_smuio_mcm_config_info *mcm_info) { struct ras_err_node *err_node; @@ -3933,10 +3932,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); + INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - if (err_addr) - memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr)); + memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); @@ -3945,6 +3943,29 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) +{ + struct ras_err_addr *mca_err_addr; + + mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL); + if (!mca_err_addr) + return; + + INIT_LIST_HEAD(&mca_err_addr->node); + + mca_err_addr->err_status = err_addr->err_status; + mca_err_addr->err_ipid = err_addr->err_ipid; + mca_err_addr->err_addr = err_addr->err_addr; + + list_add_tail(&mca_err_addr->node, &err_info->err_addr_list); +} + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) +{ + list_del(&mca_err_addr->node); + kfree(mca_err_addr); +} + int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count) @@ -3957,10 +3978,13 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + err_info->ue_count += count; err_data->ue_count += count; @@ -3979,7 +4003,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; @@ -4001,10 +4025,13 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + err_info->de_count += count; err_data->de_count += count; |