diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index c50202215f6b..57bda66e85ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks) { struct aca_bank_node *node, *tmp; + if (list_empty(&banks->list)) + return; + list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); @@ -119,7 +122,7 @@ static struct aca_regs_dump { static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank, struct ras_query_context *qctx) { - u64 event_id = qctx ? qctx->event_id : 0ULL; + u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID; int i; RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); @@ -453,13 +456,13 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er switch (type) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_DEFERRED: - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count); break; default: break; @@ -534,7 +537,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han if (aca_handle_is_valid(handle)) return -EOPNOTSUPP; - if (!(BIT(type) & handle->mask)) + if ((type < 0) || (!(BIT(type) & handle->mask))) return 0; return __aca_get_error_data(adev, handle, type, err_data, qctx); @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr) struct aca_bank_error *bank_error, *tmp; mutex_lock(&aerr->lock); + if (list_empty(&aerr->list)) + goto out_unlock; + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); +out_unlock: mutex_destroy(&aerr->lock); } @@ -680,13 +687,17 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) { struct aca_handle *handle, *tmp; + if (list_empty(&mgr->list)) + return; + list_for_each_entry_safe(handle, tmp, &mgr->list, node) amdgpu_aca_remove_handle(handle); } bool amdgpu_aca_is_enabled(struct amdgpu_device *adev) { - return adev->aca.is_enabled; + return (adev->aca.is_enabled || + adev->debug_enable_ras_aca); } int amdgpu_aca_init(struct amdgpu_device *adev) @@ -714,9 +725,11 @@ void amdgpu_aca_fini(struct amdgpu_device *adev) int amdgpu_aca_reset(struct amdgpu_device *adev) { - amdgpu_aca_fini(adev); + struct amdgpu_aca *aca = &adev->aca; - return amdgpu_aca_init(adev); + atomic_set(&aca->ue_update_flag, 0); + + return 0; } void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs) @@ -892,7 +905,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_se void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) { #if defined(CONFIG_DEBUG_FS) - if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + if (!root) return; debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops); |