diff options
author | Luben Tuikov <luben.tuikov@amd.com> | 2021-05-18 21:07:17 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2021-05-27 12:22:54 -0400 |
commit | a46751fbcde505e6aff8622e17995092c8d86ae4 (patch) | |
tree | b652fc0e941f3e517dc5a24eb0666575cd741679 | |
parent | 2871e10199430132c69d81c3c302db05d19db4e1 (diff) |
drm/amdgpu: Fix RAS function interface
The correctable and uncorrectable errors
are calculated at each invocation of this
function. Therefore, it is highly inefficient to
return just one of them based on a Boolean
input. If the caller wants both, twice the work
would be done. (And this work is O(n^3) on
Vega20.)
Fix this "interface" to simply return what it had
calculated--both values. Let the caller choose
what it wants to record, inspect, use.
Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: John Clements <john.clements@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 |
2 files changed, 18 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e3a4c3a7635a..ed3c43e8b0b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1043,29 +1043,36 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, } /* get the total error counts on all IPs */ -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce) +void amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - struct ras_err_data data = {0, 0}; + unsigned long ce, ue; if (!adev->ras_enabled || !con) - return 0; + return; + ce = 0; + ue = 0; list_for_each_entry(obj, &con->head, node) { struct ras_query_if info = { .head = obj->head, }; if (amdgpu_ras_query_error_status(adev, &info)) - return 0; + return; - data.ce_count += info.ce_count; - data.ue_count += info.ue_count; + ce += info.ce_count; + ue += info.ue_count; } - return is_ce ? data.ce_count : data.ue_count; + if (ce_count) + *ce_count = ce; + + if (ue_count) + *ue_count = ue; } /* query/inject/cure end */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index bfa40c8ecc94..10fca0393106 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -485,8 +485,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce); +void amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count); /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, |