aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luben Tuikov <luben.tuikov@amd.com> 2021-05-18 21:07:17 -0400
committer Alex Deucher <alexander.deucher@amd.com> 2021-05-27 12:22:54 -0400
commit a46751fbcde505e6aff8622e17995092c8d86ae4 (patch)
tree b652fc0e941f3e517dc5a24eb0666575cd741679
parent 2871e10199430132c69d81c3c302db05d19db4e1 (diff)
drm/amdgpu: Fix RAS function interface
The correctable and uncorrectable errors are calculated at each invocation of this function. Therefore, it is highly inefficient to return just one of them based on a Boolean input. If the caller wants both, twice the work would be done. (And this work is O(n^3) on Vega20.)

Fix this "interface" to simply return what it had calculated--both values. Let the caller choose what it wants to record, inspect, use.

Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: John Clements <john.clements@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5
2 files changed, 18 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e3a4c3a7635a..ed3c43e8b0b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1043,29 +1043,36 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
}
/* get the total error counts on all IPs */
-unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
- bool is_ce)
+void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- struct ras_err_data data = {0, 0};
+ unsigned long ce, ue;
if (!adev->ras_enabled || !con)
- return 0;
+ return;
+ ce = 0;
+ ue = 0;
list_for_each_entry(obj, &con->head, node) {
struct ras_query_if info = {
.head = obj->head,
};
if (amdgpu_ras_query_error_status(adev, &info))
- return 0;
+ return;
- data.ce_count += info.ce_count;
- data.ue_count += info.ue_count;
+ ce += info.ce_count;
+ ue += info.ue_count;
}
- return is_ce ? data.ce_count : data.ue_count;
+ if (ce_count)
+ *ce_count = ce;
+
+ if (ue_count)
+ *ue_count = ue;
}
/* query/inject/cure end */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index bfa40c8ecc94..10fca0393106 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -485,8 +485,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
-unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
- bool is_ce);
+void amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count);
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,