aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2022-02-14 10:31:51 +1000
committerDave Airlie <airlied@redhat.com>2022-02-14 10:31:51 +1000
commit123db17ddff007080d464e785689fb14f94cbc7a (patch)
tree11da22fd6a508e496be838e43e0b504266c4a4d3 /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
parente7a09cea6483b44ea0c82f07145fcbd8a918bf96 (diff)
parent7f161df1a513e2961f4e3c96a8355c8ce93ad175 (diff)
Merge tag 'amd-drm-next-5.18-2022-02-11-1' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.18-2022-02-11-1: amdgpu: - Clean up of power management code - Enable freesync video mode by default - Clean up of RAS code - Improve VRAM access for debug using SDMA - Coding style cleanups - SR-IOV fixes - More display FP reorg - TLB flush fixes for Arcuturus, Vega20 - Misc display fixes - Rework special register access methods for SR-IOV - DP2 fixes - DP tunneling fixes - DSC fixes - More IP discovery cleanups - Misc RAS fixes - Enable both SMU i2c buses where applicable - s2idle improvements - DPCS header cleanup - Add new CAP firmware support for SR-IOV amdkfd: - Misc cleanups - SVM fixes - CRIU support - Clean up MQD manager UAPI: - Add interface to amdgpu CTX ioctl to request a stable power state for profiling https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 - Add amdkfd support for CRIU https://github.com/checkpoint-restore/criu/pull/1709 - Remove old unused amdkfd debugger interface Was only implemented for Kaveri and was only ever used by an old HSA tool that was never open sourced radeon: - Fix error handling in radeon_driver_open_kms - UVD suspend fix - Misc fixes From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20220211220706.5803-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c52
1 files changed, 43 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index e8b8f28c2f72..5929d6f528c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return psp_xgmi_terminate(&adev->psp);
}
-static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info)
{
int r;
struct ras_ih_if ih_info = {
@@ -746,7 +746,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
- adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+ adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
if (!adev->gmc.xgmi.ras_if) {
adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
@@ -865,7 +865,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -874,7 +874,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
uint32_t ue_cnt = 0, ce_cnt = 0;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
- return -EINVAL;
+ return ;
err_data->ue_count = 0;
err_data->ce_count = 0;
@@ -940,17 +940,51 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
break;
}
- adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+ adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt;
+}
- return 0;
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
+{
+ int ret = 0;
+ struct ta_ras_trigger_error_input *block_info =
+ (struct ta_ras_trigger_error_input *)inject_if;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret = psp_ras_trigger_error(&adev->psp, block_info);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+ if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret;
}
-const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
- .ras_late_init = amdgpu_xgmi_ras_late_init,
- .ras_fini = amdgpu_xgmi_ras_fini,
+struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+ .ras_error_inject = amdgpu_ras_error_inject_xgmi,
+};
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+ .ras_block = {
+ .name = "xgmi",
+ .block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ .hw_ops = &xgmi_ras_hw_ops,
+ .ras_late_init = amdgpu_xgmi_ras_late_init,
+ .ras_fini = amdgpu_xgmi_ras_fini,
+ },
};