From a340847b0214aa9b8fd9839f7b2822ccc607edab Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 13 Oct 2022 11:06:33 +0800 Subject: Revert "drm/amdgpu: let mode2 reset fallback to default when failure" This reverts commit dac6b80818ac2353631c5a33d140d8d5508e2957. This commit reverted the AMDGPU_SKIP_MODE2_RESET as it conflicts with the original design of reset handler. Will redesign it. Fixes: dac6b80818ac23 ("drm/amdgpu: let mode2 reset fallback to default when failure") Signed-off-by: Victor Zhao Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ab8f970b2849..bb73fb420ffc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5210,7 +5210,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->job = job; reset_context->hive = hive; - /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list @@ -5337,11 +5336,8 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); - if (r && r == -EAGAIN) { - set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags); - adev->asic_reset_res = 0; + if (r && r == -EAGAIN) goto retry; - } if (!r && gpu_reset_for_dev_remove) goto recover_end; @@ -5777,7 +5773,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); -- cgit From 3059cd8c5f797ad83d2b194ae66339f5c007ca43 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 29 Sep 2022 10:50:44 +0800 Subject: drm/amd/pm: disable cstate feature for gpu reset scenario Suggested by PMFW team and same as what did for gfxoff feature. This can address some Mode1Reset failures observed on SMU13.0.0. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++ drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 8 ++++++++ drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 9 +++++++++ 3 files changed, 25 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bb73fb420ffc..e0445e8cc342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2928,6 +2928,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + /* + * Per PMFW team's suggestion, driver needs to handle gfxoff + * and df cstate features disablement for gpu reset(e.g. Mode1Reset) + * scenario. Add the missing df cstate disablement here. + */ + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + dev_warn(adev->dev, "Failed to disallow df cstate"); + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 445005571f76..9cd005131f56 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2242,9 +2242,17 @@ static void arcturus_get_unique_id(struct smu_context *smu) static int arcturus_set_df_cstate(struct smu_context *smu, enum pp_df_cstate state) { + struct amdgpu_device *adev = smu->adev; uint32_t smu_version; int ret; + /* + * Arcturus does not need the cstate disablement + * prerequisite for gpu reset. + */ + if (amdgpu_in_reset(adev) || adev->in_suspend) + return 0; + ret = smu_cmn_get_smc_version(smu, NULL, &smu_version); if (ret) { dev_err(smu->adev->dev, "Failed to get smu version!\n"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 619aee51b123..d30ec3005ea1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1640,6 +1640,15 @@ static bool aldebaran_is_baco_supported(struct smu_context *smu) static int aldebaran_set_df_cstate(struct smu_context *smu, enum pp_df_cstate state) { + struct amdgpu_device *adev = smu->adev; + + /* + * Aldebaran does not need the cstate disablement + * prerequisite for gpu reset. + */ + if (amdgpu_in_reset(adev) || adev->in_suspend) + return 0; + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL); } -- cgit