From 3fdcd0a31d7aed3ef5de104ff8f7e4e4908a0c36 Mon Sep 17 00:00:00 2001
From: YiPeng Chai <YiPeng.Chai@amd.com>
Date: Thu, 18 Jan 2024 14:57:22 +0800
Subject: drm/amdgpu: Prepare for asynchronous processing of umc page retirement

Preparing for asynchronous processing of umc page retirement.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
[review: make kthread_should_stop() part of the wait condition so that
 kthread_stop() in amdgpu_ras_recovery_fini() cannot block forever when no
 request is pending; add explanatory comments]
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 850cad69e4b7..7e7bb85341af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2660,6 +2660,37 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
 	}
 }
 
+/*
+ * Kthread body for umc page retirement requests. For now it only logs and
+ * consumes one pending request per wakeup; it exits when asked to stop.
+ */
+static int amdgpu_ras_page_retirement_thread(void *param)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)param;
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	while (!kthread_should_stop()) {
+
+		/*
+		 * kthread_stop() wakes this thread without raising the request
+		 * count, so the stop flag must be part of the wait condition.
+		 */
+		wait_event_interruptible(con->page_retirement_wq,
+				kthread_should_stop() ||
+				atomic_read(&con->page_retirement_req_cnt));
+
+		if (kthread_should_stop())
+			break;
+
+		dev_info(adev->dev, "Start processing page retirement. request:%d\n",
+			atomic_read(&con->page_retirement_req_cnt));
+
+		atomic_dec(&con->page_retirement_req_cnt);
+	}
+
+	return 0;
+}
+
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -2723,6 +2754,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 		}
 	}
 
+	mutex_init(&con->page_retirement_lock);
+	init_waitqueue_head(&con->page_retirement_wq);
+	atomic_set(&con->page_retirement_req_cnt, 0);
+	con->page_retirement_thread =
+		kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
+	if (IS_ERR(con->page_retirement_thread)) {
+		con->page_retirement_thread = NULL;
+		dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
+	}
+
 #ifdef CONFIG_X86_MCE_AMD
 	if ((adev->asic_type == CHIP_ALDEBARAN) &&
 	    (adev->gmc.xgmi.connected_to_cpu))
@@ -2758,6 +2799,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 	if (!data)
 		return 0;
 
+	if (con->page_retirement_thread)
+		kthread_stop(con->page_retirement_thread);
+
+	atomic_set(&con->page_retirement_req_cnt, 0);
+
 	cancel_work_sync(&con->recovery_work);
 
 	mutex_lock(&con->recovery_lock);
-- 
cgit