diff options
author | YiPeng Chai <YiPeng.Chai@amd.com> | 2024-01-18 14:57:22 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-01-22 17:13:25 -0500 |
commit | 3fdcd0a31d7aed3ef5de104ff8f7e4e4908a0c36 (patch) | |
tree | c88829a5664b3893a503717a80641c98aa946f99 | |
parent | 22f6e3e112979ced02cb403559546c17c3b34ad9 (diff) |
drm/amdgpu: Prepare for asynchronous processing of umc page retirement
Preparing for asynchronous processing of umc page retirement.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 34 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 |
2 files changed, 39 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 850cad69e4b7..7e7bb85341af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2660,6 +2660,25 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +static int amdgpu_ras_page_retirement_thread(void *param) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)param; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + while (!kthread_should_stop()) { + + wait_event_interruptible(con->page_retirement_wq, + atomic_read(&con->page_retirement_req_cnt)); + + dev_info(adev->dev, "Start processing page retirement. request:%d\n", + atomic_read(&con->page_retirement_req_cnt)); + + atomic_dec(&con->page_retirement_req_cnt); + } + + return 0; +} + int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2723,6 +2742,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_retirement_lock); + init_waitqueue_head(&con->page_retirement_wq); + atomic_set(&con->page_retirement_req_cnt, 0); + con->page_retirement_thread = + kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); + if (IS_ERR(con->page_retirement_thread)) { + con->page_retirement_thread = NULL; + dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); + } + #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) @@ -2758,6 +2787,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) if (!data) return 0; + if (con->page_retirement_thread) + kthread_stop(con->page_retirement_thread); + + atomic_set(&con->page_retirement_req_cnt, 0); + cancel_work_sync(&con->recovery_work); mutex_lock(&con->recovery_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 2cd89328dc73..9c3df9985fad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -461,6 +461,11 @@ struct amdgpu_ras { /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; + + struct task_struct *page_retirement_thread; + wait_queue_head_t page_retirement_wq; + struct mutex page_retirement_lock; + atomic_t page_retirement_req_cnt; }; struct ras_fs_data { |