aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYiPeng Chai <YiPeng.Chai@amd.com>2024-06-24 11:33:19 +0800
committerAlex Deucher <alexander.deucher@amd.com>2024-06-27 17:32:06 -0400
commite278849cb2b663bca7dd67ba5d531ecb5b4557df (patch)
tree5ceaf053818575fe3e7faf76cd4f5eb583cb6b2d
parent5f08275cfd88609c86ee86d92efdb196d27c732d (diff)
drm/amdgpu: refine poison consumption interrupt handler
1. The poison fifo is only used for poison consumption requests. 2. Merge reset requests when poison fifo caches multiple poison consumption messages Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c11
2 files changed, 43 insertions, 23 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 6e7c4f1f86da..d3247533d15e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2911,23 +2911,41 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
}
static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
- struct ras_poison_msg *poison_msg)
+ uint32_t msg_count, uint32_t *gpu_reset)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- uint32_t reset = poison_msg->reset;
- uint16_t pasid = poison_msg->pasid;
+ uint32_t reset_flags = 0, reset = 0;
+ struct ras_poison_msg msg;
+ int ret, i;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (poison_msg->pasid_fn)
- poison_msg->pasid_fn(adev, pasid, poison_msg->data);
+ for (i = 0; i < msg_count; i++) {
+ ret = amdgpu_ras_get_poison_req(adev, &msg);
+ if (!ret)
+ continue;
+
+ if (msg.pasid_fn)
+ msg.pasid_fn(adev, msg.pasid, msg.data);
+
+ reset_flags |= msg.reset;
+ }
/* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
- if (reset && !con->is_rma) {
+ if (reset_flags && !con->is_rma) {
+ if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ else
+ reset = reset_flags;
+
flush_delayed_work(&con->page_retirement_dwork);
con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
+
+ *gpu_reset = reset;
}
return 0;
@@ -2937,10 +2955,9 @@ static int amdgpu_ras_page_retirement_thread(void *param)
{
struct amdgpu_device *adev = (struct amdgpu_device *)param;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- uint32_t poison_creation_count;
+ uint32_t poison_creation_count, msg_count;
+ uint32_t gpu_reset;
int ret;
- struct ras_poison_msg poison_msg;
- enum amdgpu_ras_block ras_block;
while (!kthread_should_stop()) {
@@ -2951,6 +2968,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
if (kthread_should_stop())
break;
+ gpu_reset = 0;
do {
poison_creation_count = atomic_read(&con->poison_creation_count);
@@ -2964,15 +2982,16 @@ static int amdgpu_ras_page_retirement_thread(void *param)
}
} while (atomic_read(&con->poison_creation_count));
- if (!amdgpu_ras_get_poison_req(adev, &poison_msg))
- continue;
-
- ras_block = poison_msg.block;
-
- dev_dbg(adev->dev, "Start processing ras block %s(%d)\n",
- ras_block_str(ras_block), ras_block);
-
- amdgpu_ras_poison_consumption_handler(adev, &poison_msg);
+ if (ret != -EIO) {
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ ret = amdgpu_ras_poison_consumption_handler(adev,
+ msg_count, &gpu_reset);
+ if ((ret != -EIO) &&
+ (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET))
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+ }
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 20e0e522fb51..2f84bdb8c594 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -293,14 +293,15 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
amdgpu_ras_error_data_fini(&err_data);
} else {
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
- amdgpu_ras_put_poison_req(adev,
- block, pasid, pasid_fn, data, reset);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+ ret = amdgpu_ras_put_poison_req(adev,
+ block, pasid, pasid_fn, data, reset);
+ if (!ret) {
atomic_inc(&con->page_retirement_req_cnt);
-
wake_up(&con->page_retirement_wq);
+ }
}
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)