From 86edcc7dba53995ea9448e535d7eb21dff5245d0 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 5 Sep 2019 19:16:19 +0800 Subject: drm/amdgpu: move umc late init from gmc to umc block umc late init is umc specific, it's more suitable to be put in umc block Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 73 +++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c new file mode 100644 index 000000000000..c8de127097ab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -0,0 +1,73 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) +{ + int r; + struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; + struct ras_fs_if fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + + if (!ih_info) + return -EINVAL; + + if (!adev->gmc.umc_ras_if) { + adev->gmc.umc_ras_if = + kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->gmc.umc_ras_if) + return -ENOMEM; + adev->gmc.umc_ras_if->block = AMDGPU_RAS_BLOCK__UMC; + adev->gmc.umc_ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gmc.umc_ras_if->sub_block_index = 0; + strcpy(adev->gmc.umc_ras_if->name, "umc"); + } + ih_info->head = fs_info.head = *adev->gmc.umc_ras_if; + + r = amdgpu_ras_late_init(adev, adev->gmc.umc_ras_if, + &fs_info, ih_info); + if (r) + goto free; + + if (amdgpu_ras_is_supported(adev, adev->gmc.umc_ras_if->block)) { + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto late_fini; + } else { + r = 0; + goto free; + } + + return 0; + +late_fini: + amdgpu_ras_late_fini(adev, adev->gmc.umc_ras_if, ih_info); +free: + kfree(adev->gmc.umc_ras_if); + adev->gmc.umc_ras_if = NULL; + return r; +} -- cgit From 4930aabe7c4e72cc62eca2a800e72eee17f40430 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 5 Sep 2019 19:25:18 +0800 Subject: drm/amdgpu: move umc ras init to umc block move umc ras init from ras module to umc block, generic ras module should pay less attention to specific ras block. Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e461386d697b..3268f5453eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1655,10 +1655,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (amdgpu_ras_fs_init(adev)) goto fs_out; - /* ras init for each ras block */ - if (adev->umc.funcs->ras_init) - adev->umc.funcs->ras_init(adev); - DRM_INFO("RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index c8de127097ab..5683c51710aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -62,6 +62,10 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) goto free; } + /* ras init of specific umc version */ + if (adev->umc.funcs && adev->umc.funcs->ras_init) + adev->umc.funcs->ras_init(adev); + return 0; late_fini: -- cgit From d99659a062316bc95269a52f2428a7ad96ee4315 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Fri, 6 Sep 2019 14:32:14 +0800 Subject: drm/amdgpu: rename umc ras_init to err_cnt_init this interface is related to specific version of umc, distinguish it from ras_late_init Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 5683c51710aa..c5d8b08af731 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -63,8 +63,8 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) } /* ras init of specific umc version */ - if (adev->umc.funcs && adev->umc.funcs->ras_init) - adev->umc.funcs->ras_init(adev); + if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) + adev->umc.funcs->err_cnt_init(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 6f22c9704555..3ec36d9e012a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -54,7 +54,7 @@ adev->umc.funcs->disable_umc_index_mode(adev); struct amdgpu_umc_funcs { - void (*ras_init)(struct amdgpu_device *adev); + void (*err_cnt_init)(struct amdgpu_device *adev); int (*ras_late_init)(struct amdgpu_device *adev, void *ras_ih_info); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 4cdb5c04cd17..1c0da32c1561 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -234,7 +234,7 @@ static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); } -static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, +static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t umc_reg_offset, uint32_t channel_index) { @@ -264,15 +264,15 @@ static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); } -static void umc_v6_1_ras_init(struct amdgpu_device *adev) +static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) { void *ras_error_status = NULL; - amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); + amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel); } const struct amdgpu_umc_funcs umc_v6_1_funcs = { - .ras_init = umc_v6_1_ras_init, + .err_cnt_init = umc_v6_1_err_cnt_init, .ras_late_init = amdgpu_umc_ras_late_init, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, -- cgit From 34cc4fd9ff710f31b172dbb8bdcc3a3ab492fff3 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 12 Sep 2019 11:11:25 +0800 Subject: drm/amdgpu: move umc ras irq functions to umc block move umc ras irq functions from gmc v9 to generic umc block, these functions are relevant to umc and they can be shared among all generations of umc Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 65 ++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 6 +++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 68 +-------------------------------- 3 files changed, 72 insertions(+), 67 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index c5d8b08af731..d11b4d38ca1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -21,7 +21,6 @@ * */ -#include "amdgpu.h" #include "amdgpu_ras.h" int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) @@ -75,3 +74,67 @@ free: adev->gmc.umc_ras_if = NULL; return r; } + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return AMDGPU_RAS_SUCCESS; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, ras_error_status); + + if (adev->umc.funcs && + adev->umc.funcs->query_ras_error_address && + adev->umc.max_ras_err_cnt_per_query) { + err_data->err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + /* still call query_ras_error_address to clear error status + * even NOMEM error is encountered + */ + if(!err_data->err_addr) + DRM_WARN("Failed to alloc memory for umc error address record!\n"); + + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + adev->umc.funcs->query_ras_error_address(adev, ras_error_status); + } + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) { + if (err_data->err_addr_cnt && + amdgpu_ras_add_bad_pages(adev, err_data->err_addr, + err_data->err_addr_cnt)) + DRM_WARN("Failed to add ras bad page!\n"); + + amdgpu_ras_reset_gpu(adev, 0); + } + + kfree(err_data->err_addr); + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct ras_common_if *ras_if = adev->gmc.umc_ras_if; + struct ras_dispatch_if ih_data = { + .entry = entry, + }; + + if (!ras_if) + return 0; + + ih_data.head = *ras_if; + + amdgpu_ras_interrupt_dispatch(adev, &ih_data); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index c907b14c9be8..9ac1c2f79299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -82,4 +82,10 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info); +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); +int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8910a18547b0..37f180ce4b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -245,70 +245,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, - void *ras_error_status, - struct amdgpu_iv_entry *entry) -{ - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return AMDGPU_RAS_SUCCESS; - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.funcs && - adev->umc.funcs->query_ras_error_count) - adev->umc.funcs->query_ras_error_count(adev, ras_error_status); - - if (adev->umc.funcs && - adev->umc.funcs->query_ras_error_address && - adev->umc.max_ras_err_cnt_per_query) { - err_data->err_addr = - kcalloc(adev->umc.max_ras_err_cnt_per_query, - sizeof(struct eeprom_table_record), GFP_KERNEL); - /* still call query_ras_error_address to clear error status - * even NOMEM error is encountered - */ - if(!err_data->err_addr) - DRM_WARN("Failed to alloc memory for umc error address record!\n"); - - /* umc query_ras_error_address is also responsible for clearing - * error status - */ - adev->umc.funcs->query_ras_error_address(adev, err_data); - } - - /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) { - if (err_data->err_addr_cnt && - amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt)) - DRM_WARN("Failed to add ras bad page!\n"); - - amdgpu_ras_reset_gpu(adev, 0); - } - - kfree(err_data->err_addr); - return AMDGPU_RAS_SUCCESS; -} - -static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - struct ras_common_if *ras_if = adev->gmc.umc_ras_if; - struct ras_dispatch_if ih_data = { - .entry = entry, - }; - - if (!ras_if) - return 0; - - ih_data.head = *ras_if; - - amdgpu_ras_interrupt_dispatch(adev, &ih_data); - return 0; -} - static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -449,7 +385,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = { .set = gmc_v9_0_ecc_interrupt_state, - .process = gmc_v9_0_process_ecc_irq, + .process = amdgpu_umc_process_ecc_irq, }; static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) @@ -805,7 +741,7 @@ static int gmc_v9_0_ecc_late_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct ras_ih_if umc_ih_info = { - .cb = gmc_v9_0_process_ras_data_cb, + .cb = amdgpu_umc_process_ras_data_cb, }; if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { -- cgit From 03740baab3b574ed40f39bb9eda715bb32045337 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 12 Sep 2019 16:34:08 +0800 Subject: drm/amdgpu: move umc_ras_if from gmc to umc block umc_ras_if is relevant to umc Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 28 ++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index b36d4c686844..524df154a5ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -181,7 +181,6 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; - struct ras_common_if *umc_ras_if; struct ras_common_if *mmhub_ras_if; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index d11b4d38ca1e..acc0c428f6fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -35,24 +35,24 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) if (!ih_info) return -EINVAL; - if (!adev->gmc.umc_ras_if) { - adev->gmc.umc_ras_if = + if (!adev->umc.ras_if) { + adev->umc.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->gmc.umc_ras_if) + if (!adev->umc.ras_if) return -ENOMEM; - adev->gmc.umc_ras_if->block = AMDGPU_RAS_BLOCK__UMC; - adev->gmc.umc_ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gmc.umc_ras_if->sub_block_index = 0; - strcpy(adev->gmc.umc_ras_if->name, "umc"); + adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if->sub_block_index = 0; + strcpy(adev->umc.ras_if->name, "umc"); } - ih_info->head = fs_info.head = *adev->gmc.umc_ras_if; + ih_info->head = fs_info.head = *adev->umc.ras_if; - r = amdgpu_ras_late_init(adev, adev->gmc.umc_ras_if, + r = amdgpu_ras_late_init(adev, adev->umc.ras_if, &fs_info, ih_info); if (r) goto free; - if (amdgpu_ras_is_supported(adev, adev->gmc.umc_ras_if->block)) { + if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); if (r) goto late_fini; @@ -68,10 +68,10 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->gmc.umc_ras_if, ih_info); + amdgpu_ras_late_fini(adev, adev->umc.ras_if, ih_info); free: - kfree(adev->gmc.umc_ras_if); - adev->gmc.umc_ras_if = NULL; + kfree(adev->umc.ras_if); + adev->umc.ras_if = NULL; return r; } @@ -125,7 +125,7 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct ras_common_if *ras_if = adev->gmc.umc_ras_if; + struct ras_common_if *ras_if = adev->umc.ras_if; struct ras_dispatch_if ih_data = { .entry = entry, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 9ac1c2f79299..72c378aec724 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -77,6 +77,7 @@ struct amdgpu_umc { uint32_t channel_offs; /* channel index table of interleaved memory */ const uint32_t *channel_idx_tbl; + struct ras_common_if *ras_if; const struct amdgpu_umc_funcs *funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 37f180ce4b77..40dad08ed1fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1109,8 +1109,8 @@ static int gmc_v9_0_sw_fini(void *handle) void *stolen_vga_buf; if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->gmc.umc_ras_if) { - struct ras_common_if *ras_if = adev->gmc.umc_ras_if; + adev->umc.ras_if) { + struct ras_common_if *ras_if = adev->umc.ras_if; struct ras_ih_if ih_info = { .head = *ras_if, }; -- cgit From 56c54b25c3b60a64d4ffd7b6b2a309b3febcfdd4 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 12 Sep 2019 18:54:33 +0800 Subject: drm/amdgpu: remove ih_info parameter of umc_ras_late_init umc_ras_late_init can get the info by itself Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 15 +++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +---- 3 files changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index acc0c428f6fa..08037f086d28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -23,17 +23,16 @@ #include "amdgpu_ras.h" -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) { int r; - struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; struct ras_fs_if fs_info = { .sysfs_name = "umc_err_count", .debugfs_name = "umc_err_inject", }; - - if (!ih_info) - return -EINVAL; + struct ras_ih_if ih_info = { + .cb = amdgpu_umc_process_ras_data_cb, + }; if (!adev->umc.ras_if) { adev->umc.ras_if = @@ -45,10 +44,10 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) adev->umc.ras_if->sub_block_index = 0; strcpy(adev->umc.ras_if->name, "umc"); } - ih_info->head = fs_info.head = *adev->umc.ras_if; + ih_info.head = fs_info.head = *adev->umc.ras_if; r = amdgpu_ras_late_init(adev, adev->umc.ras_if, - &fs_info, ih_info); + &fs_info, &ih_info); if (r) goto free; @@ -68,7 +67,7 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info) return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->umc.ras_if, ih_info); + amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); free: kfree(adev->umc.ras_if); adev->umc.ras_if = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 72c378aec724..8cc9852e99e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -55,7 +55,7 @@ struct amdgpu_umc_funcs { void (*err_cnt_init)(struct amdgpu_device *adev); - int (*ras_late_init)(struct amdgpu_device *adev, void *ras_ih_info); + int (*ras_late_init)(struct amdgpu_device *adev); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, @@ -82,7 +82,7 @@ struct amdgpu_umc { const struct amdgpu_umc_funcs *funcs; }; -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info); +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5ba99f8aec2e..bf7929499f0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -740,12 +740,9 @@ static int gmc_v9_0_ecc_late_init(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_ih_if umc_ih_info = { - .cb = amdgpu_umc_process_ras_data_cb, - }; if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { - r = adev->umc.funcs->ras_late_init(adev, &umc_ih_info); + r = adev->umc.funcs->ras_late_init(adev); if (r) return r; } -- cgit From 181c93e5ecdb8c8f3bcc74e38adcd668db85c441 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 18 Sep 2019 17:46:42 +0800 Subject: drm/amdgpu: move umc ras fini to umc block it's more suitable to put umc ras fini in umc block Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 12 +----------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 + 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 17027f771908..32687f4c21dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -309,17 +309,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) { - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->umc.ras_if) { - struct ras_common_if *ras_if = adev->umc.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_umc_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + amdgpu_umc_ras_fini(adev); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && adev->mmhub.ras_if) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 08037f086d28..7744de149949 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -74,6 +74,21 @@ free: return r; } +void amdgpu_umc_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && + adev->umc.ras_if) { + struct ras_common_if *ras_if = adev->umc.ras_if; + struct ras_ih_if ih_info = { + .head = *ras_if, + .cb = amdgpu_umc_process_ras_data_cb, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} + int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 8cc9852e99e6..3283032a78e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -83,6 +83,7 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); +void amdgpu_umc_ras_fini(struct amdgpu_device *adev); int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry); -- cgit From 3d8361b11ca45b329349ec82ad765d059d8b0673 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 23 Sep 2019 19:10:19 +0800 Subject: drm/amdgpu: add comments in ras interrupt callback add comments to clarify why checking GFX IP BLOCK for each ras interrupt callback Signed-off-by: Tao Zhou Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1b7a022996f6..68495f0e4354 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -636,7 +636,12 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry) { - /* TODO ue will trigger an interrupt. */ + /* TODO ue will trigger an interrupt. + * + * When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); if (adev->gfx.funcs->query_ras_error_count) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 7744de149949..d4fb9cf27e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -95,6 +95,10 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) return AMDGPU_RAS_SUCCESS; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 98fd1e5484d0..96581b5b0a8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1930,6 +1930,10 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, { int instance; + /* When “Full RAS” is enabled, the per-IP interrupt sources should + * be disabled and the driver should only look for the aggregated + * interrupt via sync flood + */ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) goto out; -- cgit