From c65b0805e779196ba07c2cb29e7f71777e81009d Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Thu, 8 Apr 2021 11:34:26 -0400 Subject: drm/amdgpu: RAS EEPROM table is now in debugfs Add "ras_eeprom_size" file in debugfs, which reports the maximum size allocated to the RAS table in EEPROM, as the number of bytes and the number of records it could store. For instance, $cat /sys/kernel/debug/dri/0/ras/ras_eeprom_size 262144 bytes or 10921 records $_ Add "ras_eeprom_table" file in debugfs, which dumps the RAS table stored in EEPROM, in a formatted way. For instance, $cat ras_eeprom_table Signature Version FirstOffs Size Checksum 0x414D4452 0x00010000 0x00000014 0x000000EC 0x000000DA Index Offset ErrType Bank/CU TimeStamp Offs/Addr MemChl MCUMCID RetiredPage 0 0x00014 ue 0x00 0x00000000607608DC 0x000000000000 0x00 0x00 0x000000000000 1 0x0002C ue 0x00 0x00000000607608DC 0x000000001000 0x00 0x00 0x000000000001 2 0x00044 ue 0x00 0x00000000607608DC 0x000000002000 0x00 0x00 0x000000000002 3 0x0005C ue 0x00 0x00000000607608DC 0x000000003000 0x00 0x00 0x000000000003 4 0x00074 ue 0x00 0x00000000607608DC 0x000000004000 0x00 0x00 0x000000000004 5 0x0008C ue 0x00 0x00000000607608DC 0x000000005000 0x00 0x00 0x000000000005 6 0x000A4 ue 0x00 0x00000000607608DC 0x000000006000 0x00 0x00 0x000000000006 7 0x000BC ue 0x00 0x00000000607608DC 0x000000007000 0x00 0x00 0x000000000007 8 0x000D4 ue 0x00 0x00000000607608DD 0x000000008000 0x00 0x00 0x000000000008 $_ Cc: Alexander Deucher Cc: Andrey Grodzovsky Cc: John Clements Cc: Hawking Zhang Cc: Xinhui Pan Signed-off-by: Luben Tuikov Acked-by: Alexander Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 256cea5d34f2..283afd791db1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
@@ -318,6 +318,7 @@ struct amdgpu_ras { /* sysfs */ struct device_attribute features_attr; struct bin_attribute badpages_attr; + struct dentry *de_ras_eeprom_table; /* block array */ struct ras_manager *objs; -- cgit From 4d9f771e111ee0144338c1012a90f1762220141a Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Fri, 2 Jul 2021 18:35:14 -0400 Subject: drm/amdgpu: Return error if no RAS In amdgpu_ras_query_error_count(), return an error if the device doesn't support RAS. This relieves the function of having to always set the values of the integer pointers (if set), regardless of whether RAS is supported or not, and thus mitigates that side effect. Also, if no pointers are set, don't count, since we've no way of reporting the counts. Also, give this function a kernel-doc. Cc: Alexander Deucher Cc: John Clements Cc: Hawking Zhang Reported-by: Tom Rix Fixes: a46751fbcde505 ("drm/amdgpu: Fix RAS function interface") Signed-off-by: Luben Tuikov Reviewed-by: Alexander Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 +++++++++++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 6 ++-- 2 files changed, 38 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 875874ea745e..194f7ccfbf94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -813,7 +813,7 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, /* query/inject/cure begin */ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) + struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; @@ -1047,17 +1047,32 @@ int 
amdgpu_ras_error_inject(struct amdgpu_device *adev, return ret; } -/* get the total error counts on all IPs */ -void amdgpu_ras_query_error_count(struct amdgpu_device *adev, - unsigned long *ce_count, - unsigned long *ue_count) +/** + * amdgpu_ras_query_error_count -- Get error counts of all IPs + * adev: pointer to AMD GPU device + * ce_count: pointer to an integer to be set to the count of correctible errors. + * ue_count: pointer to an integer to be set to the count of uncorrectible + * errors. + * + * If set, @ce_count or @ue_count, count and return the corresponding + * error counts in those integer pointers. Return 0 if the device + * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS. + */ +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; unsigned long ce, ue; if (!adev->ras_enabled || !con) - return; + return -EOPNOTSUPP; + + /* Don't count since no reporting. + */ + if (!ce_count && !ue_count) + return 0; ce = 0; ue = 0; @@ -1065,9 +1080,11 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev, struct ras_query_if info = { .head = obj->head, }; + int res; - if (amdgpu_ras_query_error_status(adev, &info)) - return; + res = amdgpu_ras_query_error_status(adev, &info); + if (res) + return res; ce += info.ce_count; ue += info.ue_count; @@ -1078,6 +1095,8 @@ void amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (ue_count) *ue_count = ue; + + return 0; } /* query/inject/cure end */ @@ -2145,9 +2164,10 @@ static void amdgpu_ras_counte_dw(struct work_struct *work) /* Cache new values. 
*/ - amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } pm_runtime_mark_last_busy(dev->dev); Out: @@ -2320,9 +2340,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, /* Those are the cached values at init. */ - amdgpu_ras_query_error_count(adev, &ce_count, &ue_count); - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } return 0; cleanup: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 283afd791db1..4d9c63f2f377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -491,9 +491,9 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -void amdgpu_ras_query_error_count(struct amdgpu_device *adev, - unsigned long *ce_count, - unsigned long *ue_count); +int amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ce_count, + unsigned long *ue_count); /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, -- cgit From 6457205c07563f1f6b101ff9ef747bd7ed57e4a7 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 13 Aug 2021 10:14:43 +0800 Subject: drm/amd/amdgpu: consolidate PSP TA context Signed-off-by: Candice Li Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 162 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 37 ++----- 
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 20 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 +- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 40 ++++---- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 28 +++--- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 24 ++--- 11 files changed, 158 insertions(+), 187 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 20b049ad61c1..7e45640fbee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -341,27 +341,27 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, switch (query_fw->index) { case TA_FW_TYPE_PSP_XGMI: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_xgmi_ucode_version; + fw_info->feature = adev->psp.xgmi.feature_version; break; case TA_FW_TYPE_PSP_RAS: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_ras_ucode_version; + fw_info->feature = adev->psp.ras.feature_version; break; case TA_FW_TYPE_PSP_HDCP: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_hdcp_ucode_version; + fw_info->feature = adev->psp.hdcp.feature_version; break; case TA_FW_TYPE_PSP_DTM: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_dtm_ucode_version; + fw_info->feature = adev->psp.dtm.feature_version; break; case TA_FW_TYPE_PSP_RAP: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_rap_ucode_version; + fw_info->feature = adev->psp.rap.feature_version; break; case TA_FW_TYPE_PSP_SECUREDISPLAY: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_securedisplay_ucode_version; + fw_info->feature = adev->psp.securedisplay.feature_version; break; default: return -EINVAL; @@ -378,8 +378,8 @@ 
static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = adev->psp.sos.feature_version; break; case AMDGPU_INFO_FW_ASD: - fw_info->ver = adev->psp.asd_fw_version; - fw_info->feature = adev->psp.asd_feature_version; + fw_info->ver = adev->psp.asd.fw_version; + fw_info->feature = adev->psp.asd.feature_version; break; case AMDGPU_INFO_FW_DMCU: fw_info->ver = adev->dm.dmcu_fw_version; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9dc2d6d9712a..cf40609f39d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -799,15 +799,15 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. * TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd.size_bytes) return 0; cmd = acquire_psp_cmd_buf(psp); - psp_copy_fw(psp, psp->asd_start_addr, psp->asd_ucode_size); + psp_copy_fw(psp, psp->asd.start_addr, psp->asd.size_bytes); psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->asd_ucode_size); + psp->asd.size_bytes); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -952,11 +952,11 @@ static int psp_xgmi_load(struct psp_context *psp) cmd = acquire_psp_cmd_buf(psp); - psp_copy_fw(psp, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); + psp_copy_fw(psp, psp->xgmi.start_addr, psp->xgmi.size_bytes); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_xgmi_ucode_size, + psp->xgmi.size_bytes, psp->xgmi_context.xgmi_shared_mc_addr, PSP_XGMI_SHARED_MEM_SIZE); @@ -1031,9 +1031,9 @@ int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw || - !psp->adev->psp.ta_xgmi_ucode_size || - !psp->adev->psp.ta_xgmi_start_addr) + if (!psp->ta_fw || + !psp->xgmi.size_bytes || + !psp->xgmi.start_addr) return -ENOENT; if 
(!psp->xgmi_context.initialized) { @@ -1100,7 +1100,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) { return psp->adev->asic_type == CHIP_ALDEBARAN && - psp->ta_xgmi_ucode_version >= 0x2000000b; + psp->xgmi.feature_version >= 0x2000000b; } int psp_xgmi_get_topology_info(struct psp_context *psp, @@ -1206,9 +1206,9 @@ static int psp_ras_init_shared_buf(struct psp_context *psp) */ ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAS_SHARED_MEM_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->ras.ras_shared_bo, - &psp->ras.ras_shared_mc_addr, - &psp->ras.ras_shared_buf); + &psp->ras_context.ras_shared_bo, + &psp->ras_context.ras_shared_mc_addr, + &psp->ras_context.ras_shared_buf); return ret; } @@ -1225,9 +1225,9 @@ static int psp_ras_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - psp_copy_fw(psp, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); + psp_copy_fw(psp, psp->ras.start_addr, psp->ras.size_bytes); - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.ras_shared_buf; if (psp->adev->gmc.xgmi.connected_to_cpu) ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; @@ -1238,18 +1238,18 @@ static int psp_ras_load(struct psp_context *psp) psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_ras_ucode_size, - psp->ras.ras_shared_mc_addr, + psp->ras.size_bytes, + psp->ras_context.ras_shared_mc_addr, PSP_RAS_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->ras.session_id = cmd->resp.session_id; + psp->ras_context.session_id = cmd->resp.session_id; if (!ras_cmd->ras_status) - psp->ras.ras_initialized = true; + psp->ras_context.ras_initialized = true; else dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); } @@ -1275,7 +1275,7 @@ static int psp_ras_unload(struct psp_context *psp) cmd = 
acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->ras_context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -1290,7 +1290,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) struct ta_ras_shared_memory *ras_cmd; int ret; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.ras_shared_buf; /* * TODO: bypass the loading in sriov for now @@ -1298,7 +1298,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); + ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras_context.session_id); if (amdgpu_ras_intr_triggered()) return ret; @@ -1354,10 +1354,10 @@ int psp_ras_enable_features(struct psp_context *psp, struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.ras_initialized) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.ras_shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); if (enable) @@ -1384,19 +1384,19 @@ static int psp_ras_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.ras_initialized) return 0; ret = psp_ras_unload(psp); if (ret) return ret; - psp->ras.ras_initialized = false; + psp->ras_context.ras_initialized = false; /* free ras shared memory */ - amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, - &psp->ras.ras_shared_mc_addr, - &psp->ras.ras_shared_buf); + amdgpu_bo_free_kernel(&psp->ras_context.ras_shared_bo, + &psp->ras_context.ras_shared_mc_addr, + &psp->ras_context.ras_shared_buf); return 0; } @@ -1413,8 +1413,8 @@ static int psp_ras_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(adev)) return 0; - if 
(!adev->psp.ta_ras_ucode_size || - !adev->psp.ta_ras_start_addr) { + if (!adev->psp.ras.size_bytes || + !adev->psp.ras.start_addr) { dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n"); return 0; } @@ -1460,7 +1460,7 @@ static int psp_ras_initialize(struct psp_context *psp) } } - if (!psp->ras.ras_initialized) { + if (!psp->ras_context.ras_initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) return ret; @@ -1479,10 +1479,10 @@ int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.ras_initialized) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.ras_shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; @@ -1530,14 +1530,14 @@ static int psp_hdcp_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - psp_copy_fw(psp, psp->ta_hdcp_start_addr, - psp->ta_hdcp_ucode_size); + psp_copy_fw(psp, psp->hdcp.start_addr, + psp->hdcp.size_bytes); cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_hdcp_ucode_size, + psp->hdcp.size_bytes, psp->hdcp_context.hdcp_shared_mc_addr, PSP_HDCP_SHARED_MEM_SIZE); @@ -1563,8 +1563,8 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_hdcp_ucode_size || - !psp->adev->psp.ta_hdcp_start_addr) { + if (!psp->hdcp.size_bytes || + !psp->hdcp.start_addr) { dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); return 0; } @@ -1677,13 +1677,13 @@ static int psp_dtm_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - psp_copy_fw(psp, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); + psp_copy_fw(psp, psp->dtm.start_addr, psp->dtm.size_bytes); cmd = acquire_psp_cmd_buf(psp); 
psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_dtm_ucode_size, + psp->dtm.size_bytes, psp->dtm_context.dtm_shared_mc_addr, PSP_DTM_SHARED_MEM_SIZE); @@ -1710,8 +1710,8 @@ static int psp_dtm_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_dtm_ucode_size || - !psp->adev->psp.ta_dtm_start_addr) { + if (!psp->dtm.size_bytes || + !psp->dtm.start_addr) { dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); return 0; } @@ -1818,13 +1818,13 @@ static int psp_rap_load(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - psp_copy_fw(psp, psp->ta_rap_start_addr, psp->ta_rap_ucode_size); + psp_copy_fw(psp, psp->rap.start_addr, psp->rap.size_bytes); cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_rap_ucode_size, + psp->rap.size_bytes, psp->rap_context.rap_shared_mc_addr, PSP_RAP_SHARED_MEM_SIZE); @@ -1866,8 +1866,8 @@ static int psp_rap_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_rap_ucode_size || - !psp->adev->psp.ta_rap_start_addr) { + if (!psp->rap.size_bytes || + !psp->rap.start_addr) { dev_info(psp->adev->dev, "RAP: optional rap ta ucode is not available\n"); return 0; } @@ -1979,11 +1979,11 @@ static int psp_securedisplay_load(struct psp_context *psp) struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_securedisplay_start_addr, psp->ta_securedisplay_ucode_size); + memcpy(psp->fw_pri_buf, psp->securedisplay.start_addr, psp->securedisplay.size_bytes); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_securedisplay_ucode_size, + psp->securedisplay.size_bytes, psp->securedisplay_context.securedisplay_shared_mc_addr, PSP_SECUREDISPLAY_SHARED_MEM_SIZE); @@ -2025,8 +2025,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if 
(!psp->adev->psp.ta_securedisplay_ucode_size || - !psp->adev->psp.ta_securedisplay_start_addr) { + if (!psp->securedisplay.size_bytes || + !psp->securedisplay.start_addr) { dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); return 0; } @@ -2420,7 +2420,7 @@ static int psp_load_smu_fw(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; - struct amdgpu_ras *ras = psp->ras.ras; + struct amdgpu_ras *ras = psp->ras_context.ras; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; @@ -2625,7 +2625,7 @@ skip_memalloc: return ret; } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, @@ -2697,7 +2697,7 @@ static int psp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { psp_ras_terminate(psp); psp_securedisplay_terminate(psp); psp_rap_terminate(psp); @@ -2735,7 +2735,7 @@ static int psp_suspend(void *handle) } } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_terminate(psp); if (ret) { DRM_ERROR("Failed to terminate ras ta\n"); @@ -2826,7 +2826,7 @@ static int psp_resume(void *handle) "XGMI: Failed to initialize XGMI session\n"); } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, @@ -2978,10 +2978,10 @@ int psp_init_asd_microcode(struct psp_context *psp, goto out; asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; - adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); - adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->sos.fw_version); - adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); - adev->psp.asd_start_addr = (uint8_t *)asd_hdr + + adev->psp.asd.fw_version = le32_to_cpu(asd_hdr->header.ucode_version); + 
adev->psp.asd.feature_version = le32_to_cpu(asd_hdr->sos.fw_version); + adev->psp.asd.size_bytes = le32_to_cpu(asd_hdr->header.ucode_size_bytes); + adev->psp.asd.start_addr = (uint8_t *)asd_hdr + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); return 0; out: @@ -3266,40 +3266,40 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); - psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); - psp->asd_start_addr = ucode_start_addr; + psp->asd.fw_version = le32_to_cpu(desc->fw_version); + psp->asd.feature_version = le32_to_cpu(desc->fw_version); + psp->asd.size_bytes = le32_to_cpu(desc->size_bytes); + psp->asd.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_XGMI: - psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_xgmi_start_addr = ucode_start_addr; + psp->xgmi.feature_version = le32_to_cpu(desc->fw_version); + psp->xgmi.size_bytes = le32_to_cpu(desc->size_bytes); + psp->xgmi.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_RAS: - psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_ras_start_addr = ucode_start_addr; + psp->ras.feature_version = le32_to_cpu(desc->fw_version); + psp->ras.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ras.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_HDCP: - psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_hdcp_start_addr = ucode_start_addr; + psp->hdcp.feature_version = le32_to_cpu(desc->fw_version); + psp->hdcp.size_bytes = le32_to_cpu(desc->size_bytes); + psp->hdcp.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_DTM: - psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version); - 
psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_dtm_start_addr = ucode_start_addr; + psp->dtm.feature_version = le32_to_cpu(desc->fw_version); + psp->dtm.size_bytes = le32_to_cpu(desc->size_bytes); + psp->dtm.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_RAP: - psp->ta_rap_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_rap_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_rap_start_addr = ucode_start_addr; + psp->rap.feature_version = le32_to_cpu(desc->fw_version); + psp->rap.size_bytes = le32_to_cpu(desc->size_bytes); + psp->rap.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_SECUREDISPLAY: - psp->ta_securedisplay_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_securedisplay_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_securedisplay_start_addr = ucode_start_addr; + psp->securedisplay.feature_version = le32_to_cpu(desc->fw_version); + psp->securedisplay.size_bytes = le32_to_cpu(desc->size_bytes); + psp->securedisplay.start_addr = ucode_start_addr; break; default: dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 6b1645598fa3..2cd84e21592a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -327,11 +327,8 @@ struct psp_context uint64_t tmr_mc_addr; /* asd firmware */ - const struct firmware *asd_fw; - uint32_t asd_fw_version; - uint32_t asd_feature_version; - uint32_t asd_ucode_size; - uint8_t *asd_start_addr; + const struct firmware *asd_fw; + struct psp_bin_desc asd; /* toc firmware */ const struct firmware *toc_fw; @@ -356,32 +353,16 @@ struct psp_context /* xgmi ta firmware and buffer */ const struct firmware *ta_fw; uint32_t ta_fw_version; - uint32_t ta_xgmi_ucode_version; - uint32_t ta_xgmi_ucode_size; - uint8_t *ta_xgmi_start_addr; - uint32_t ta_ras_ucode_version; - uint32_t ta_ras_ucode_size; - uint8_t 
*ta_ras_start_addr; - - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_ucode_size; - uint8_t *ta_hdcp_start_addr; - - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_ucode_size; - uint8_t *ta_dtm_start_addr; - - uint32_t ta_rap_ucode_version; - uint32_t ta_rap_ucode_size; - uint8_t *ta_rap_start_addr; - - uint32_t ta_securedisplay_ucode_version; - uint32_t ta_securedisplay_ucode_size; - uint8_t *ta_securedisplay_start_addr; + struct psp_bin_desc xgmi; + struct psp_bin_desc ras; + struct psp_bin_desc hdcp; + struct psp_bin_desc dtm; + struct psp_bin_desc rap; + struct psp_bin_desc securedisplay; struct psp_asd_context asd_context; struct psp_xgmi_context xgmi_context; - struct psp_ras_context ras; + struct psp_ras_context ras_context; struct psp_hdcp_context hdcp_context; struct psp_dtm_context dtm_context; struct psp_rap_context rap_context; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 194f7ccfbf94..3811b6b6a192 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1866,7 +1866,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras_eeprom_control *control = - &adev->psp.ras.ras->eeprom_control; + &adev->psp.ras_context.ras->eeprom_control; struct eeprom_table_record *bps; int ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 4d9c63f2f377..471ffe885fdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -470,8 +470,8 @@ struct ras_debug_if { * 8: feature disable */ -#define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras) -#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con)) +#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras) +#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con)) /* 
check if ras is supported on block, say, sdma, gfx */ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 5fdeceaa979f..abd8469380e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -525,9 +525,9 @@ FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version); -FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); -FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_ras_ucode_version); -FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_xgmi_ucode_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd.fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras.feature_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.xgmi.feature_version); FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index e2e2624ac653..7c2538db3cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -136,21 +136,11 @@ struct psp_firmware_header_v2_0 { /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; - uint32_t ta_xgmi_ucode_version; - uint32_t ta_xgmi_offset_bytes; - uint32_t ta_xgmi_size_bytes; - uint32_t ta_ras_ucode_version; - uint32_t ta_ras_offset_bytes; - uint32_t ta_ras_size_bytes; - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_offset_bytes; - uint32_t ta_hdcp_size_bytes; - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_offset_bytes; - uint32_t 
ta_dtm_size_bytes; - uint32_t ta_securedisplay_ucode_version; - uint32_t ta_securedisplay_offset_bytes; - uint32_t ta_securedisplay_size_bytes; + struct psp_fw_legacy_bin_desc xgmi; + struct psp_fw_legacy_bin_desc ras; + struct psp_fw_legacy_bin_desc hdcp; + struct psp_fw_legacy_bin_desc dtm; + struct psp_fw_legacy_bin_desc securedisplay; }; enum ta_fw_type { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 12a7cc2f01cd..ca058fbcccd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -532,9 +532,9 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ras.feature_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.xgmi.feature_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 4b1cc5e9ee92..5872d68ed13d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -84,29 +84,29 @@ 
static int psp_v10_0_init_microcode(struct psp_context *psp) ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = + adev->psp.hdcp.feature_version = + le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = + le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); - - adev->psp.ta_securedisplay_ucode_version = - le32_to_cpu(ta_hdr->ta_securedisplay_ucode_version); - adev->psp.ta_securedisplay_ucode_size = - le32_to_cpu(ta_hdr->ta_securedisplay_size_bytes); - adev->psp.ta_securedisplay_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_securedisplay_offset_bytes); + adev->psp.dtm.feature_version = + le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = + le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + adev->psp.securedisplay.feature_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 8862684f8b43..29bf9f09944b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -151,15 +151,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out2; ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + adev->psp.xgmi.feature_version = le32_to_cpu(ta_hdr->xgmi.fw_version); + adev->psp.xgmi.size_bytes = le32_to_cpu(ta_hdr->xgmi.size_bytes); + adev->psp.xgmi.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); - adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); - adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + - le32_to_cpu(ta_hdr->ta_ras_offset_bytes); + adev->psp.ras.feature_version = le32_to_cpu(ta_hdr->ras.fw_version); + adev->psp.ras.size_bytes = le32_to_cpu(ta_hdr->ras.size_bytes); + adev->psp.ras.start_addr = (uint8_t *)adev->psp.xgmi.start_addr + + le32_to_cpu(ta_hdr->ras.offset_bytes); } break; case CHIP_NAVI10: @@ -186,17 +186,17 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out2; ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr + + adev->psp.hdcp.feature_version = le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_dtm_ucode_version = 
le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + adev->psp.dtm.feature_version = le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); } break; case CHIP_SIENNA_CICHLID: diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 0c908d4566e8..cc649406234b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -84,23 +84,23 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = + adev->psp.hdcp.feature_version = + le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = + le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + adev->psp.dtm.feature_version = + le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = + le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); } return 0; -- cgit From 893cf382c0403d7c4581f0f01f6d06c76485123d Mon Sep 
17 00:00:00 2001 From: Candice Li Date: Fri, 13 Aug 2021 19:06:33 +0800 Subject: drm/amd/amdgpu: remove unnecessary RAS context field Delete ras_if->name in the RAS ctx structure and remove related lines. Signed-off-by: Candice Li Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 1 - drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 4 ++-- 10 files changed, 6 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a0be0772c8b3..e7e9655c5623 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -615,7 +615,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->gfx.ras_if->sub_block_index = 0; - strcpy(adev->gfx.ras_if->name, "gfx"); } fs_info.head = ih_info.head = *adev->gfx.ras_if; r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index 1d50d534d77c..a766e1aad2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -41,7 +41,6 @@ int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP; adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->hdp.ras_if->sub_block_index = 0; - strcpy(adev->hdp.ras_if->name, "hdp"); } ih_info.head = fs_info.head = 
*adev->hdp.ras_if; r = amdgpu_ras_late_init(adev, adev->hdp.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index ead3dc572ec5..24297dc51434 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -41,7 +41,6 @@ int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->mmhub.ras_if->sub_block_index = 0; - strcpy(adev->mmhub.ras_if->name, "mmhub"); } ih_info.head = fs_info.head = *adev->mmhub.ras_if; r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 6201a5f4b4fa..6afb02fef8cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -39,7 +39,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->nbio.ras_if->sub_block_index = 0; - strcpy(adev->nbio.ras_if->name, "pcie_bif"); } ih_info.head = fs_info.head = *adev->nbio.ras_if; r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3811b6b6a192..96a8fd0ca1df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -64,7 +64,6 @@ const char *ras_block_string[] = { }; #define ras_err_str(i) (ras_error_string[ffs(i)]) -#define ras_block_str(i) (ras_block_string[i]) #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) @@ -530,7 +529,7 @@ static inline void put_obj(struct ras_manager *obj) if (obj && (--obj->use == 0)) list_del(&obj->node); if (obj && (obj->use < 0)) - DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name); + DRM_ERROR("RAS ERROR: Unbalance 
obj(%s) use\n", ras_block_str(obj->head.block)); } /* make one obj and return it. */ @@ -793,7 +792,6 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, .type = default_ras_type, .sub_block_index = 0, }; - strcpy(head.name, ras_block_str(i)); if (bypass) { /* * bypass psp. vbios enable ras for us. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 471ffe885fdf..abc5710898e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -53,6 +53,9 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__LAST }; +extern const char *ras_block_string[]; + +#define ras_block_str(i) (ras_block_string[i]) #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) @@ -306,8 +309,6 @@ struct ras_common_if { enum amdgpu_ras_block block; enum amdgpu_ras_error_type type; uint32_t sub_block_index; - /* block name */ - char name[32]; }; struct amdgpu_ras { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index de91d29c9d96..65debb65a5df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -105,7 +105,6 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->sdma.ras_if->sub_block_index = 0; - strcpy(adev->sdma.ras_if->name, "sdma"); } fs_info.head = ih_info->head = *adev->sdma.ras_if; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0c7c56a91b25..a90029ee9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -41,7 +41,6 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 
adev->umc.ras_if->sub_block_index = 0; - strcpy(adev->umc.ras_if->name, "umc"); } ih_info.head = fs_info.head = *adev->umc.ras_if; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 258cf86b32f6..2e47bc446700 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -663,7 +663,6 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->gmc.xgmi.ras_if->sub_block_index = 0; - strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); } ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index cef929746739..1c94a14fc18d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -372,13 +372,13 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device "errors detected in %s block, " "no user action is needed.\n", obj->err_data.ce_count, - adev->nbio.ras_if->name); + ras_block_str(adev->nbio.ras_if->block)); if (err_data.ue_count) dev_info(adev->dev, "%ld uncorrectable hardware " "errors detected in %s block\n", obj->err_data.ue_count, - adev->nbio.ras_if->name); + ras_block_str(adev->nbio.ras_if->block)); } dev_info(adev->dev, "RAS controller interrupt triggered " -- cgit From 355e3e4ccc2cd4b29cc37546474448a8f02e3bbc Mon Sep 17 00:00:00 2001 From: Candice Li Date: Mon, 23 Aug 2021 15:17:35 +0800 Subject: drm/amd/amdgpu: add name field back to ras_common_if Adding name field back to ras_common_if to work around error injection failure with amdgpuras tool. 
Signed-off-by: Candice Li Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index abc5710898e8..5b5163357fcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -309,6 +309,7 @@ struct ras_common_if { enum amdgpu_ras_block block; enum amdgpu_ras_error_type type; uint32_t sub_block_index; + char name[32]; }; struct amdgpu_ras { -- cgit From 3907c492184e13a5d8d336963a6ec1f6ebe0064d Mon Sep 17 00:00:00 2001 From: John Clements Date: Tue, 24 Aug 2021 13:24:25 +0800 Subject: drm/amdgpu: Add driver infrastructure for MCA RAS Add MCA specific IP blocks targeting RAS features Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 117 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 72 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++++ drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 125 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mca_v3_0.h | 26 +++++++ 9 files changed, 388 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h create mode 100644 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c create mode 100644 drivers/gpu/drm/amd/amdgpu/mca_v3_0.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 0d814c957461..8d0748184a14 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o \ - amdgpu_eeprom.o + amdgpu_eeprom.o amdgpu_mca.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -189,6 +189,10 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_reset.o +# add MCA block +amdgpu-y += \ + mca_v3_0.o + # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0f278cc3a5f4..dc3c6b3a00e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -108,6 +108,7 @@ #include "amdgpu_df.h" #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" +#include "amdgpu_mca.h" #define MAX_GPU_INSTANCE 16 @@ -1009,6 +1010,9 @@ struct amdgpu_device { /* df */ struct amdgpu_df df; + /* MCA */ + struct amdgpu_mca mca; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d0b8d415b63b..c7797eac83c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -471,6 +471,27 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) return r; } + if (adev->mca.mp0.ras_funcs && + adev->mca.mp0.ras_funcs->ras_late_init) { + r = adev->mca.mp0.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mp1.ras_funcs && + adev->mca.mp1.ras_funcs->ras_late_init) { + r = adev->mca.mp1.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mpio.ras_funcs && + adev->mca.mpio.ras_funcs->ras_late_init) { + r = adev->mca.mpio.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c new file mode 
100644 index 000000000000..a2d3dbbf7d25 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -0,0 +1,117 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu_ras.h" +#include "amdgpu.h" +#include "amdgpu_mca.h" + +#include "umc/umc_6_7_0_offset.h" +#include "umc/umc_6_7_0_sh_mask.h" + +void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count) +{ + uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4); + + if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count) +{ + uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4); + + if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +void amdgpu_mca_reset_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr) +{ + WREG64_PCIE(mc_status_addr * 4, 0x0ULL); +} + +void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count)); + amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count)); + + amdgpu_mca_reset_error_count(adev, mc_status_addr); +} + +int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = mca_dev->ras_funcs->sysfs_name, + }; + + if 
(!mca_dev->ras_if) { + mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!mca_dev->ras_if) + return -ENOMEM; + mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block; + mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + mca_dev->ras_if->sub_block_index = 0; + } + ih_info.head = fs_info.head = *mca_dev->ras_if; + r = amdgpu_ras_late_init(adev, mca_dev->ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) { + kfree(mca_dev->ras_if); + mca_dev->ras_if = NULL; + } + + return r; +} + +void amdgpu_mca_ras_fini(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev) +{ + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + if (!mca_dev->ras_if) + return; + + amdgpu_ras_late_fini(adev, mca_dev->ras_if, &ih_info); + kfree(mca_dev->ras_if); + mca_dev->ras_if = NULL; +} \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h new file mode 100644 index 000000000000..f860f2f0e296 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_MCA_H__ +#define __AMDGPU_MCA_H__ + +struct amdgpu_mca_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); + uint32_t ras_block; + const char* sysfs_name; +}; + +struct amdgpu_mca_ras { + struct ras_common_if *ras_if; + const struct amdgpu_mca_ras_funcs *ras_funcs; +}; + +struct amdgpu_mca_funcs { + void (*init)(struct amdgpu_device *adev); +}; + +struct amdgpu_mca { + const struct amdgpu_mca_funcs *funcs; + struct amdgpu_mca_ras mp0; + struct amdgpu_mca_ras mp1; + struct amdgpu_mca_ras mpio; +}; + +void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count); + +void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count); + +void amdgpu_mca_reset_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr); + +void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + void *ras_error_status); + +int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev); + +void amdgpu_mca_ras_fini(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 5b5163357fcb..eae604fd90b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -49,6 +49,7 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP0, 
AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, + AMDGPU_RAS_BLOCK__MPIO, AMDGPU_RAS_BLOCK__LAST }; @@ -420,7 +421,7 @@ struct ras_badpage { /* interfaces for IP */ struct ras_fs_if { struct ras_common_if head; - char sysfs_name[32]; + const char* sysfs_name; char debugfs_name[32]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 097230b5e946..085fab45245d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -55,6 +55,7 @@ #include "umc_v6_0.h" #include "umc_v6_7.h" #include "hdp_v4_0.h" +#include "mca_v3_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -1229,6 +1230,18 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs; } +static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_ALDEBARAN: + if (!adev->gmc.xgmi.connected_to_cpu) + adev->mca.funcs = &mca_v3_0_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1250,6 +1263,7 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_mmhub_ras_funcs(adev); gmc_v9_0_set_gfxhub_funcs(adev); gmc_v9_0_set_hdp_ras_funcs(adev); + gmc_v9_0_set_mca_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -1461,6 +1475,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gfxhub.funcs->init(adev); adev->mmhub.funcs->init(adev); + if (adev->mca.funcs) + adev->mca.funcs->init(adev); spin_lock_init(&adev->gmc.invalidate_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c new file mode 100644 index 000000000000..058b65730a84 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -0,0 +1,125 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu_ras.h" +#include "amdgpu.h" +#include "amdgpu_mca.h" + +#define smnMCMP0_STATUST0 0x03830408 +#define smnMCMP1_STATUST0 0x03b30408 +#define smnMCMPIO_STATUST0 0x0c930408 + + +static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMP0_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0); +} + +static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mp0); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = { + .ras_late_init = mca_v3_0_mp0_ras_late_init, + .ras_fini = mca_v3_0_mp0_ras_fini, + .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MP0, + .sysfs_name = "mp0_err_count", +}; + +static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMP1_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1); +} + +static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mp1); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = { + .ras_late_init = mca_v3_0_mp1_ras_late_init, + .ras_fini = mca_v3_0_mp1_ras_fini, + .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MP1, + .sysfs_name = "mp1_err_count", +}; + +static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMPIO_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev) +{ + return 
amdgpu_mca_ras_late_init(adev, &adev->mca.mpio); +} + +static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mpio); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = { + .ras_late_init = mca_v3_0_mpio_ras_late_init, + .ras_fini = mca_v3_0_mpio_ras_fini, + .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MPIO, + .sysfs_name = "mpio_err_count", +}; + + +static void mca_v3_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_mca *mca = &adev->mca; + + mca->mp0.ras_funcs = &mca_v3_0_mp0_ras_funcs; + mca->mp1.ras_funcs = &mca_v3_0_mp1_ras_funcs; + mca->mpio.ras_funcs = &mca_v3_0_mpio_ras_funcs; +} + +const struct amdgpu_mca_funcs mca_v3_0_funcs = { + .init = mca_v3_0_init, +}; \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h new file mode 100644 index 000000000000..b899b86194c2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __MCA_V3_0_H__ +#define __MCA_V3_0_H__ + +extern const struct amdgpu_mca_funcs mca_v3_0_funcs; + +#endif -- cgit