Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
32 files changed, 568 insertions, 613 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 17c4872a0287..8df702eaa2ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -773,6 +773,17 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; +struct amdgpu_reset_info { + /* reset dump register */ + u32 *reset_dump_reg_list; + u32 *reset_dump_reg_value; + int num_regs; + +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_coredump_info *coredump_info; +#endif +}; + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1081,10 +1092,7 @@ struct amdgpu_device { struct mutex benchmark_mutex; - /* reset dump register */ - uint32_t *reset_dump_reg_list; - uint32_t *reset_dump_reg_value; - int num_regs; + struct amdgpu_reset_info reset_info; bool scpm_enabled; uint32_t scpm_status; @@ -1111,15 +1119,6 @@ static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, return adev->ip_versions[ip][inst] & ~0xFFU; } -#ifdef CONFIG_DEV_COREDUMP -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; -}; -#endif - static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) { return container_of(ddev, struct amdgpu_device, ddev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a4faea4fa0b5..3136a0774dd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2016,8 +2016,8 @@ static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, if (ret) return ret; - for (i = 0; i < adev->num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]); + for (i = 0; i < adev->reset_info.num_regs; i++) { + sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); up_read(&adev->reset_domain->sem); if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) return -EFAULT; @@ -2074,9 +2074,9 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, if (ret) goto error_free; - swap(adev->reset_dump_reg_list, tmp); - swap(adev->reset_dump_reg_value, new); - adev->num_regs = i; + swap(adev->reset_info.reset_dump_reg_list, tmp); + swap(adev->reset_info.reset_dump_reg_value, new); + adev->reset_info.num_regs = i; up_write(&adev->reset_domain->sem); ret = size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0f98f720d9ca..9340e8dc0413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,8 +32,6 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> @@ -3578,9 +3576,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (adev->asic_reset_res) goto fail; - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); } else { task_barrier_full(&hive->tb); @@ -5050,90 +5046,16 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) lockdep_assert_held(&adev->reset_domain->sem); - for (i = 0; i < adev->num_regs; i++) { - adev->reset_dump_reg_value[i] = 
RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], - adev->reset_dump_reg_value[i]); - } - - return 0; -} - -#ifndef CONFIG_DEV_COREDUMP -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_dump_reg_list[i], - coredump->adev->reset_dump_reg_value[i]); - } + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); } - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); + return 0; } -#endif int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) @@ -5201,9 +5123,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (!r && amdgpu_ras_intr_triggered()) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); + amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); } amdgpu_ras_intr_cleared(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9a158018ae16..c92e0aba69e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -29,6 +29,7 @@ #include "amdgpu_rlc.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_xgmi.h" /* delay 0.1 second to enable gfx off feature */ 
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -501,6 +502,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + struct amdgpu_hive_info *hive; + struct amdgpu_ras *ras; + int hive_ras_recovery = 0; int i, r = 0; int j; @@ -521,6 +525,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) RESET_QUEUES, 0, 0); } + /** + * This is a workaround: only skip kiq_ring test + * during ras recovery in suspend stage for gfx9.4.3 + */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + ras = amdgpu_ras_get_context(adev); + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && + ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) { + spin_unlock(&kiq->ring_lock); + return 0; + } + if (kiq_ring->sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b153acf9ed05..648bd5e12830 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1267,6 +1267,8 @@ invoke: xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + /* note down the capability flag for XGMI TA */ + psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag; return ret; } @@ -1388,7 +1390,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO; topology_info_input->num_nodes = number_devices; for (i = 0; i < topology_info_input->num_nodes; i++) { @@ -1399,7 +1401,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, } /* Invoke xgmi ta to get the topology information */ - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO); if (ret) return ret; @@ -1424,28 +1426,53 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Invoke xgmi ta again to get the link information */ if (psp_xgmi_peer_link_info_supported(psp)) { - struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + struct ta_xgmi_cmd_get_peer_link_info *link_info_output; + struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output; bool requires_reflection = (psp->xgmi_context.supports_extended_data && get_extended_data) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6); + bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & + EXTEND_PEER_LINK_INFO_CMD_FLAG; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + /* populate the shared output buffer rather than the cmd input buffer + * with node_ids as the input for GET_PEER_LINKS command execution. + * This is required for GET_PEER_LINKS per xgmi ta implementation. + * The same requirement for GET_EXTEND_PEER_LINKS command. 
+ */ + if (ta_port_num_support) { + link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info; + + for (i = 0; i < topology->num_nodes; i++) + link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id; + + link_extend_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS; + } else { + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + for (i = 0; i < topology->num_nodes; i++) + link_info_output->nodes[i].node_id = topology->nodes[i].node_id; + link_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + } + + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); if (ret) return ret; - link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; for (i = 0; i < topology->num_nodes; i++) { + uint8_t node_num_links = ta_port_num_support ? + link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links; /* accumulate num_links on extended data */ - topology->nodes[i].num_links = get_extended_data ? - topology->nodes[i].num_links + - link_info_output->nodes[i].num_links : - ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links : - link_info_output->nodes[i].num_links); + if (get_extended_data) { + topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links; + } else { + topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ? + topology->nodes[i].num_links : node_num_links; + } /* reflect the topology information for bi-directionality */ if (requires_reflection && topology->nodes[i].num_hops) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3e67ed63e638..7111dd32e66f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -189,6 +189,7 @@ struct psp_xgmi_context { struct ta_context context; struct psp_xgmi_topology_info top_info; bool supports_extended_data; + uint8_t xgmi_ta_caps; }; struct psp_ras_context { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 973073e07b2a..3c83a2b8fb2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1038,7 +1038,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, ras_mgr->err_data.ue_count, blk_name); else dev_info(adev->dev, "%ld correctable hardware errors detected in %s block\n", - ras_mgr->err_data.ue_count, blk_name); + ras_mgr->err_data.ce_count, blk_name); for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; @@ -1055,7 +1055,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, "%lld correctable hardware errors detected in %s block\n", mcm_info->socket_id, mcm_info->die_id, - err_info->ue_count, + err_info->ce_count, blk_name); } } @@ -1170,23 +1170,34 @@ out_fini_err_data: return ret; } -int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block block) { struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - ras_block_str(block)); - return 0; + ras_block_str(block)); + return -EOPNOTSUPP; } if (!amdgpu_ras_is_supported(adev, block)) - return 0; + return -EOPNOTSUPP; if 
(block_obj->hw_ops->reset_ras_error_count) block_obj->hw_ops->reset_ras_error_count(adev); + return 0; +} + +int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + + if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP) + return 0; + if ((block == AMDGPU_RAS_BLOCK__GFX) || (block == AMDGPU_RAS_BLOCK__MMHUB)) { if (block_obj->hw_ops->reset_ras_error_status) @@ -2140,9 +2151,11 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *remote_adev = NULL; struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (hive) + atomic_set(&hive->ras_recovery, 1); if (!ras->disable_ras_err_cnt_harvest) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); /* Build list of devices to query RAS related errors */ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) { @@ -2159,7 +2172,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_ras_log_on_err_counter(remote_adev); } - amdgpu_put_xgmi_hive(hive); } if (amdgpu_device_should_recover_gpu(ras->adev)) { @@ -2194,6 +2206,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } atomic_set(&ras->in_recovery, 0); + if (hive) { + atomic_set(&hive->ras_recovery, 0); + amdgpu_put_xgmi_hive(hive); + } } /* alloc/realloc bps array */ @@ -2606,7 +2622,9 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) || amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 0)) + IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 3)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); else @@ -2635,18 +2653,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) /* hw_supported needs to be aligned with RAS block mask. */ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; - - /* - * Disable ras feature for aqua vanjaram - * by default on apu platform. - */ - if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) && - adev->gmc.is_app_apu) - adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 : - adev->ras_hw_enabled & amdgpu_ras_mask; - else - adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : - adev->ras_hw_enabled & amdgpu_ras_mask; + adev->ras_enabled = amdgpu_ras_enable == 0 ? 
0 : + adev->ras_hw_enabled & amdgpu_ras_mask; } static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -3303,6 +3311,27 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) return 0; } +void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (con) + con->is_mca_debug_mode = enable; +} + +bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) + return false; + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + return con->is_mca_debug_mode; + else + return true; +} /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, @@ -3485,10 +3514,8 @@ void amdgpu_ras_error_data_fini(struct ras_err_data *err_data) { struct ras_err_node *err_node, *tmp; - list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) { + list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) amdgpu_ras_error_node_release(err_node); - list_del(&err_node->node); - } } static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0a5c8a107fb2..2fdfef62ee27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -434,6 +434,8 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; + /* Record status of smu mca debug mode */ + bool is_mca_debug_mode; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; @@ -714,6 +716,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info); +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, + enum amdgpu_ras_block block); int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, enum amdgpu_ras_block block); @@ -766,6 +770,9 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); +void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); +bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 970bfece775c..4baa300121d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,6 +21,9 @@ * */ +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> + #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" @@ -159,5 +162,82 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) up_write(&reset_domain->sem); } +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + 
int i; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) + coredump->reset_task_info = reset_context->job->vm->task_info; + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 471d789b33a5..b0335a1c5e90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -89,6 +89,17 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; +}; +#endif int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); @@ -130,6 +141,9 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + #define for_each_handler(i, handler, reset_ctl) \ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ (handler = (*reset_ctl->reset_handlers)[i]); \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 1c523eb5f342..ca45ba8ac171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -843,6 +843,20 @@ static int umsch_mm_hw_fini(void *handle) return 0; } +static int umsch_mm_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_fini(adev); +} + +static int umsch_mm_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_init(adev); 
+} + static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .name = "umsch_mm_v4_0", .early_init = umsch_mm_early_init, @@ -851,6 +865,8 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .sw_fini = umsch_mm_sw_fini, .hw_init = umsch_mm_hw_init, .hw_fini = umsch_mm_hw_fini, + .suspend = umsch_mm_suspend, + .resume = umsch_mm_resume, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0815c5a97564..514c98ea144f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -169,6 +169,9 @@ #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) #define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) +#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15) + +#define MAX_NUM_VCN_RB_SETUP 4 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -335,15 +338,30 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; +struct amdgpu_vcn_rb_setup_info { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; +}; + struct amdgpu_fw_shared_rb_setup { uint32_t is_rb_enabled_flags; - uint32_t rb_addr_lo; - uint32_t rb_addr_hi; - uint32_t rb_size; - uint32_t rb4_addr_lo; - uint32_t rb4_addr_hi; - uint32_t rb4_size; - uint32_t reserved[6]; + + union { + struct { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; + uint32_t rb4_addr_lo; + uint32_t rb4_addr_hi; + uint32_t rb4_size; + uint32_t reserved[6]; + }; + + struct { + struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP]; + }; + }; }; struct amdgpu_fw_shared_drm_key_wa { @@ -351,6 +369,11 @@ struct amdgpu_fw_shared_drm_key_wa { uint8_t reserved[3]; }; +struct amdgpu_fw_shared_queue_decouple { + uint8_t is_enabled; + uint8_t reserved[7]; +}; + struct amdgpu_vcn4_fw_shared { uint32_t present_flag_0; uint8_t pad[12]; @@ -361,6 +384,8 @@ struct amdgpu_vcn4_fw_shared { struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; + struct amdgpu_fw_shared_queue_decouple decouple; }; struct amdgpu_vcn_fwlog { @@ -378,6 +403,15 @@ struct amdgpu_vcn_decode_buffer { uint32_t pad[30]; }; +struct amdgpu_vcn_rb_metadata { + uint32_t size; + uint32_t present_flag_0; + + uint8_t version; + uint8_t ring_id; + uint8_t pad[26]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index fabb83e9d9ae..858ef21ae515 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -126,6 +126,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), /* AV1 Support MODE*/ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), + /* VCN RB decouple */ + AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -326,6 +328,8 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) #define amdgpu_sriov_is_av1_support(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) +#define amdgpu_sriov_is_vcn_rb_decouple(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void 
amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 19aa6b7b16fb..9d5d742ee9d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -908,7 +908,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm adev->gmc.xgmi.num_physical_nodes == 0) return 0; - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); return amdgpu_ras_block_late_init(adev, ras_block); } @@ -1075,7 +1075,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, break; } - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 86fbf56938f4..6cab882e8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -44,6 +44,7 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; uint32_t device_remove_count; + atomic_t ras_recovery; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 104a5ad8397d..51a14f6d93bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags { uint32_t host_load_ucodes : 1; uint32_t host_flr_vramlost : 1; uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; + uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; - uint32_t reserved : 25; + uint32_t vcn_rb_decouple : 1; + uint32_t reserved : 24; } flags; uint32_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 27b224b0688a..fd22943685f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -589,6 +589,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec2_fw = NULL; gfx_v11_0_check_fw_cp_gfx_shadow(adev); + + if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + DRM_ERROR("Failed to init imu firmware!\n"); + return err; + } + out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -1395,14 +1403,6 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; - if (adev->gfx.imu.funcs) { - if (adev->gfx.imu.funcs->init_microcode) { - r = adev->gfx.imu.funcs->init_microcode(adev); - if (r) - DRM_ERROR("Failed to load imu firmware!\n"); - } - } - gfx_v11_0_me_init(adev); r = gfx_v11_0_rlc_init(adev); @@ -4373,6 +4373,10 @@ static int gfx_v11_0_hw_init(void *handle) if (r) return r; + /* get IMU version from HW if it's not set */ + if (!adev->gfx.imu_fw_version) + adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); + return r; } @@ -5170,45 +5174,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - 
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -5233,42 +5209,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ - } + BUG(); /* only DOORBELL method supported on gfx11 now */ } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index db179d085efa..362bf51ab1d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -623,7 +623,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) { int ret, i, num_xcc; - u32 tmp = 0, regval; + u32 tmp = 0; if (adev->psp.funcs) { ret = psp_spatial_partition(&adev->psp, @@ -631,24 +631,23 @@ 
static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, num_xccs_per_xcp); if (ret) return ret; - } - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); + } else { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, - num_xccs_per_xcp); - tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, - i % num_xccs_per_xcp); - regval = RREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL); - if (regval != tmp) + for (i = 0; i < num_xcc; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xccs_per_xcp); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xccs_per_xcp); WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, tmp); + } + ret = 0; } adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; - return 0; + return ret; } static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) @@ -3755,10 +3754,6 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { AMDGPU_GFX_LDS_MEM, 4}, }; -static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { - SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 -}; - static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { @@ -3847,39 +3842,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); } -static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t reg_value; - - mutex_lock(&adev->grbm_idx_mutex); - - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regGCEA_ERR_STATUS); - if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "GCEA err detected at instance: %d, status: 0x%x!\n", - j, reg_value); - } - /* clear after read */ - reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, - reg_value); - } - } - - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, int xcc_id) { @@ -3984,7 +3946,6 @@ static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { - gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); } @@ -3997,27 +3958,6 @@ static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); } -static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t value; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 
regGCEA_ERR_STATUS); - value = REG_SET_FIELD(value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value); - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, int xcc_id) { @@ -4043,7 +3983,6 @@ static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a1050344b59..5fed01e34928 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1587,13 +1587,8 @@ static int gmc_v9_0_late_init(void *handle) } if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); - - if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops && - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count) - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP); } r = amdgpu_gmc_ras_late_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 875fb5ac70b5..c0bdab3bf0e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -55,7 +55,6 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; - adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { @@ -69,7 +68,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) info->fw = adev->gfx.imu_fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE); - } + } else + adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); out: if (err) { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 31b26e6f0b30..4dfec56e1b7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -414,60 +414,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } -static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes) -{ - struct amdgpu_device *adev = mes->adev; - uint32_t data; - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); - data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << - CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); - data &= 
~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << - CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); - data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << - CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); - data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << - CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); - data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << - CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); - - data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); -} - static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -1243,8 +1189,6 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; - mes_v11_0_init_aggregated_doorbell(&adev->mes); - r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index aa00483e7b37..ea142611be1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -700,152 +700,9 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev) mmhub_v1_8_inst_reset_ras_error_count(adev, i); } -static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = { - regMMEA0_ERR_STATUS, - regMMEA1_ERR_STATUS, - regMMEA2_ERR_STATUS, - regMMEA3_ERR_STATUS, - regMMEA4_ERR_STATUS, -}; - -static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) -{ - uint32_t reg_value; - uint32_t mmea_err_status_addr_dist; - uint32_t i; - - /* query mmea ras err status */ - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MMEA%d err in MMHUB%d, 
status: 0x%x\n", - i, mmhub_inst, reg_value); - } - } - - /* query mm_cane ras err status */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MM CANE err in MMHUB%d, status: 0x%x\n", - mmhub_inst, reg_value); - } -} - -static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } - - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_query_ras_err_status(adev, i); -} - -static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) -{ - uint32_t mmea_cgtt_clk_cntl_addr_dist; - uint32_t mmea_err_status_addr_dist; - uint32_t reg_value; - uint32_t i; - - /* reset mmea ras err status */ - mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL; - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - /* force clk branch on for response path - * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1 - */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - - /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist, - reg_value); - - /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 0); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - } - - /* reset mm_cane ras err status - * force clk branch on for response path - * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1 - */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); - - /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value); - - /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 0); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); -} - -static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; - - if 
(!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } - - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_reset_ras_err_status(adev, i); -} - static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .query_ras_error_count = mmhub_v1_8_query_ras_error_count, .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, - .query_ras_error_status = mmhub_v1_8_query_ras_error_status, - .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, }; struct amdgpu_mmhub_ras mmhub_v1_8_ras = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 3a94f249929e..676ab1d20d2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -272,6 +272,81 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev) */ } +static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, data); +} + +static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (enable) + data |= BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (data & 
BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (data & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset, @@ -288,6 +363,9 @@ const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_11_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_11_get_clockgating_state, .ih_control = nbio_v7_11_ih_control, .init_registers = nbio_v7_11_init_registers, .remap_hdp_registers = nbio_v7_11_remap_hdp_registers, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 573046702861..4142e2fcd866 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -168,7 +168,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index dff66e1ae7ea..683d51ae4bf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1749,11 +1749,8 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 31aa245552d6..c46bc6aa4f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1276,11 +1276,8 @@ static int sdma_v4_4_2_late_init(void *handle) .cb = sdma_v4_4_2_process_ras_data_cb, }; #endif - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 1f8122fd1ad7..7e4d5188cbfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -156,68 +156,35 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring) static void 
sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; - - DRM_DEBUG("Setting write pointer\n"); - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index df7462cec6ab..8c6cab641a1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -863,6 +863,8 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): + case IP_VERSION(7, 11, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, 
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
index da815a93d46e..d5748032674e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -20,7 +20,6 @@
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
-
 #ifndef _TA_XGMI_IF_H
 #define _TA_XGMI_IF_H
 
@@ -28,20 +27,31 @@
 #define RSP_ID_MASK (1U << 31)
 #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
 
+#define EXTEND_PEER_LINK_INFO_CMD_FLAG	1
+
 enum ta_command_xgmi {
+	/* Initialize the Context and Session Topology */
 	TA_COMMAND_XGMI__INITIALIZE = 0x00,
+	/* Gets the current GPU's node ID */
 	TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
+	/* Gets the current GPU's hive ID */
 	TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
-	TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
+	/* Gets the peer's topology information */
+	TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03,
+	/* Sets the peer's topology information */
 	TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04,
-	TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B
+	/* Gets the total links between adjacent peer dies in hive */
+	TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B,
+	/* Gets the total links and connected port numbers between adjacent peer dies in hive */
+	TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C
};

/* XGMI related enumerations */
/**********************************************************/;
-enum ta_xgmi_connected_nodes {
-	TA_XGMI__MAX_CONNECTED_NODES = 64
-};
+enum { TA_XGMI__MAX_CONNECTED_NODES = 64 };
+enum { TA_XGMI__MAX_INTERNAL_STATE = 32 };
+enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 };
+enum { TA_XGMI__MAX_PORT_NUM = 8 };
 
 enum ta_xgmi_status {
 	TA_XGMI_STATUS__SUCCESS = 0x00,
@@ -81,6 +91,18 @@ struct ta_xgmi_peer_link_info {
 	uint8_t num_links;
 };
 
+struct xgmi_connected_port_num {
+	uint8_t dst_xgmi_port_num;
+	uint8_t src_xgmi_port_num;
+};
+
+/* carries both the port numbers and num_links */
+struct ta_xgmi_extend_peer_link_info {
+	uint64_t node_id;
+	uint8_t num_links;
+	struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
+};
+
 struct ta_xgmi_cmd_initialize_output {
 	uint32_t status;
 };
@@ -103,16 +125,21 @@ struct ta_xgmi_cmd_get_topology_info_output {
 	struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
 };
 
-struct ta_xgmi_cmd_get_peer_link_info_output {
+struct ta_xgmi_cmd_set_topology_info_input {
 	uint32_t num_nodes;
-	struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+	struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
 };
 
-struct ta_xgmi_cmd_set_topology_info_input {
+/* the XGMI TA may run with or without port_num support, so two similar structs are defined */
+struct ta_xgmi_cmd_get_peer_link_info {
 	uint32_t num_nodes;
-	struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+	struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
 };
 
+struct ta_xgmi_cmd_get_extend_peer_link_info {
+	uint32_t num_nodes;
+	struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
 /**********************************************************/
 /* Common input structure for XGMI callbacks */
 union ta_xgmi_cmd_input {
@@ -126,16 +153,23 @@ union ta_xgmi_cmd_output {
 	struct ta_xgmi_cmd_get_node_id_output get_node_id;
 	struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
 	struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
-	struct ta_xgmi_cmd_get_peer_link_info_output get_link_info;
+	struct ta_xgmi_cmd_get_peer_link_info get_link_info;
+	struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info;
 };
-/**********************************************************/
 
 struct ta_xgmi_shared_memory {
 	uint32_t cmd_id;
 	uint32_t resp_id;
 	enum ta_xgmi_status xgmi_status;
+
+	/* if the number of xgmi link records exceeds 128, the driver sets this
+	 * flag to 0 to fetch the first 128 link records and to 1 to fetch the
+	 * second set
+	 */
 	uint8_t flag_extend_link_record;
-	uint8_t reserved0[3];
+	/* bit0: port_num info support flag for the GET_EXTEND_PEER_LINKS command */
+	uint8_t caps_flag;
+	uint8_t reserved[2];
 	union ta_xgmi_cmd_input xgmi_in_message;
 	union ta_xgmi_cmd_output xgmi_out_message;
 };
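The new shared-memory fields define a small protocol: flag_extend_link_record pages link records in batches of up to 128, and bit 0 of caps_flag (EXTEND_PEER_LINK_INFO_CMD_FLAG) tells the driver whether the TA's GET_EXTEND_PEER_LINKS reply carries port numbers. A self-contained sketch of how a caller might drive that paging; every demo_* name is hypothetical, and the query callback stands in for the real PSP/TA round trip.

#include <stdint.h>

#define DEMO_BATCH		128		/* records per TA round trip, per the comment above */
#define DEMO_PORT_CAP_BIT	(1u << 0)	/* mirrors EXTEND_PEER_LINK_INFO_CMD_FLAG */

struct demo_link_record { uint64_t node_id; uint8_t num_links; };

/* fills out[] with at most DEMO_BATCH records for the requested batch
 * (flag 0 or 1) and reports how many were written plus the TA's caps_flag */
typedef int (*demo_ta_query)(uint8_t flag, struct demo_link_record *out,
			     uint32_t *num_records, uint8_t *caps_flag);

static int demo_collect_link_records(demo_ta_query query,
				     struct demo_link_record buf[2 * DEMO_BATCH],
				     uint32_t *total, int *has_port_info)
{
	uint32_t n = 0;
	uint8_t caps = 0;

	*total = 0;
	for (uint8_t flag = 0; flag <= 1; flag++) {	/* at most two batches */
		if (query(flag, buf + *total, &n, &caps))
			return -1;
		*total += n;
		if (n < DEMO_BATCH)	/* short batch: no second set to fetch */
			break;
	}
	*has_port_info = !!(caps & DEMO_PORT_CAP_BIT);
	return 0;
}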
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index b664ee3ee92d..025e6aeb058d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -105,7 +105,9 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
 		RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
 
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
-	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
+	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0)))
 		*error_count += 1;
 }
 
@@ -125,7 +127,6 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
 	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
 	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
-	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
@@ -293,7 +294,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
 	/* calculate error address if ue error is detected */
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
 	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
-	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1) {
 		mc_umc_addrt0 =
 			SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
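The net effect of the three umc_v12_0 hunks is a reclassification: a record with UECC set but UC clear now counts as correctable (a deferred error) rather than uncorrectable, and the error-address walk keys off UC instead of UECC. A compilable restatement of the new predicates, with the MCA status fields modeled as plain booleans (in the driver they come from REG_GET_FIELD() on MCA_UMC_UMC0_MCUMC_STATUST0):

#include <stdbool.h>

struct mca_status {
	bool val;	/* record valid */
	bool addrv;	/* error address valid */
	bool cecc;	/* corrected ECC */
	bool uecc;	/* uncorrected ECC */
	bool uc;	/* uncorrected (as opposed to deferred) */
	bool deferred, pcc, tcc;
};

/* counts classic CEs plus deferred errors (UECC set but UC clear) */
static bool is_correctable(const struct mca_status *s)
{
	return s->val && (s->cecc || (s->uecc && !s->uc));
}

/* UECC alone no longer implies a UE; UC (or Deferred/PCC/TCC) does */
static bool is_uncorrectable(const struct mca_status *s)
{
	return s->val && (s->deferred || s->pcc || s->uc || s->tcc);
}

/* address extraction now requires a true UE: Val, AddrV and UC all set */
static bool wants_error_address(const struct mca_status *s)
{
	return s->val && s->addrv && s->uc;
}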
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 7ab9263d3e19..48bfcd0d558b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -176,9 +176,6 @@ static int vcn_v4_0_sw_init(void *handle)
 				AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
 		}
 
-		if (amdgpu_sriov_vf(adev))
-			fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
-
 		if (amdgpu_vcnfw_log)
 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
 	}
@@ -1209,6 +1206,24 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc)
+{
+	struct amdgpu_vcn_rb_metadata *rb_metadata = NULL;
+	uint8_t *rb_ptr = (uint8_t *)ring_enc->ring;
+
+	rb_ptr += ring_enc->ring_size;
+	rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr;
+
+	memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata));
+	rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata);
+	rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+	rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+	rb_metadata->version = 1;
+	rb_metadata->ring_id = vcn_inst & 0xFF;
+
+	return 0;
+}
+
 static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
 {
 	int i;
@@ -1331,11 +1346,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
 			rb_enc_addr = ring_enc->gpu_addr;
 
 			rb_setup->is_rb_enabled_flags |= RB_ENABLED;
-			rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
-			rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
-			rb_setup->rb_size = ring_enc->ring_size / 4;
 			fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
 
+			if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+				vcn_v4_0_init_ring_metadata(adev, i, ring_enc);
+
+				memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP);
+				if (!(adev->vcn.harvest_config & (1 << 0))) {
+					rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+					rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+					rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4;
+				}
+				if (!(adev->vcn.harvest_config & (1 << 1))) {
+					rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+					rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+					rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4;
+				}
+				fw_shared->decouple.is_enabled = 1;
+				fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+			} else {
+				rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+				rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+				rb_setup->rb_size = ring_enc->ring_size / 4;
+			}
+
 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
 					regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
 					lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
@@ -1807,6 +1841,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_ENC,
 	.align_mask = 0x3f,
 	.nop = VCN_ENC_CMD_NO_OP,
+	.extra_dw = sizeof(struct amdgpu_vcn_rb_metadata),
 	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
 	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
 	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
@@ -2020,16 +2055,20 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
 {
 	uint32_t ip_instance;
 
-	switch (entry->client_id) {
-	case SOC15_IH_CLIENTID_VCN:
-		ip_instance = 0;
-		break;
-	case SOC15_IH_CLIENTID_VCN1:
-		ip_instance = 1;
-		break;
-	default:
-		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
-		return 0;
+	if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+		ip_instance = entry->ring_id;
+	} else {
+		switch (entry->client_id) {
+		case SOC15_IH_CLIENTID_VCN:
+			ip_instance = 0;
+			break;
+		case SOC15_IH_CLIENTID_VCN1:
+			ip_instance = 1;
+			break;
+		default:
+			DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+			return 0;
+		}
 	}
 
 	DRM_DEBUG("IH: VCN TRAP\n");
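The vcn_v4_0 changes hang together through the ring layout: .extra_dw grows the unified-ring allocation by sizeof(struct amdgpu_vcn_rb_metadata), vcn_v4_0_init_ring_metadata() writes version, ring_id and the SETUP/DECOUPLE flags into that tail, and process_interrupt() can then take entry->ring_id as the instance in the decoupled case. A minimal user-space model of that tail placement follows; the demo_* names, flag values and field set are illustrative (the real struct lives elsewhere in the driver headers), only the ring-then-metadata layout comes from the patch.

#include <stdint.h>
#include <string.h>

/* models amdgpu_vcn_rb_metadata; the real field set may differ */
struct demo_rb_metadata {
	uint32_t size;
	uint32_t present_flag_0;
	uint8_t version;
	uint8_t ring_id;
	uint8_t pad[2];
};

#define DEMO_SETUP_FLAG		(1u << 0)	/* stand-in for AMDGPU_VCN_VF_RB_SETUP_FLAG */
#define DEMO_DECOUPLE_FLAG	(1u << 1)	/* stand-in for AMDGPU_VCN_VF_RB_DECOUPLE_FLAG */

/* ring buffer followed by metadata, as .extra_dw arranges it */
static void demo_init_ring_metadata(uint8_t *ring, uint32_t ring_size,
				    uint8_t vcn_inst)
{
	struct demo_rb_metadata *md =
		(struct demo_rb_metadata *)(ring + ring_size);

	memset(md, 0, sizeof(*md));
	md->size = sizeof(*md);
	md->present_flag_0 |= DEMO_SETUP_FLAG | DEMO_DECOUPLE_FLAG;
	md->version = 1;
	md->ring_id = vcn_inst;	/* lets the interrupt handler pick the instance */
}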
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index f85d18cd74ec..810bbfccd6f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1760,6 +1760,11 @@ static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
 			SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
 			tmp, 0, indirect);
 
+	tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK;
+	WREG32_SOC15_DPG_MODE(inst_idx,
+			SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2),
+			tmp, 0, indirect);
+
 	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
 	WREG32_SOC15_DPG_MODE(inst_idx,
 			SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
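In DPG mode these register writes are not issued to MMIO directly; WREG32_SOC15_DPG_MODE records them in an indirect list that the firmware replays. A toy model of that append-then-replay pattern, under the assumption stated here (all demo names and register/mask values are hypothetical); it only illustrates why the hunk queues a second enable write, for regUVD_VCPU_INT_EN2 alongside regUVD_SYS_INT_EN.

#include <stdint.h>
#include <stdio.h>

struct dpg_write { uint32_t reg; uint32_t val; };

/* queue a register write instead of performing it immediately */
static void dpg_append(struct dpg_write *list, int *n,
		       uint32_t reg, uint32_t val)
{
	list[(*n)++] = (struct dpg_write){ .reg = reg, .val = val };
}

int main(void)
{
	enum { REG_VCPU_INT_EN2 = 0x10, REG_SYS_INT_EN = 0x11 };	/* illustrative */
	const uint32_t RAS_EN_MASK = 1u << 0;	/* stand-in for the *_EN_MASK bits */
	struct dpg_write list[8];
	int n = 0;

	/* both interrupt-enable registers must carry the RAS bit,
	 * hence the extra queued write the hunk introduces */
	dpg_append(list, &n, REG_VCPU_INT_EN2, RAS_EN_MASK);
	dpg_append(list, &n, REG_SYS_INT_EN, RAS_EN_MASK);

	for (int i = 0; i < n; i++)	/* "replay" phase */
		printf("write reg 0x%02x = 0x%08x\n", list[i].reg, list[i].val);
	return 0;
}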